From 1379f9b3b36af6dd52eb079b687571026c92e325 Mon Sep 17 00:00:00 2001 From: Kogia-sima Date: Mon, 15 Jun 2020 12:06:14 +0900 Subject: [PATCH] Escape single quote (') It seems that EJS escapes this character. --- sailfish/src/runtime/escape/avx2.rs | 38 +++++++++------- sailfish/src/runtime/escape/fallback.rs | 6 +-- sailfish/src/runtime/escape/mod.rs | 14 +++--- sailfish/src/runtime/escape/sse2.rs | 59 ++++++++++++++----------- 4 files changed, 65 insertions(+), 52 deletions(-) diff --git a/sailfish/src/runtime/escape/avx2.rs b/sailfish/src/runtime/escape/avx2.rs index ab7ba7c..b75043b 100644 --- a/sailfish/src/runtime/escape/avx2.rs +++ b/sailfish/src/runtime/escape/avx2.rs @@ -30,9 +30,9 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) { let mut start_ptr = bytes.as_ptr(); let end_ptr = start_ptr.add(len); - let v_independent1 = _mm256_set1_epi8(4); + let v_independent1 = _mm256_set1_epi8(5); let v_independent2 = _mm256_set1_epi8(2); - let v_key1 = _mm256_set1_epi8(0x26); + let v_key1 = _mm256_set1_epi8(0x27); let v_key2 = _mm256_set1_epi8(0x3e); let maskgen = |x: __m256i| -> i32 { @@ -55,14 +55,17 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) { } let c = ESCAPE_LUT[*ptr2 as usize] as usize; - debug_assert!(c < ESCAPED_LEN); - if start_ptr < ptr2 { - let slc = - slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); - buffer.push_str(std::str::from_utf8_unchecked(slc)); + if c < ESCAPED_LEN { + if start_ptr < ptr2 { + let slc = slice::from_raw_parts( + start_ptr, + ptr2 as usize - start_ptr as usize, + ); + buffer.push_str(std::str::from_utf8_unchecked(slc)); + } + buffer.push_str(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); } - buffer.push_str(*ESCAPED.get_unchecked(c)); - start_ptr = ptr2.add(1); mask ^= 1 << trailing_zeros; } } @@ -76,14 +79,17 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) { let trailing_zeros = mask.trailing_zeros() as usize; let ptr2 = ptr.add(trailing_zeros); let c = 
ESCAPE_LUT[*ptr2 as usize] as usize; - debug_assert!(c < ESCAPED_LEN); - if start_ptr < ptr2 { - let slc = - slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); - buffer.push_str(std::str::from_utf8_unchecked(slc)); + if c < ESCAPED_LEN { + if start_ptr < ptr2 { + let slc = slice::from_raw_parts( + start_ptr, + ptr2 as usize - start_ptr as usize, + ); + buffer.push_str(std::str::from_utf8_unchecked(slc)); + } + buffer.push_str(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); } - buffer.push_str(*ESCAPED.get_unchecked(c)); - start_ptr = ptr2.add(1); mask ^= 1 << trailing_zeros; } diff --git a/sailfish/src/runtime/escape/fallback.rs b/sailfish/src/runtime/escape/fallback.rs index 663809b..540b934 100644 --- a/sailfish/src/runtime/escape/fallback.rs +++ b/sailfish/src/runtime/escape/fallback.rs @@ -26,9 +26,9 @@ fn contains_zero_byte(x: usize) -> bool { #[inline] fn contains_key(x: usize) -> bool { - const INDEPENDENTS1: usize = 0x0404_0404_0404_0404_u64 as usize; + const INDEPENDENTS1: usize = 0x0505_0505_0505_0505_u64 as usize; const INDEPENDENTS2: usize = 0x0202_0202_0202_0202_u64 as usize; - const KEY1: usize = 0x2626_2626_2626_2626_u64 as usize; + const KEY1: usize = 0x2727_2727_2727_2727_u64 as usize; const KEY2: usize = 0x3e3e_3e3e_3e3e_3e3e_u64 as usize; let y1 = x | INDEPENDENTS1; @@ -72,9 +72,7 @@ pub unsafe fn escape_aligned( debug_assert_eq!((ptr as usize) % USIZE_BYTES, 0); let chunk = *(ptr as *const usize); - eprintln!("# {:x}", chunk); if contains_key(chunk) { - eprintln!("true!"); start_ptr = naive::proceed(buffer, start_ptr, ptr, ptr.add(USIZE_BYTES)) } ptr = ptr.add(USIZE_BYTES); diff --git a/sailfish/src/runtime/escape/mod.rs b/sailfish/src/runtime/escape/mod.rs index 1a5cbd9..2a4eab7 100644 --- a/sailfish/src/runtime/escape/mod.rs +++ b/sailfish/src/runtime/escape/mod.rs @@ -11,8 +11,8 @@ use super::buffer::Buffer; static ESCAPE_LUT: [u8; 256] = [ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, - 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 2, 9, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 3, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, @@ -22,8 +22,8 @@ static ESCAPE_LUT: [u8; 256] = [ 9, 9, 9, 9, ]; -const ESCAPED: [&str; 4] = [""", "&", "<", ">"]; -const ESCAPED_LEN: usize = 4; +const ESCAPED: [&str; 5] = [""", "&", "'", "<", ">"]; +const ESCAPED_LEN: usize = 5; #[cfg(target_feature = "avx2")] pub(crate) fn escape_to_buf(feed: &str, buf: &mut Buffer) { @@ -108,7 +108,7 @@ mod tests { #[test] fn escape_short() { assert_eq!(escape("<"), "<"); - assert_eq!(escape("\"&<>"), ""&<>"); + assert_eq!(escape("\"&<>'"), ""&<>'"); assert_eq!( escape("{\"title\": \"This is a JSON!\"}"), "{"title": "This is a JSON!"}" @@ -124,8 +124,8 @@ mod tests { #[rustfmt::skip] fn escape_long() { assert_eq!( - escape(r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r;o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqve/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""###), - 
r###"m{jml&amp;,?6&gt;\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k&quot;@p4$~?;!;pn_l8v.&quot;ki`%/&amp;^=\[y+qcerr`@3*|?du.\0vd#40.&gt;bcpf\u@m|c&lt;2t7`hk)^?&quot;0u{v%9}4y2hhv?%-f`&lt;;rzwx`7}l(j2b:c\&lt;|z&amp;$x{+k;f`0+w3e0\m.wmdli&gt;94e2hp\$}j0&amp;m(*h$/lwlj#}99r;o.kj@1#}~v+;y~b[~m.eci}&amp;l7fxt`\\{~#k*9z/d{}(.^j}[(,]:&lt;\h]9k2+0*w60/|23~5;/!-h&amp;ci*~e1h~+:1lhh\&gt;y_*&gt;:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*&lt;{6!&lt;(_|&lt;#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&amp;jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t&gt;@o*&quot;$]799r=+)t&gt;co?rvgk%u0c@.9os;#t_*/gqv&lt;za&amp;~r^]&quot;{t4by2t`&lt;q4bfo^&amp;!so5/~(nxk:7l\;#0w41u~w3i$g|&gt;e/t;o&lt;*`~?3.jyx+h)+^cn^j4td|&gt;)~rs)vm#]:&quot;&amp;\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo&quot;chyat3k#7pq1u=.l]c14twa4tg#5k_&quot;"### + escape(r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"'@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40'.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r';o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqv<za&~r^]"{t4by2t`<q4bfo^&!so5/~(nxk:7l\;#0w41u~w3i$g|>e/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""###), + r###"m{jml&amp;,?6&gt;\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k&quot;&#039;@p4$~?;!;pn_l8v.&quot;ki`%/&amp;^=\[y+qcerr`@3*|?du.\0vd#40&#039;.&gt;bcpf\u@m|c&lt;2t7`hk)^?&quot;0u{v%9}4y2hhv?%-f`&lt;;rzwx`7}l(j2b:c\&lt;|z&amp;$x{+k;f`0+w3e0\m.wmdli&gt;94e2hp\$}j0&amp;m(*h$/lwlj#}99r&#039;;o.kj@1#}~v+;y~b[~m.eci}&amp;l7fxt`\\{~#k*9z/d{}(.^j}[(,]:&lt;\h]9k2+0*w60/|23~5;/!-h&amp;ci*~e1h~+:1lhh\&gt;y_*&gt;:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*&lt;{6!&lt;(_|&lt;#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&amp;jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t&gt;@o*&quot;$]799r=+)t&gt;co?rvgk%u0c@.9os;#t_*/gqv&lt;za&amp;~r^]&quot;{t4by2t`&lt;q4bfo^&amp;!so5/~(nxk:7l\;#0w41u~w3i$g|&gt;e/t;o&lt;*`~?3.jyx+h)+^cn^j4td|&gt;)~rs)vm#]:&quot;&amp;\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo&quot;chyat3k#7pq1u=.l]c14twa4tg#5k_&quot;"### ); } diff --git a/sailfish/src/runtime/escape/sse2.rs 
b/sailfish/src/runtime/escape/sse2.rs index 8acdbe8..1b84ef7 100644 --- a/sailfish/src/runtime/escape/sse2.rs +++ b/sailfish/src/runtime/escape/sse2.rs @@ -27,9 +27,9 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) { let mut start_ptr = bytes.as_ptr(); let end_ptr = start_ptr.add(len); - let v_independent1 = _mm_set1_epi8(4); + let v_independent1 = _mm_set1_epi8(5); let v_independent2 = _mm_set1_epi8(2); - let v_key1 = _mm_set1_epi8(0x26); + let v_key1 = _mm_set1_epi8(0x27); let v_key2 = _mm_set1_epi8(0x3e); let maskgen = |x: __m128i| -> i32 { @@ -52,14 +52,17 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) { } let c = ESCAPE_LUT[*ptr2 as usize] as usize; - debug_assert!(c < ESCAPED_LEN); - if start_ptr < ptr2 { - let slc = - slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); - buffer.push_str(std::str::from_utf8_unchecked(slc)); + if c < ESCAPED_LEN { + if start_ptr < ptr2 { + let slc = slice::from_raw_parts( + start_ptr, + ptr2 as usize - start_ptr as usize, + ); + buffer.push_str(std::str::from_utf8_unchecked(slc)); + } + buffer.push_str(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); } - buffer.push_str(*ESCAPED.get_unchecked(c)); - start_ptr = ptr2.add(1); mask ^= 1 << trailing_zeros; } } @@ -77,9 +80,9 @@ pub unsafe fn escape_aligned( end_ptr: *const u8, ) { let mut next_ptr = ptr.add(VECTOR_BYTES); - let v_independent1 = _mm_set1_epi8(4); + let v_independent1 = _mm_set1_epi8(5); let v_independent2 = _mm_set1_epi8(2); - let v_key1 = _mm_set1_epi8(0x26); + let v_key1 = _mm_set1_epi8(0x27); let v_key2 = _mm_set1_epi8(0x3e); let maskgen = |x: __m128i| -> i32 { @@ -96,14 +99,17 @@ pub unsafe fn escape_aligned( let trailing_zeros = mask.trailing_zeros() as usize; let ptr2 = ptr.add(trailing_zeros); let c = ESCAPE_LUT[*ptr2 as usize] as usize; - debug_assert!(c < ESCAPED_LEN); - if start_ptr < ptr2 { - let slc = - slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); - 
buffer.push_str(std::str::from_utf8_unchecked(slc)); + if c < ESCAPED_LEN { + if start_ptr < ptr2 { + let slc = slice::from_raw_parts( + start_ptr, + ptr2 as usize - start_ptr as usize, + ); + buffer.push_str(std::str::from_utf8_unchecked(slc)); + } + buffer.push_str(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); } - buffer.push_str(*ESCAPED.get_unchecked(c)); - start_ptr = ptr2.add(1); mask ^= 1 << trailing_zeros; } @@ -119,14 +125,17 @@ pub unsafe fn escape_aligned( let trailing_zeros = mask.trailing_zeros() as usize; let ptr2 = ptr.add(trailing_zeros); let c = ESCAPE_LUT[*ptr2 as usize] as usize; - debug_assert!(c < ESCAPED_LEN); - if start_ptr < ptr2 { - let slc = - slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); - buffer.push_str(std::str::from_utf8_unchecked(slc)); + if c < ESCAPED_LEN { + if start_ptr < ptr2 { + let slc = slice::from_raw_parts( + start_ptr, + ptr2 as usize - start_ptr as usize, + ); + buffer.push_str(std::str::from_utf8_unchecked(slc)); + } + buffer.push_str(*ESCAPED.get_unchecked(c)); + start_ptr = ptr2.add(1); } - buffer.push_str(*ESCAPED.get_unchecked(c)); - start_ptr = ptr2.add(1); mask ^= 1 << trailing_zeros; }