Escape single quote (')

EJS appears to escape the single quote as well, so escape it here too (emitted as `&#039;`).
This commit is contained in:
Kogia-sima 2020-06-15 12:06:14 +09:00
parent 4803792d4c
commit 1379f9b3b3
4 changed files with 65 additions and 52 deletions

View File

@ -30,9 +30,9 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) {
let mut start_ptr = bytes.as_ptr();
let end_ptr = start_ptr.add(len);
let v_independent1 = _mm256_set1_epi8(4);
let v_independent1 = _mm256_set1_epi8(5);
let v_independent2 = _mm256_set1_epi8(2);
let v_key1 = _mm256_set1_epi8(0x26);
let v_key1 = _mm256_set1_epi8(0x27);
let v_key2 = _mm256_set1_epi8(0x3e);
let maskgen = |x: __m256i| -> i32 {
@ -55,14 +55,17 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) {
}
let c = ESCAPE_LUT[*ptr2 as usize] as usize;
debug_assert!(c < ESCAPED_LEN);
if start_ptr < ptr2 {
let slc =
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
buffer.push_str(std::str::from_utf8_unchecked(slc));
if c < ESCAPED_LEN {
if start_ptr < ptr2 {
let slc = slice::from_raw_parts(
start_ptr,
ptr2 as usize - start_ptr as usize,
);
buffer.push_str(std::str::from_utf8_unchecked(slc));
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
mask ^= 1 << trailing_zeros;
}
}
@ -76,14 +79,17 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) {
let trailing_zeros = mask.trailing_zeros() as usize;
let ptr2 = ptr.add(trailing_zeros);
let c = ESCAPE_LUT[*ptr2 as usize] as usize;
debug_assert!(c < ESCAPED_LEN);
if start_ptr < ptr2 {
let slc =
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
buffer.push_str(std::str::from_utf8_unchecked(slc));
if c < ESCAPED_LEN {
if start_ptr < ptr2 {
let slc = slice::from_raw_parts(
start_ptr,
ptr2 as usize - start_ptr as usize,
);
buffer.push_str(std::str::from_utf8_unchecked(slc));
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
mask ^= 1 << trailing_zeros;
}

View File

@ -26,9 +26,9 @@ fn contains_zero_byte(x: usize) -> bool {
#[inline]
fn contains_key(x: usize) -> bool {
const INDEPENDENTS1: usize = 0x0404_0404_0404_0404_u64 as usize;
const INDEPENDENTS1: usize = 0x0505_0505_0505_0505_u64 as usize;
const INDEPENDENTS2: usize = 0x0202_0202_0202_0202_u64 as usize;
const KEY1: usize = 0x2626_2626_2626_2626_u64 as usize;
const KEY1: usize = 0x2727_2727_2727_2727_u64 as usize;
const KEY2: usize = 0x3e3e_3e3e_3e3e_3e3e_u64 as usize;
let y1 = x | INDEPENDENTS1;
@ -72,9 +72,7 @@ pub unsafe fn escape_aligned(
debug_assert_eq!((ptr as usize) % USIZE_BYTES, 0);
let chunk = *(ptr as *const usize);
eprintln!("# {:x}", chunk);
if contains_key(chunk) {
eprintln!("true!");
start_ptr = naive::proceed(buffer, start_ptr, ptr, ptr.add(USIZE_BYTES))
}
ptr = ptr.add(USIZE_BYTES);

View File

@ -11,8 +11,8 @@ use super::buffer::Buffer;
static ESCAPE_LUT: [u8; 256] = [
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 2, 9, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 3, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@ -22,8 +22,8 @@ static ESCAPE_LUT: [u8; 256] = [
9, 9, 9, 9,
];
const ESCAPED: [&str; 4] = ["&quot;", "&amp;", "&lt;", "&gt;"];
const ESCAPED_LEN: usize = 4;
const ESCAPED: [&str; 5] = ["&quot;", "&amp;", "&#039;", "&lt;", "&gt;"];
const ESCAPED_LEN: usize = 5;
#[cfg(target_feature = "avx2")]
pub(crate) fn escape_to_buf(feed: &str, buf: &mut Buffer) {
@ -108,7 +108,7 @@ mod tests {
#[test]
fn escape_short() {
assert_eq!(escape("<"), "&lt;");
assert_eq!(escape("\"&<>"), "&quot;&amp;&lt;&gt;");
assert_eq!(escape("\"&<>'"), "&quot;&amp;&lt;&gt;&#039;");
assert_eq!(
escape("{\"title\": \"This is a JSON!\"}"),
"{&quot;title&quot;: &quot;This is a JSON!&quot;}"
@ -124,8 +124,8 @@ mod tests {
#[rustfmt::skip]
fn escape_long() {
assert_eq!(
escape(r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r;o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqv<za&~r^]"{t4by2t`<q4bfo^&!so5/~(nxk:7l\;#0w41u~w3i$g|>e/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""###),
r###"m{jml&amp;,?6&gt;\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k&quot;@p4$~?;!;pn_l8v.&quot;ki`%/&amp;^=\[y+qcerr`@3*|?du.\0vd#40.&gt;bcpf\u@m|c&lt;2t7`hk)^?&quot;0u{v%9}4y2hhv?%-f`&lt;;rzwx`7}l(j2b:c\&lt;|z&amp;$x{+k;f`0+w3e0\m.wmdli&gt;94e2hp\$}j0&amp;m(*h$/lwlj#}99r;o.kj@1#}~v+;y~b[~m.eci}&amp;l7fxt`\\{~#k*9z/d{}(.^j}[(,]:&lt;\h]9k2+0*w60/|23~5;/!-h&amp;ci*~e1h~+:1lhh\&gt;y_*&gt;:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*&lt;{6!&lt;(_|&lt;#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&amp;jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t&gt;@o*&quot;$]799r=+)t&gt;co?rvgk%u0c@.9os;#t_*/gqv&lt;za&amp;~r^]&quot;{t4by2t`&lt;q4bfo^&amp;!so5/~(nxk:7l\;#0w41u~w3i$g|&gt;e/t;o&lt;*`~?3.jyx+h)+^cn^j4td|&gt;)~rs)vm#]:&quot;&amp;\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo&quot;chyat3k#7pq1u=.l]c14twa4tg#5k_&quot;"###
escape(r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"'@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40'.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r';o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqv<za&~r^]"{t4by2t`<q4bfo^&!so5/~(nxk:7l\;#0w41u~w3i$g|>e/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""###),
r###"m{jml&amp;,?6&gt;\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k&quot;&#039;@p4$~?;!;pn_l8v.&quot;ki`%/&amp;^=\[y+qcerr`@3*|?du.\0vd#40&#039;.&gt;bcpf\u@m|c&lt;2t7`hk)^?&quot;0u{v%9}4y2hhv?%-f`&lt;;rzwx`7}l(j2b:c\&lt;|z&amp;$x{+k;f`0+w3e0\m.wmdli&gt;94e2hp\$}j0&amp;m(*h$/lwlj#}99r&#039;;o.kj@1#}~v+;y~b[~m.eci}&amp;l7fxt`\\{~#k*9z/d{}(.^j}[(,]:&lt;\h]9k2+0*w60/|23~5;/!-h&amp;ci*~e1h~+:1lhh\&gt;y_*&gt;:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*&lt;{6!&lt;(_|&lt;#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&amp;jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t&gt;@o*&quot;$]799r=+)t&gt;co?rvgk%u0c@.9os;#t_*/gqv&lt;za&amp;~r^]&quot;{t4by2t`&lt;q4bfo^&amp;!so5/~(nxk:7l\;#0w41u~w3i$g|&gt;e/t;o&lt;*`~?3.jyx+h)+^cn^j4td|&gt;)~rs)vm#]:&quot;&amp;\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo&quot;chyat3k#7pq1u=.l]c14twa4tg#5k_&quot;"###
);
}

View File

@ -27,9 +27,9 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) {
let mut start_ptr = bytes.as_ptr();
let end_ptr = start_ptr.add(len);
let v_independent1 = _mm_set1_epi8(4);
let v_independent1 = _mm_set1_epi8(5);
let v_independent2 = _mm_set1_epi8(2);
let v_key1 = _mm_set1_epi8(0x26);
let v_key1 = _mm_set1_epi8(0x27);
let v_key2 = _mm_set1_epi8(0x3e);
let maskgen = |x: __m128i| -> i32 {
@ -52,14 +52,17 @@ pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) {
}
let c = ESCAPE_LUT[*ptr2 as usize] as usize;
debug_assert!(c < ESCAPED_LEN);
if start_ptr < ptr2 {
let slc =
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
buffer.push_str(std::str::from_utf8_unchecked(slc));
if c < ESCAPED_LEN {
if start_ptr < ptr2 {
let slc = slice::from_raw_parts(
start_ptr,
ptr2 as usize - start_ptr as usize,
);
buffer.push_str(std::str::from_utf8_unchecked(slc));
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
mask ^= 1 << trailing_zeros;
}
}
@ -77,9 +80,9 @@ pub unsafe fn escape_aligned(
end_ptr: *const u8,
) {
let mut next_ptr = ptr.add(VECTOR_BYTES);
let v_independent1 = _mm_set1_epi8(4);
let v_independent1 = _mm_set1_epi8(5);
let v_independent2 = _mm_set1_epi8(2);
let v_key1 = _mm_set1_epi8(0x26);
let v_key1 = _mm_set1_epi8(0x27);
let v_key2 = _mm_set1_epi8(0x3e);
let maskgen = |x: __m128i| -> i32 {
@ -96,14 +99,17 @@ pub unsafe fn escape_aligned(
let trailing_zeros = mask.trailing_zeros() as usize;
let ptr2 = ptr.add(trailing_zeros);
let c = ESCAPE_LUT[*ptr2 as usize] as usize;
debug_assert!(c < ESCAPED_LEN);
if start_ptr < ptr2 {
let slc =
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
buffer.push_str(std::str::from_utf8_unchecked(slc));
if c < ESCAPED_LEN {
if start_ptr < ptr2 {
let slc = slice::from_raw_parts(
start_ptr,
ptr2 as usize - start_ptr as usize,
);
buffer.push_str(std::str::from_utf8_unchecked(slc));
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
mask ^= 1 << trailing_zeros;
}
@ -119,14 +125,17 @@ pub unsafe fn escape_aligned(
let trailing_zeros = mask.trailing_zeros() as usize;
let ptr2 = ptr.add(trailing_zeros);
let c = ESCAPE_LUT[*ptr2 as usize] as usize;
debug_assert!(c < ESCAPED_LEN);
if start_ptr < ptr2 {
let slc =
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
buffer.push_str(std::str::from_utf8_unchecked(slc));
if c < ESCAPED_LEN {
if start_ptr < ptr2 {
let slc = slice::from_raw_parts(
start_ptr,
ptr2 as usize - start_ptr as usize,
);
buffer.push_str(std::str::from_utf8_unchecked(slc));
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
}
buffer.push_str(*ESCAPED.get_unchecked(c));
start_ptr = ptr2.add(1);
mask ^= 1 << trailing_zeros;
}