diff --git a/sailfish/src/runtime/escape/avx2.rs b/sailfish/src/runtime/escape/avx2.rs index 5062bed..fc53f77 100644 --- a/sailfish/src/runtime/escape/avx2.rs +++ b/sailfish/src/runtime/escape/avx2.rs @@ -8,6 +8,7 @@ use std::slice; use super::super::Buffer; use super::{ESCAPED, ESCAPED_LEN, ESCAPE_LUT}; +use super::naive::push_escaped_str; const VECTOR_BYTES: usize = std::mem::size_of::<__m256i>(); @@ -41,6 +42,7 @@ pub unsafe fn escape(feed: &str, buffer: &mut Buffer) { let mut mask = maskgen(_mm256_loadu_si256(ptr as *const __m256i)); while mask != 0 { let trailing_zeros = mask.trailing_zeros() as usize; + mask ^= 1 << trailing_zeros; let ptr2 = ptr.add(trailing_zeros); let c = ESCAPE_LUT[*ptr2 as usize] as usize; if c < ESCAPED_LEN { @@ -51,10 +53,9 @@ pub unsafe fn escape(feed: &str, buffer: &mut Buffer) { ); buffer.push_str(std::str::from_utf8_unchecked(slc)); } - buffer.push_str(*ESCAPED.get_unchecked(c)); + push_escaped_str(*ESCAPED.get_unchecked(c), buffer); start_ptr = ptr2.add(1); } - mask ^= 1 << trailing_zeros; } ptr = ptr.add(VECTOR_BYTES); @@ -70,6 +71,7 @@ pub unsafe fn escape(feed: &str, buffer: &mut Buffer) { maskgen(_mm256_loadu_si256(ptr.sub(backs) as *const __m256i)) >> backs; while mask != 0 { let trailing_zeros = mask.trailing_zeros() as usize; + mask ^= 1 << trailing_zeros; let ptr2 = ptr.add(trailing_zeros); let c = ESCAPE_LUT[*ptr2 as usize] as usize; if c < ESCAPED_LEN { @@ -80,10 +82,9 @@ pub unsafe fn escape(feed: &str, buffer: &mut Buffer) { ); buffer.push_str(std::str::from_utf8_unchecked(slc)); } - buffer.push_str(*ESCAPED.get_unchecked(c)); + push_escaped_str(*ESCAPED.get_unchecked(c), buffer); start_ptr = ptr2.add(1); } - mask ^= 1 << trailing_zeros; } } @@ -119,6 +120,7 @@ unsafe fn escape_small(feed: &str, buffer: &mut Buffer) { let mut mask = maskgen(_mm_loadu_si128(ptr as *const __m128i)); while mask != 0 { let trailing_zeros = mask.trailing_zeros() as usize; + mask ^= 1 << trailing_zeros; let ptr2 = ptr.add(trailing_zeros); 
let c = ESCAPE_LUT[*ptr2 as usize] as usize; if c < ESCAPED_LEN { @@ -127,10 +129,9 @@ unsafe fn escape_small(feed: &str, buffer: &mut Buffer) { slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize); buffer.push_str(std::str::from_utf8_unchecked(slc)); } - buffer.push_str(*ESCAPED.get_unchecked(c)); + push_escaped_str(*ESCAPED.get_unchecked(c), buffer); start_ptr = ptr2.add(1); } - mask ^= 1 << trailing_zeros; } if len != 16 { @@ -141,6 +142,7 @@ unsafe fn escape_small(feed: &str, buffer: &mut Buffer) { while mask != 0 { let trailing_zeros = mask.trailing_zeros() as usize; + mask ^= 1 << trailing_zeros; let ptr2 = ptr.add(trailing_zeros); let c = ESCAPE_LUT[*ptr2 as usize] as usize; if c < ESCAPED_LEN { @@ -151,10 +153,9 @@ unsafe fn escape_small(feed: &str, buffer: &mut Buffer) { ); buffer.push_str(std::str::from_utf8_unchecked(slc)); } - buffer.push_str(*ESCAPED.get_unchecked(c)); + push_escaped_str(*ESCAPED.get_unchecked(c), buffer); start_ptr = ptr2.add(1); } - mask ^= 1 << trailing_zeros; } } diff --git a/sailfish/src/runtime/escape/naive.rs b/sailfish/src/runtime/escape/naive.rs index 7686322..158f38f 100644 --- a/sailfish/src/runtime/escape/naive.rs +++ b/sailfish/src/runtime/escape/naive.rs @@ -1,4 +1,5 @@ -use core::slice; +use std::ptr; +use std::slice; use super::super::utils::memcpy_16; use super::super::Buffer; @@ -38,7 +39,7 @@ pub(super) unsafe fn proceed( slice::from_raw_parts(start_ptr, ptr as usize - start_ptr as usize); buffer.push_str(std::str::from_utf8_unchecked(slc)); } - buffer.push_str(*ESCAPED.get_unchecked(idx)); + push_escaped_str(*ESCAPED.get_unchecked(idx), buffer); start_ptr = ptr.add(1); ptr = ptr.add(1); } @@ -87,3 +88,33 @@ pub(super) unsafe fn escape_small(feed: &str, mut buf: *mut u8) -> usize { buf as usize - buf_begin as usize }
+
+/// Appends a short escape sequence to `buffer` with two overlapping 4-byte copies.
+///
+/// # Safety
+///
+/// `value.len()` must be between 4 and 8 bytes inclusive. A shorter string makes
+/// `value.len() - 4` underflow; a longer one leaves the bytes between the two
+/// copied words unwritten even though the buffer length grows by `value.len()`.
+#[inline]
+pub(super) unsafe fn push_escaped_str(value: &str, buffer: &mut Buffer) {
+    let len = value.len();
+    debug_assert!(len >= 4 && len <= 8, "escape sequence must be 4 to 8 bytes long");
+
+    // NOTE(review): presumably guarantees room for `len` more bytes; confirm
+    // against `Buffer::reserve_small`.
+    buffer.reserve_small(len);
+
+    let src = value.as_ptr();
+    let dst = buffer.as_mut_ptr().add(buffer.len());
+
+    // Copy the first and the last four bytes as unaligned `u32` words. The words
+    // overlap when `len < 8`; both loads are issued before either store.
+    let offset = len - 4;
+    let t2 = ptr::read_unaligned(src.add(offset) as *const u32);
+    let t1 = ptr::read_unaligned(src as *const u32);
+    ptr::write_unaligned(dst.add(offset) as *mut u32, t2);
+    ptr::write_unaligned(dst as *mut u32, t1);
+
+    buffer._set_len(buffer.len() + len);
+}
diff --git a/sailfish/src/runtime/escape/sse2.rs b/sailfish/src/runtime/escape/sse2.rs index 820b6d8..24b4fc7 100644 --- a/sailfish/src/runtime/escape/sse2.rs +++ b/sailfish/src/runtime/escape/sse2.rs @@ -8,6 +8,7 @@ use std::slice; use super::super::Buffer; use super::{ESCAPED, ESCAPED_LEN, ESCAPE_LUT}; +use super::naive::push_escaped_str; const VECTOR_BYTES: usize = std::mem::size_of::<__m128i>(); @@ -34,6 +35,7 @@ pub unsafe fn escape(feed: &str, buffer: &mut Buffer) { let mut mask = maskgen(_mm_loadu_si128(ptr as *const __m128i)); while mask != 0 { let trailing_zeros = mask.trailing_zeros() as usize; + mask ^= 1 << trailing_zeros; let ptr2 = ptr.add(trailing_zeros); let c = ESCAPE_LUT[*ptr2 as usize] as usize; if c < ESCAPED_LEN { @@ -44,10 +46,9 @@ pub unsafe fn escape(feed: &str, buffer: &mut Buffer) { ); buffer.push_str(std::str::from_utf8_unchecked(slc)); } - buffer.push_str(*ESCAPED.get_unchecked(c)); + push_escaped_str(*ESCAPED.get_unchecked(c), buffer); start_ptr = ptr2.add(1); } - mask ^= 1 << trailing_zeros; } ptr = ptr.add(VECTOR_BYTES); @@ -63,6 +64,7 @@ pub unsafe fn escape(feed: &str, buffer: &mut Buffer) { let mut mask = maskgen(_mm_loadu_si128(read_ptr as *const __m128i)) >> backs; while mask != 0 { let trailing_zeros = mask.trailing_zeros() as usize; + mask ^= 1 << trailing_zeros; let ptr2 = ptr.add(trailing_zeros); let c = ESCAPE_LUT[*ptr2 as usize] as usize; if c < ESCAPED_LEN { @@ -73,10 +75,9 @@ pub unsafe fn escape(feed: &str, buffer: &mut Buffer) { ); buffer.push_str(std::str::from_utf8_unchecked(slc)); } - buffer.push_str(*ESCAPED.get_unchecked(c)); +
push_escaped_str(*ESCAPED.get_unchecked(c), buffer); start_ptr = ptr2.add(1); } - mask ^= 1 << trailing_zeros; } }