Speed up feature detection for escaping
- Remove sailfish::runtime::escape::escape_with
This commit is contained in:
parent
655bcb9e2f
commit
53f0db024c
|
@ -8,21 +8,22 @@ use std::slice;
|
|||
|
||||
use super::{naive, sse2};
|
||||
use super::{ESCAPED, ESCAPED_LEN, ESCAPE_LUT};
|
||||
use super::super::Buffer;
|
||||
|
||||
const VECTOR_BYTES: usize = std::mem::size_of::<__m256i>();
|
||||
const VECTOR_ALIGN: usize = VECTOR_BYTES - 1;
|
||||
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub unsafe fn escape<F: FnMut(&str)>(writer: &mut F, bytes: &[u8]) {
|
||||
pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) {
|
||||
let len = bytes.len();
|
||||
let mut start_ptr = bytes.as_ptr();
|
||||
let end_ptr = start_ptr.add(len);
|
||||
|
||||
if len < VECTOR_BYTES {
|
||||
if len < 16 {
|
||||
naive::escape(writer, start_ptr, start_ptr, end_ptr);
|
||||
naive::escape(buffer, start_ptr, start_ptr, end_ptr);
|
||||
} else {
|
||||
sse2::escape(writer, bytes);
|
||||
sse2::escape(buffer, bytes);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -56,9 +57,9 @@ pub unsafe fn escape<F: FnMut(&str)>(writer: &mut F, bytes: &[u8]) {
|
|||
if start_ptr < ptr2 {
|
||||
let slc =
|
||||
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
|
||||
writer(std::str::from_utf8_unchecked(slc));
|
||||
buffer.push_str(std::str::from_utf8_unchecked(slc));
|
||||
}
|
||||
writer(*ESCAPED.get_unchecked(c));
|
||||
buffer.push_str(*ESCAPED.get_unchecked(c));
|
||||
start_ptr = ptr2.add(1);
|
||||
mask ^= 1 << trailing_zeros;
|
||||
}
|
||||
|
@ -77,9 +78,9 @@ pub unsafe fn escape<F: FnMut(&str)>(writer: &mut F, bytes: &[u8]) {
|
|||
if start_ptr < ptr2 {
|
||||
let slc =
|
||||
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
|
||||
writer(std::str::from_utf8_unchecked(slc));
|
||||
buffer.push_str(std::str::from_utf8_unchecked(slc));
|
||||
}
|
||||
writer(*ESCAPED.get_unchecked(c));
|
||||
buffer.push_str(*ESCAPED.get_unchecked(c));
|
||||
start_ptr = ptr2.add(1);
|
||||
mask ^= 1 << trailing_zeros;
|
||||
}
|
||||
|
@ -88,5 +89,5 @@ pub unsafe fn escape<F: FnMut(&str)>(writer: &mut F, bytes: &[u8]) {
|
|||
next_ptr = next_ptr.add(VECTOR_BYTES);
|
||||
}
|
||||
|
||||
sse2::escape_aligned(writer, start_ptr, ptr, end_ptr);
|
||||
sse2::escape_aligned(buffer, start_ptr, ptr, end_ptr);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#![allow(clippy::cast_ptr_alignment)]
|
||||
|
||||
use super::naive;
|
||||
use super::super::Buffer;
|
||||
|
||||
#[cfg(target_pointer_width = "16")]
|
||||
const USIZE_BYTES: usize = 2;
|
||||
|
@ -37,13 +38,13 @@ fn contains_key(x: usize) -> bool {
|
|||
contains_zero_byte(z1) || contains_zero_byte(z2)
|
||||
}
|
||||
|
||||
pub unsafe fn escape<F: FnMut(&str)>(writer: &mut F, bytes: &[u8]) {
|
||||
pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) {
|
||||
let len = bytes.len();
|
||||
let mut start_ptr = bytes.as_ptr();
|
||||
let end_ptr = start_ptr.add(len);
|
||||
|
||||
if bytes.len() < USIZE_BYTES {
|
||||
naive::escape(writer, start_ptr, start_ptr, end_ptr);
|
||||
naive::escape(buffer, start_ptr, start_ptr, end_ptr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -54,14 +55,14 @@ pub unsafe fn escape<F: FnMut(&str)>(writer: &mut F, bytes: &[u8]) {
|
|||
|
||||
let chunk = (ptr as *const usize).read_unaligned();
|
||||
if contains_key(chunk) {
|
||||
start_ptr = naive::proceed(writer, start_ptr, ptr, aligned_ptr);
|
||||
start_ptr = naive::proceed(buffer, start_ptr, ptr, aligned_ptr);
|
||||
}
|
||||
|
||||
escape_aligned(writer, start_ptr, aligned_ptr, end_ptr);
|
||||
escape_aligned(buffer, start_ptr, aligned_ptr, end_ptr);
|
||||
}
|
||||
|
||||
pub unsafe fn escape_aligned<F: FnMut(&str)>(
|
||||
writer: &mut F,
|
||||
pub unsafe fn escape_aligned(
|
||||
buffer: &mut Buffer,
|
||||
mut start_ptr: *const u8,
|
||||
mut ptr: *const u8,
|
||||
end_ptr: *const u8,
|
||||
|
@ -73,11 +74,11 @@ pub unsafe fn escape_aligned<F: FnMut(&str)>(
|
|||
eprintln!("# {:x}", chunk);
|
||||
if contains_key(chunk) {
|
||||
eprintln!("true!");
|
||||
start_ptr = naive::proceed(writer, start_ptr, ptr, ptr.add(USIZE_BYTES))
|
||||
start_ptr = naive::proceed(buffer, start_ptr, ptr, ptr.add(USIZE_BYTES))
|
||||
}
|
||||
ptr = ptr.add(USIZE_BYTES);
|
||||
}
|
||||
debug_assert!(ptr <= end_ptr);
|
||||
debug_assert!(start_ptr <= ptr);
|
||||
naive::escape(writer, start_ptr, ptr, end_ptr);
|
||||
naive::escape(buffer, start_ptr, ptr, end_ptr);
|
||||
}
|
||||
|
|
|
@ -25,33 +25,34 @@ static ESCAPE_LUT: [u8; 256] = [
|
|||
const ESCAPED: [&str; 4] = ["&quot;", "&amp;", "&lt;", "&gt;"];
|
||||
const ESCAPED_LEN: usize = 4;
|
||||
|
||||
/// write the escaped contents with custom function
|
||||
///
|
||||
/// This function is soft-deprecated because using this function causes a large binary size.
|
||||
#[inline]
|
||||
pub fn escape_with<F: FnMut(&str)>(mut writer: F, feed: &str) {
|
||||
unsafe {
|
||||
#[cfg(target_feature = "avx2")]
|
||||
{
|
||||
avx2::escape(&mut writer, feed.as_bytes());
|
||||
}
|
||||
|
||||
#[cfg(not(target_feature = "avx2"))]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
avx2::escape(&mut writer, feed.as_bytes());
|
||||
} else if is_x86_feature_detected!("sse2") {
|
||||
sse2::escape(&mut writer, feed.as_bytes());
|
||||
} else {
|
||||
fallback::escape(&mut writer, feed.as_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn escape_to_buf(feed: &str, buf: &mut Buffer) {
|
||||
escape_with(|e| buf.push_str(e), feed);
|
||||
use std::mem;
|
||||
use std::sync::atomic::{AtomicPtr, Ordering};
|
||||
|
||||
type FnRaw = *mut ();
|
||||
|
||||
static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
|
||||
|
||||
fn detect(buffer: &mut Buffer, bytes: &[u8]) {
|
||||
let fun = if is_x86_feature_detected!("avx2") {
|
||||
avx2::escape as FnRaw
|
||||
} else if is_x86_feature_detected!("sse2") {
|
||||
sse2::escape as FnRaw
|
||||
} else {
|
||||
fallback::escape as FnRaw
|
||||
};
|
||||
|
||||
FN.store(fun as FnRaw, Ordering::Relaxed);
|
||||
unsafe {
|
||||
mem::transmute::<FnRaw, fn(&mut Buffer, &[u8])>(fun)(buffer, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let fun = FN.load(Ordering::Relaxed);
|
||||
mem::transmute::<FnRaw, fn(&mut Buffer, &[u8])>(fun)(buf, feed.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
/// write the escaped contents into `String`
|
||||
|
@ -152,13 +153,8 @@ mod tests {
|
|||
|
||||
unsafe {
|
||||
escape_to_buf(&*s, &mut buf1);
|
||||
fallback::escape(&mut |s| buf2.push_str(s), s.as_bytes());
|
||||
naive::escape(
|
||||
&mut |s| buf3.push_str(s),
|
||||
s.as_ptr(),
|
||||
s.as_ptr(),
|
||||
s.as_ptr().add(s.len()),
|
||||
);
|
||||
fallback::escape(&mut buf2, s.as_bytes());
|
||||
naive::escape(&mut buf3, s.as_ptr(), s.as_ptr(), s.as_ptr().add(s.len()));
|
||||
}
|
||||
|
||||
assert_eq!(buf1.as_str(), buf3.as_str());
|
||||
|
|
|
@ -1,25 +1,26 @@
|
|||
use core::slice;
|
||||
|
||||
use super::super::Buffer;
|
||||
use super::{ESCAPED, ESCAPED_LEN, ESCAPE_LUT};
|
||||
|
||||
#[inline]
|
||||
pub(super) unsafe fn escape<F: FnMut(&str)>(
|
||||
writer: &mut F,
|
||||
pub(super) unsafe fn escape(
|
||||
buffer: &mut Buffer,
|
||||
mut start_ptr: *const u8,
|
||||
ptr: *const u8,
|
||||
end_ptr: *const u8,
|
||||
) {
|
||||
start_ptr = proceed(writer, start_ptr, ptr, end_ptr);
|
||||
start_ptr = proceed(buffer, start_ptr, ptr, end_ptr);
|
||||
|
||||
if end_ptr > start_ptr {
|
||||
let slc = slice::from_raw_parts(start_ptr, end_ptr as usize - start_ptr as usize);
|
||||
writer(std::str::from_utf8_unchecked(slc));
|
||||
buffer.push_str(std::str::from_utf8_unchecked(slc));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(super) unsafe fn proceed<F: FnMut(&str)>(
|
||||
writer: &mut F,
|
||||
pub(super) unsafe fn proceed(
|
||||
buffer: &mut Buffer,
|
||||
mut start_ptr: *const u8,
|
||||
mut ptr: *const u8,
|
||||
end_ptr: *const u8,
|
||||
|
@ -32,9 +33,9 @@ pub(super) unsafe fn proceed<F: FnMut(&str)>(
|
|||
if ptr > start_ptr {
|
||||
let slc =
|
||||
slice::from_raw_parts(start_ptr, ptr as usize - start_ptr as usize);
|
||||
writer(std::str::from_utf8_unchecked(slc));
|
||||
buffer.push_str(std::str::from_utf8_unchecked(slc));
|
||||
}
|
||||
writer(*ESCAPED.get_unchecked(idx));
|
||||
buffer.push_str(*ESCAPED.get_unchecked(idx));
|
||||
start_ptr = ptr.add(1);
|
||||
}
|
||||
ptr = ptr.add(1);
|
||||
|
|
|
@ -8,19 +8,20 @@ use std::slice;
|
|||
|
||||
use super::naive;
|
||||
use super::{ESCAPED, ESCAPED_LEN, ESCAPE_LUT};
|
||||
use super::super::Buffer;
|
||||
|
||||
const VECTOR_BYTES: usize = std::mem::size_of::<__m128i>();
|
||||
const VECTOR_ALIGN: usize = VECTOR_BYTES - 1;
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[inline]
|
||||
pub unsafe fn escape<F: FnMut(&str)>(writer: &mut F, bytes: &[u8]) {
|
||||
pub unsafe fn escape(buffer: &mut Buffer, bytes: &[u8]) {
|
||||
let len = bytes.len();
|
||||
let mut start_ptr = bytes.as_ptr();
|
||||
let end_ptr = start_ptr.add(len);
|
||||
|
||||
if bytes.len() < VECTOR_BYTES {
|
||||
naive::escape(writer, start_ptr, start_ptr, end_ptr);
|
||||
naive::escape(buffer, start_ptr, start_ptr, end_ptr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -53,20 +54,20 @@ pub unsafe fn escape<F: FnMut(&str)>(writer: &mut F, bytes: &[u8]) {
|
|||
if start_ptr < ptr2 {
|
||||
let slc =
|
||||
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
|
||||
writer(std::str::from_utf8_unchecked(slc));
|
||||
buffer.push_str(std::str::from_utf8_unchecked(slc));
|
||||
}
|
||||
writer(*ESCAPED.get_unchecked(c));
|
||||
buffer.push_str(*ESCAPED.get_unchecked(c));
|
||||
start_ptr = ptr2.add(1);
|
||||
mask ^= 1 << trailing_zeros;
|
||||
}
|
||||
}
|
||||
|
||||
ptr = aligned_ptr;
|
||||
escape_aligned(writer, start_ptr, ptr, end_ptr);
|
||||
escape_aligned(buffer, start_ptr, ptr, end_ptr);
|
||||
}
|
||||
|
||||
pub unsafe fn escape_aligned<F: FnMut(&str)>(
|
||||
writer: &mut F,
|
||||
pub unsafe fn escape_aligned(
|
||||
buffer: &mut Buffer,
|
||||
mut start_ptr: *const u8,
|
||||
mut ptr: *const u8,
|
||||
end_ptr: *const u8,
|
||||
|
@ -95,9 +96,9 @@ pub unsafe fn escape_aligned<F: FnMut(&str)>(
|
|||
if start_ptr < ptr2 {
|
||||
let slc =
|
||||
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
|
||||
writer(std::str::from_utf8_unchecked(slc));
|
||||
buffer.push_str(std::str::from_utf8_unchecked(slc));
|
||||
}
|
||||
writer(*ESCAPED.get_unchecked(c));
|
||||
buffer.push_str(*ESCAPED.get_unchecked(c));
|
||||
start_ptr = ptr2.add(1);
|
||||
mask ^= 1 << trailing_zeros;
|
||||
}
|
||||
|
@ -118,9 +119,9 @@ pub unsafe fn escape_aligned<F: FnMut(&str)>(
|
|||
if start_ptr < ptr2 {
|
||||
let slc =
|
||||
slice::from_raw_parts(start_ptr, ptr2 as usize - start_ptr as usize);
|
||||
writer(std::str::from_utf8_unchecked(slc));
|
||||
buffer.push_str(std::str::from_utf8_unchecked(slc));
|
||||
}
|
||||
writer(*ESCAPED.get_unchecked(c));
|
||||
buffer.push_str(*ESCAPED.get_unchecked(c));
|
||||
start_ptr = ptr2.add(1);
|
||||
mask ^= 1 << trailing_zeros;
|
||||
}
|
||||
|
@ -130,5 +131,5 @@ pub unsafe fn escape_aligned<F: FnMut(&str)>(
|
|||
|
||||
debug_assert!(ptr <= end_ptr);
|
||||
debug_assert!(start_ptr <= ptr);
|
||||
naive::escape(writer, start_ptr, ptr, end_ptr);
|
||||
naive::escape(buffer, start_ptr, ptr, end_ptr);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue