2020-06-06 06:16:08 -04:00
|
|
|
//! HTML escaping
|
|
|
|
//!
|
2020-06-16 00:51:18 -04:00
|
|
|
//! By default sailfish replaces the characters `&"'<>` with the equivalent HTML entities.
|
2020-06-06 06:16:08 -04:00
|
|
|
|
2020-06-04 16:39:33 -04:00
|
|
|
mod avx2;
|
|
|
|
mod fallback;
|
|
|
|
mod naive;
|
|
|
|
mod sse2;
|
|
|
|
|
2020-06-18 04:23:50 -04:00
|
|
|
use std::mem;
|
|
|
|
use std::sync::atomic::{AtomicPtr, Ordering};
|
|
|
|
|
2020-06-04 16:39:33 -04:00
|
|
|
use super::buffer::Buffer;
|
|
|
|
|
2020-06-18 04:23:50 -04:00
|
|
|
// Type-erased pointer representation used to keep an escape function inside the `FN` atomic.
type FnRaw = *mut ();
|
|
|
|
|
2020-06-04 16:39:33 -04:00
|
|
|
/// Per-byte lookup table: bytes that must be escaped map to a small code
/// (`"` = 0, `&` = 1, `'` = 2, `<` = 3, `>` = 4); every other byte maps to 9.
static ESCAPE_LUT: [u8; 256] = [
    // 0x00 - 0x0F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0x10 - 0x1F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0x20 - 0x2F: `"` (0x22), `&` (0x26), `'` (0x27)
    9, 9, 0, 9, 9, 9, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0x30 - 0x3F: `<` (0x3C), `>` (0x3E)
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 9, 4, 9,
    // 0x40 - 0x4F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0x50 - 0x5F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0x60 - 0x6F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0x70 - 0x7F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0x80 - 0x8F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0x90 - 0x9F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0xA0 - 0xAF
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0xB0 - 0xBF
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0xC0 - 0xCF
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0xD0 - 0xDF
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0xE0 - 0xEF
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    // 0xF0 - 0xFF
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
];
|
|
|
|
|
2020-06-14 23:06:14 -04:00
|
|
|
/// Replacement text for each escaped character, in the order of the codes stored in
/// `ESCAPE_LUT`: `"` (0), `&` (1), `'` (2), `<` (3), `>` (4).
///
/// Each entry must be an HTML entity, never the raw character itself; otherwise the
/// "escaped" output would be identical to the input.
const ESCAPED: [&str; 5] = ["&quot;", "&amp;", "&#039;", "&lt;", "&gt;"];
/// Number of entries in `ESCAPED`. Every escape code in `ESCAPE_LUT` is below this value,
/// while the "no escape" marker (9) is not.
const ESCAPED_LEN: usize = 5;
|
2020-06-04 16:39:33 -04:00
|
|
|
|
2020-06-18 04:23:50 -04:00
|
|
|
// Escape routine invoked by `escape_to_buf` for inputs of 16 bytes or more, stored
// type-erased as `FnRaw`. It initially points at `escape`; in builds without statically
// enabled AVX2, `escape` overwrites it with the implementation chosen by CPU detection.
static FN: AtomicPtr<()> = AtomicPtr::new(escape as FnRaw);
|
|
|
|
|
2020-06-10 10:30:11 -04:00
|
|
|
/// Escapes `feed` into `buf` using the AVX2 implementation.
///
/// This variant is compiled only when AVX2 is enabled for the whole build, so no runtime
/// CPU detection takes place. Callers are expected to pass at least 16 bytes; this is
/// verified in debug builds only.
#[cfg(target_feature = "avx2")]
#[inline]
fn escape(feed: &str, buf: &mut Buffer) {
    debug_assert!(feed.len() >= 16);

    // SAFETY: the `cfg` attribute above guarantees that AVX2 is statically enabled for
    // this target. NOTE(review): any further preconditions of `avx2::escape` are not
    // visible from this module -- confirm against its definition.
    unsafe {
        avx2::escape(feed, buf);
    }
}
|
|
|
|
|
|
|
|
#[cfg(not(target_feature = "avx2"))]
|
2020-07-02 05:21:21 -04:00
|
|
|
fn escape(feed: &str, buf: &mut Buffer) {
|
2020-07-04 09:32:33 -04:00
|
|
|
debug_assert!(feed.len() >= 16);
|
2020-06-18 04:23:50 -04:00
|
|
|
let fun = if is_x86_feature_detected!("avx2") {
|
|
|
|
avx2::escape
|
|
|
|
} else if is_x86_feature_detected!("sse2") {
|
|
|
|
sse2::escape
|
|
|
|
} else {
|
|
|
|
fallback::escape
|
|
|
|
};
|
|
|
|
|
|
|
|
FN.store(fun as FnRaw, Ordering::Relaxed);
|
|
|
|
unsafe { fun(feed, buf) };
|
|
|
|
}
|
2020-06-09 21:27:13 -04:00
|
|
|
|
2020-06-19 02:36:42 -04:00
|
|
|
/// Change the default escape function
|
2020-07-02 05:21:21 -04:00
|
|
|
#[deprecated(since = "0.1.2", note = "This function does not anything any more")]
|
|
|
|
pub fn register_escape_fn(_fun: fn(&str, &mut Buffer)) {}
|
2020-06-04 16:39:33 -04:00
|
|
|
|
2020-07-05 00:25:57 -04:00
|
|
|
#[cfg_attr(feature = "perf-inline", inline)]
|
2020-06-18 04:23:50 -04:00
|
|
|
pub(crate) fn escape_to_buf(feed: &str, buf: &mut Buffer) {
|
2020-06-09 21:27:13 -04:00
|
|
|
unsafe {
|
2020-07-02 05:21:21 -04:00
|
|
|
if feed.len() < 16 {
|
2020-07-04 09:32:33 -04:00
|
|
|
buf.reserve(feed.len() * 6);
|
|
|
|
let l = naive::escape_small(feed, buf.as_mut_ptr().add(buf.len()));
|
|
|
|
buf.set_len(buf.len() + l);
|
|
|
|
} else {
|
|
|
|
let fun = FN.load(Ordering::Relaxed);
|
|
|
|
mem::transmute::<FnRaw, fn(&str, &mut Buffer)>(fun)(feed, buf);
|
2020-07-02 05:21:21 -04:00
|
|
|
}
|
2020-06-09 21:27:13 -04:00
|
|
|
}
|
2020-06-04 16:39:33 -04:00
|
|
|
}
|
|
|
|
|
2020-06-06 06:16:08 -04:00
|
|
|
/// write the escaped contents into `String`
|
|
|
|
///
|
|
|
|
/// # Examples
|
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// use sailfish::runtime::escape::escape_to_string;
|
|
|
|
///
|
|
|
|
/// let mut buf = String::new();
|
|
|
|
/// escape_to_string("<h1>Hello, world!</h1>", &mut buf);
|
|
|
|
/// assert_eq!(buf, "<h1>Hello, world!</h1>");
|
|
|
|
/// ```
|
2020-06-04 16:39:33 -04:00
|
|
|
#[inline]
|
|
|
|
pub fn escape_to_string(feed: &str, s: &mut String) {
|
2020-06-06 23:40:27 -04:00
|
|
|
let mut s2 = String::new();
|
|
|
|
std::mem::swap(s, &mut s2);
|
|
|
|
let mut buf = Buffer::from(s2);
|
|
|
|
escape_to_buf(feed, &mut buf);
|
|
|
|
let mut s2 = buf.into_string();
|
|
|
|
std::mem::swap(s, &mut s2);
|
2020-06-04 16:39:33 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Escapes `feed` through the public entry point and returns the result.
    fn escape(feed: &str) -> String {
        let mut s = String::new();
        escape_to_string(feed, &mut s);
        s
    }

    /// Straightforward per-character escaper, used as an independent oracle for inputs
    /// that are too long to re-encode by hand.
    fn reference_escape(feed: &str) -> String {
        let mut out = String::with_capacity(feed.len());
        for c in feed.chars() {
            match c {
                '"' => out.push_str("&quot;"),
                '&' => out.push_str("&amp;"),
                '\'' => out.push_str("&#039;"),
                '<' => out.push_str("&lt;"),
                '>' => out.push_str("&gt;"),
                _ => out.push(c),
            }
        }
        out
    }

    #[test]
    fn noescape() {
        assert_eq!(escape(""), "");
        assert_eq!(
            escape("abcdefghijklmnopqrstrvwxyz"),
            "abcdefghijklmnopqrstrvwxyz"
        );
        assert_eq!(escape("!#$%()*+,-.:;=?_^"), "!#$%()*+,-.:;=?_^");
        assert_eq!(
            escape("漢字はエスケープしないはずだよ"),
            "漢字はエスケープしないはずだよ"
        );
    }

    #[test]
    fn escape_short() {
        assert_eq!(escape("<"), "&lt;");
        assert_eq!(escape("\"&<>'"), "&quot;&amp;&lt;&gt;&#039;");
        assert_eq!(
            escape("{\"title\": \"This is a JSON!\"}"),
            "{&quot;title&quot;: &quot;This is a JSON!&quot;}"
        );
        assert_eq!(
            escape("<html><body><h1>Hello, world</h1></body></html>"),
            "&lt;html&gt;&lt;body&gt;&lt;h1&gt;Hello, world&lt;/h1&gt;\
             &lt;/body&gt;&lt;/html&gt;"
        );
    }

    #[test]
    #[rustfmt::skip]
    fn escape_long() {
        let feed = r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"'@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40'.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r';o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqv<za&~r^]"{t4by2t`<q4bfo^&!so5/~(nxk:7l\;#0w41u~w3i$g|>e/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""###;

        let escaped = escape(feed);

        // The long input exercises the dispatched (SIMD or fallback) code path; compare
        // it with the simple per-character oracle.
        assert_eq!(escaped, reference_escape(feed));
        // None of the raw special characters may survive (`&` is excluded because it
        // legitimately starts every entity).
        assert!(!escaped.contains(&['"', '\'', '<', '>'][..]));
    }

    #[test]
    fn random() {
        const ASCII_CHARS: &[u8] = br##"abcdefghijklmnopqrstuvwxyz0123456789-^\@[;:],./\!"#$%&'()~=~|`{+*}<>?_"##;
        let mut state = 88172645463325252u64;
        let mut data = Vec::with_capacity(100);
        let mut buf1 = Buffer::new();
        let mut buf2 = Buffer::new();
        let mut buf3 = Buffer::new();

        for len in 0..100 {
            data.clear();
            for _ in 0..len {
                // xorshift
                state ^= state << 13;
                state ^= state >> 7;
                state ^= state << 17;

                let idx = state as usize % ASCII_CHARS.len();
                data.push(ASCII_CHARS[idx]);
            }

            let s = std::str::from_utf8(&data).expect("generated from an ASCII alphabet");

            buf1.clear();
            buf2.clear();
            buf3.clear();

            // All three implementations must agree on every input length.
            unsafe {
                escape_to_buf(s, &mut buf1);
                fallback::escape(s, &mut buf2);
                naive::escape(&mut buf3, s.as_ptr(), s.as_ptr(), s.as_ptr().add(s.len()));
            }

            assert_eq!(buf1.as_str(), buf3.as_str());
            assert_eq!(buf2.as_str(), buf3.as_str());
        }
    }
}
|