//! HTML escaping utilities //! //! By default sailfish replaces the characters `&"'<>` with the equivalent html. #![cfg_attr( all( any(target_arch = "x86", target_arch = "x86_64"), not(miri), target_feature = "avx2" ), allow(dead_code) )] #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri)))] mod avx2; mod fallback; mod naive; #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri)))] mod sse2; static ESCAPE_LUT: [u8; 256] = [ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, ]; const ESCAPED: [&str; 5] = [""", "&", "'", "<", ">"]; const ESCAPED_LEN: usize = 5; use crate::escape::EscapeMeta; use super::{Buffer, Escape}; /// A scheme for escaping strings for safe insertion into HTML. #[derive(Debug, Clone, Copy)] pub struct EscapeHtml; impl const EscapeMeta for EscapeHtml { #[inline] fn is_identity(c: char) -> bool { !matches!(c, '\"' | '&' | '<' | '>' | '\'') } } impl Escape for EscapeHtml { type Escaped = &'static str; #[inline(always)] fn escape(&self, c: char) -> Option { match c { '\"' => Some("""), '&' => Some("&"), '<' => Some("<"), '>' => Some(">"), '\'' => Some("'"), _ => None, } } #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri)))] #[cfg_attr(feature = "perf-inline", inline)] fn escape_to_buf(&self, buf: &mut Buffer, string: &str) { unsafe { if string.len() < 16 { buf.reserve_small(string.len() * 6); let l = naive::escape_small(string, buf.as_mut_ptr().add(buf.len())); buf.advance(l); } else { #[cfg(target_feature = "avx2")] avx2::escape(string, buf); #[cfg(all(not(target_feature = "avx2"), target_feature = "sse2"))] sse2::escape(string, buf); #[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))] { use std::sync::atomic::{AtomicPtr, Ordering}; type FnRaw = *mut (); static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw); fn detect(string: &str, buf: &mut Buffer) { debug_assert!(string.len() >= 16); let fun = if is_x86_feature_detected!("avx2") { avx2::escape } else if is_x86_feature_detected!("sse2") { sse2::escape } else { fallback::escape }; FN.store(fun as FnRaw, Ordering::Relaxed); unsafe { fun(string, buf) }; } let fun = FN.load(Ordering::Relaxed); std::mem::transmute::(fun)(string, buf); }; } } } #[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri))))] #[cfg_attr(feature = "perf-inline", inline)] fn escape_to_buf(&self, buffer: &mut Buffer, string: &str) { unsafe { if cfg!(miri) { let bp = feed.as_ptr(); naive::escape(buf, bp, bp, bp.add(feed.len())) } else if feed.len() < 16 { buf.reserve_small(feed.len() * 6); let l = naive::escape_small(feed, buf.as_mut_ptr().add(buf.len())); buf.advance(l); } else { fallback::escape(feed, buf) } } } } /// write the escaped contents into `Buffer` #[deprecated = "Use [`EscapeHtml::escape_to_buf`] instead"] #[inline(always)] fn escape_to_buf(feed: &str, buf: &mut Buffer) { EscapeHtml.escape_to_buf(buf, feed) } #[cfg(test)] mod tests { use crate::escape; use super::*; #[test] fn check_idents() { assert!(escape::CommonIdents::::BOOLS); assert!(escape::CommonIdents::::UINTS); assert!(escape::CommonIdents::::INTS); assert!(escape::CommonIdents::::FLOATS); } fn escape(feed: &str) -> String { let mut s = String::new(); EscapeHtml.escape_to_string(&mut s, feed); s } #[test] fn noescape() { assert_eq!(escape(""), ""); assert_eq!(escape("1234567890"), "1234567890"); assert_eq!( escape("abcdefghijklmnopqrstrvwxyz"), "abcdefghijklmnopqrstrvwxyz" ); assert_eq!(escape("!#$%()*+,-.:;=?_^"), "!#$%()*+,-.:;=?_^"); assert_eq!( escape("漢字はエスケープしないはずだよ"), "漢字はエスケープしないはずだよ" ); } #[test] fn escape_short() { assert_eq!(escape("<"), "<"); assert_eq!(escape("\"&<>'"), ""&<>'"); assert_eq!( escape("{\"title\": \"This is a JSON!\"}"), "{"title": "This is a JSON!"}" ); assert_eq!( escape("

Hello, world

"), "<html><body><h1>Hello, world</h1>\ </body></html>" ); } #[test] #[rustfmt::skip] fn escape_long() { assert_eq!( escape(r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"'@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40'.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r';o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqve/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""###), r###"m{jml&,?6>\2~08g)\=3`,_`$1@0{i5j}.}2ki\^t}k"'@p4$~?;!;pn_l8v."ki`%/&^=\[y+qcerr`@3*|?du.\0vd#40'.>bcpf\u@m|c<2t7`hk)^?"0u{v%9}4y2hhv?%-f`<;rzwx`7}l(j2b:c\<|z&$x{+k;f`0+w3e0\m.wmdli>94e2hp\$}j0&m(*h$/lwlj#}99r';o.kj@1#}~v+;y~b[~m.eci}&l7fxt`\\{~#k*9z/d{}(.^j}[(,]:<\h]9k2+0*w60/|23~5;/!-h&ci*~e1h~+:1lhh\>y_*>:-\zzv+8uo],,a^k3_,uip]-/.-~\t51a*<{6!<(_|<#o6=\h1*`[2x_?#-/])x};};r@wqx|;/w&jrv~?\`t:^/dug3(g(ener?!t$}h4:57ptnm@71e=t>@o*"$]799r=+)t>co?rvgk%u0c@.9os;#t_*/gqv<za&~r^]"{t4by2t`<q4bfo^&!so5/~(nxk:7l\;#0w41u~w3i$g|>e/t;o<*`~?3.jyx+h)+^cn^j4td|>)~rs)vm#]:"&\fi;54%+z~fhe|w~\q|ui={54[b9tg*?@]g+q!mq]3jg2?eoo"chyat3k#7pq1u=.l]c14twa4tg#5k_""### ); } #[test] #[cfg(not(miri))] fn random() { const ASCII_CHARS: &'static [u8] = br##"abcdefghijklmnopqrstuvwxyz0123456789-^\@[;:],./\!"#$%&'()~=~|`{+*}<>?_"##; let mut state = 88172645463325252u64; let mut data = Vec::with_capacity(100); let mut buf_naive = Buffer::new(); let mut buf = Buffer::new(); for len in 16..100 { for _ in 0..5 { data.clear(); for _ in 0..len { // xorshift state ^= state << 13; state ^= state >> 7; state ^= state << 17; let idx = state as usize % ASCII_CHARS.len(); data.push(ASCII_CHARS[idx]); } let s = unsafe { std::str::from_utf8_unchecked(&*data) }; unsafe { naive::escape( &mut buf_naive, s.as_ptr(), s.as_ptr(), s.as_ptr().add(s.len()), ); fallback::escape(s, &mut buf); assert_eq!(buf.as_str(), buf_naive.as_str()); buf.clear(); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if is_x86_feature_detected!("sse2") { sse2::escape(s, &mut buf); assert_eq!(buf.as_str(), buf_naive.as_str()); buf.clear(); } if is_x86_feature_detected!("avx2") { avx2::escape(s, &mut buf); assert_eq!(buf.as_str(), buf_naive.as_str()); buf.clear(); } } } buf_naive.clear(); } } } }