
169 lines
6.4 KiB
Raw Normal View History

2020-06-06 06:16:08 -04:00
//! HTML escaping
//! By default, sailfish replaces the characters `&"<>` with their equivalent HTML entities.
2020-06-04 16:39:33 -04:00
mod avx2;
mod fallback;
mod naive;
mod sse2;
use super::buffer::Buffer;
/// Lookup table indexed by byte value.
///
/// Four bytes have dedicated entries: the double quote (34) maps to 0, `&` (38) to 1,
/// `<` (60) to 2 and `>` (62) to 3. These values are the positions of the corresponding
/// replacement strings in `ESCAPED`. Every other byte maps to 9.
// NOTE(review): 9 is presumably a "no escape needed" marker (it is >= ESCAPED_LEN, so it
// cannot index ESCAPED); confirm against the code in the submodules that reads this table.
static ESCAPE_LUT: [u8; 256] = [
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 2, 9, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9,
/// Replacement strings for the double quote, `&`, `<` and `>`, in that order.
/// The positions match the values 0..=3 stored in `ESCAPE_LUT` for those bytes.
const ESCAPED: [&str; 4] = ["&quot;", "&amp;", "&lt;", "&gt;"];
2020-06-04 16:39:33 -04:00
/// Number of entries in `ESCAPED`.
const ESCAPED_LEN: usize = 4;
2020-06-06 06:16:08 -04:00
/// Write the escaped contents using a custom writer function.
/// This function is soft-deprecated because using it results in a larger binary.
2020-06-04 16:39:33 -04:00
///
/// The bytes of `feed` are handed to one of the `escape` routines in the submodules,
/// together with a mutable reference to `writer`. When the crate is compiled with the
/// `avx2` target feature, `avx2::escape` is called directly. Otherwise the CPU is
/// queried with `is_x86_feature_detected!` and the first match of `avx2`, `sse2`,
/// or else `fallback`, is used.
pub fn escape_with<F: FnMut(&str)>(mut writer: F, feed: &str) {
// NOTE(review): the `escape` routines are invoked inside `unsafe`; their safety
// requirements are defined in the submodules and are not visible here. Confirm them
// and record the invariant in a SAFETY comment.
unsafe {
// Build-time selection: AVX2 is statically enabled for this build.
#[cfg(target_feature = "avx2")]
avx2::escape(&mut writer, feed.as_bytes());
// No static AVX2: check for avx2 first, then sse2, then use the fallback.
#[cfg(not(target_feature = "avx2"))]
if is_x86_feature_detected!("avx2") {
avx2::escape(&mut writer, feed.as_bytes());
} else if is_x86_feature_detected!("sse2") {
sse2::escape(&mut writer, feed.as_bytes());
} else {
fallback::escape(&mut writer, feed.as_bytes());
/// Writes the escaped contents of `feed` into `buf`.
///
/// Calls `escape_with` with a writer closure that forwards every piece it receives
/// to `buf.write_str`.
pub fn escape_to_buf(feed: &str, buf: &mut Buffer) {
escape_with(|e| buf.write_str(e), feed);
2020-06-06 06:16:08 -04:00
/// Write the escaped contents into a `String`.
/// # Examples
/// ```
/// use sailfish::runtime::escape::escape_to_string;
/// let mut buf = String::new();
/// escape_to_string("<h1>Hello, world!</h1>", &mut buf);
/// assert_eq!(buf, "&lt;h1&gt;Hello, world!&lt;/h1&gt;");
/// ```
2020-06-04 16:39:33 -04:00
pub fn escape_to_string(feed: &str, s: &mut String) {
// Move the caller's `String` out of `s`, leaving an empty `String` in its place, so
// that it can be passed to `Buffer::from` by value.
// NOTE(review): `std::mem::take(s)` would express this swap-with-empty step directly.
let mut s2 = String::new();
std::mem::swap(s, &mut s2);
let mut buf = Buffer::from(s2);
escape_to_buf(feed, &mut buf);
// Convert the buffer back into a `String` and swap it into `s`; the empty
// placeholder ends up in the local `s2`.
let mut s2 = buf.into_string();
std::mem::swap(s, &mut s2);
2020-06-04 16:39:33 -04:00
// Tests for the escaping entry points of this module.
// NOTE(review): this view of the module appears to be missing lines (attributes, closing
// braces, parts of some assertions); the comments below describe only what is visible.
mod tests {
use super::*;
// Helper: escapes `feed` through `escape_to_buf` into a freshly created `Buffer`.
fn escape(feed: &str) -> String {
let mut buf = Buffer::new();
escape_to_buf(feed, &mut buf);
// Inputs without any of the four special characters must come back unchanged.
fn noescape() {
assert_eq!(escape(""), "");
assert_eq!(escape("!#$%()*+,-.:;=?_^"), "!#$%()*+,-.:;=?_^");
// Short inputs containing special characters, checked against literal expected output.
fn escape_short() {
assert_eq!(escape("<"), "&lt;");
assert_eq!(escape("\"&<>"), "&quot;&amp;&lt;&gt;");
escape("{\"title\": \"This is a JSON!\"}"),
"{&quot;title&quot;: &quot;This is a JSON!&quot;}"
escape("<html><body><h1>Hello, world</h1></body></html>"),
"&lt;html&gt;&lt;body&gt;&lt;h1&gt;Hello, world&lt;/h1&gt;\
fn escape_long() {
fn random() {
const ASCII_CHARS: &'static [u8] = br##"abcdefghijklmnopqrstuvwxyz0123456789-^\@[;:],./\!"#$%&'()~=~|`{+*}<>?_"##;
// Differential test: for each length in 0..100 a pseudo-random index into ASCII_CHARS
// is drawn per position, then the output of `escape_to_buf` (buf1) and of
// `fallback::escape` (buf2) is compared with a third buffer (buf3).
// NOTE(review): the lines that fill `data` and the call that writes buf3 are not
// visible in this view; confirm which implementation serves as the reference.
let mut state = 88172645463325252u64;
let mut data = Vec::with_capacity(100);
let mut buf1 = Buffer::new();
let mut buf2 = Buffer::new();
let mut buf3 = Buffer::new();
2020-06-04 16:39:33 -04:00
for len in 0..100 {
for _ in 0..len {
// xorshift
state ^= state << 13;
state ^= state >> 7;
state ^= state << 17;
let idx = state as usize % ASCII_CHARS.len();
// NOTE(review): the unchecked conversion presumably relies on `data` holding only
// bytes taken from ASCII_CHARS; verify against the missing push line.
let s = unsafe { std::str::from_utf8_unchecked(&*data) };
2020-06-04 16:39:33 -04:00
unsafe {
escape_to_buf(&*s, &mut buf1);
fallback::escape(&mut |s| buf2.write_str(s), s.as_bytes());
2020-06-04 16:39:33 -04:00
&mut |s| buf3.write_str(s),
2020-06-04 16:39:33 -04:00
// Both implementations under test must agree with the third buffer.
assert_eq!(buf1.as_str(), buf3.as_str());
assert_eq!(buf2.as_str(), buf3.as_str());
2020-06-04 16:39:33 -04:00