This commit is contained in:
Wilson Lin 2021-08-06 16:19:36 +10:00
parent 5a259a8ead
commit 1a930a170d
23 changed files with 400 additions and 198 deletions

View File

@ -1,9 +1,9 @@
use crate::cfg::Cfg; use crate::cfg::Cfg;
use crate::minify::content::minify_content; use crate::minify::content::minify_content;
use crate::parse::Code;
use crate::parse::content::parse_content; use crate::parse::content::parse_content;
use crate::spec::tag::EMPTY_TAG_NAME; use crate::parse::Code;
use crate::spec::tag::ns::Namespace; use crate::spec::tag::ns::Namespace;
use crate::spec::tag::EMPTY_TAG_NAME;
mod ast; mod ast;
mod cfg; mod cfg;
@ -37,7 +37,13 @@ mod tests;
/// ``` /// ```
pub fn minify(src: &[u8], cfg: &Cfg) -> Vec<u8> { pub fn minify(src: &[u8], cfg: &Cfg) -> Vec<u8> {
let mut code = Code::new(src); let mut code = Code::new(src);
let parsed = parse_content(cfg, &mut code, Namespace::Html, EMPTY_TAG_NAME, EMPTY_TAG_NAME); let parsed = parse_content(
cfg,
&mut code,
Namespace::Html,
EMPTY_TAG_NAME,
EMPTY_TAG_NAME,
);
let mut out = Vec::with_capacity(src.len()); let mut out = Vec::with_capacity(src.len());
minify_content(cfg, &mut out, EMPTY_TAG_NAME, &parsed.children); minify_content(cfg, &mut out, EMPTY_TAG_NAME, &parsed.children);
out out

View File

@ -13,7 +13,7 @@ fn build_double_quoted_replacer() -> Replacer {
for c in "0123456789;".bytes() { for c in "0123456789;".bytes() {
patterns.push(vec![b'"', c]); patterns.push(vec![b'"', c]);
replacements.push(vec![b'&', b'#', b'3', b'4', b';', c]); replacements.push(vec![b'&', b'#', b'3', b'4', b';', c]);
}; }
patterns.push(b"\"".to_vec()); patterns.push(b"\"".to_vec());
replacements.push(b"&#34".to_vec()); replacements.push(b"&#34".to_vec());
@ -35,7 +35,7 @@ fn build_single_quoted_replacer() -> Replacer {
for c in "0123456789;".bytes() { for c in "0123456789;".bytes() {
patterns.push(vec![b'\'', c]); patterns.push(vec![b'\'', c]);
replacements.push(vec![b'&', b'#', b'3', b'9', b';', c]); replacements.push(vec![b'&', b'#', b'3', b'9', b';', c]);
}; }
patterns.push(b"'".to_vec()); patterns.push(b"'".to_vec());
replacements.push(b"&#39".to_vec()); replacements.push(b"&#39".to_vec());
@ -71,12 +71,12 @@ fn build_unquoted_replacer() -> Replacer {
ent.push(c); ent.push(c);
ent ent
}); });
}; }
}; }
for &(ws, rep) in WS { for &(ws, rep) in WS {
patterns.push(vec![ws]); patterns.push(vec![ws]);
replacements.push(rep.to_vec()); replacements.push(rep.to_vec());
}; }
// Replace all `>` with `&GT`, unless the chevron is followed by a semicolon, // Replace all `>` with `&GT`, unless the chevron is followed by a semicolon,
// in which case add a semicolon to the encoded entity. // in which case add a semicolon to the encoded entity.
@ -148,11 +148,7 @@ pub fn minify_attr_val(val: &[u8]) -> Vec<u8> {
}, },
_ => b"", _ => b"",
}; };
let start = if !first_char_encoded.is_empty() { let start = if !first_char_encoded.is_empty() { 1 } else { 0 };
1
} else {
0
};
MinifiedVal { MinifiedVal {
prefix: b"", prefix: b"",
data: res, data: res,

View File

@ -1,11 +1,6 @@
use crate::cfg::Cfg; use crate::cfg::Cfg;
pub fn minify_bang( pub fn minify_bang(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8], ended: bool) -> () {
cfg: &Cfg,
out: &mut Vec<u8>,
code: &[u8],
ended: bool,
) -> () {
if !cfg.remove_bangs { if !cfg.remove_bangs {
out.extend_from_slice(b"<!"); out.extend_from_slice(b"<!");
out.extend_from_slice(&code); out.extend_from_slice(&code);

View File

@ -1,11 +1,6 @@
use crate::cfg::Cfg; use crate::cfg::Cfg;
pub fn minify_comment( pub fn minify_comment(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8], ended: bool) -> () {
cfg: &Cfg,
out: &mut Vec<u8>,
code: &[u8],
ended: bool,
) -> () {
if !cfg.remove_comments { if !cfg.remove_comments {
out.extend_from_slice(b"<!--"); out.extend_from_slice(b"<!--");
out.extend_from_slice(&code); out.extend_from_slice(&code);

View File

@ -24,7 +24,7 @@ fn build_chevron_replacer() -> Replacer {
patterns.push(vec![b'<', c]); patterns.push(vec![b'<', c]);
replacements.push(vec![b'&', b'L', b'T', c]); replacements.push(vec![b'&', b'L', b'T', c]);
}; };
}; }
Replacer::new( Replacer::new(
AhoCorasickBuilder::new() AhoCorasickBuilder::new()
@ -52,7 +52,7 @@ pub fn minify_content(
NodeData::Text { .. } | NodeData::Element { .. } => break, NodeData::Text { .. } | NodeData::Element { .. } => break,
_ => index_of_last_text_or_elem_child -= 1, _ => index_of_last_text_or_elem_child -= 1,
}; };
}; }
let mut previous_sibling_element: &[u8] = EMPTY_TAG_NAME; let mut previous_sibling_element: &[u8] = EMPTY_TAG_NAME;
for (i, c) in nodes.iter().enumerate() { for (i, c) in nodes.iter().enumerate() {
@ -84,11 +84,8 @@ pub fn minify_content(
ScriptOrStyleLang::Data => out.extend_from_slice(code), ScriptOrStyleLang::Data => out.extend_from_slice(code),
ScriptOrStyleLang::JS => minify_js(cfg, out, code), ScriptOrStyleLang::JS => minify_js(cfg, out, code),
}, },
NodeData::Text { value } => out.extend_from_slice( NodeData::Text { value } => out
&CHEVRON_REPLACER.replace_all( .extend_from_slice(&CHEVRON_REPLACER.replace_all(&encode_ampersands(value, false))),
&encode_ampersands(value, false)
)
),
}; };
}; }
} }

View File

@ -12,8 +12,8 @@ use crate::minify::instruction::minify_instruction;
use crate::minify::js::minify_js; use crate::minify::js::minify_js;
use crate::pattern::Replacer; use crate::pattern::Replacer;
use crate::spec::entity::encode::encode_ampersands; use crate::spec::entity::encode::encode_ampersands;
use crate::spec::tag::EMPTY_TAG_NAME;
use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node}; use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
use crate::spec::tag::EMPTY_TAG_NAME;
#[derive(Copy, Clone, Eq, PartialEq)] #[derive(Copy, Clone, Eq, PartialEq)]
enum AttrType { enum AttrType {
@ -35,10 +35,9 @@ pub fn minify_element(
closing_tag: ElementClosingTag, closing_tag: ElementClosingTag,
children: &[NodeData], children: &[NodeData],
) -> () { ) -> () {
let can_omit_closing_tag = cfg.omit_closing_tags && ( let can_omit_closing_tag = cfg.omit_closing_tags
can_omit_as_before(previous_sibling_element, tag_name) && (can_omit_as_before(previous_sibling_element, tag_name)
|| (is_last_child_text_or_element_node && can_omit_as_last_node(parent, tag_name)) || (is_last_child_text_or_element_node && can_omit_as_last_node(parent, tag_name)));
);
out.push(b'<'); out.push(b'<');
out.extend_from_slice(tag_name); out.extend_from_slice(tag_name);
@ -50,13 +49,9 @@ pub fn minify_element(
out.extend_from_slice(name); out.extend_from_slice(name);
if !value.is_empty() { if !value.is_empty() {
out.push(b'='); out.push(b'=');
out.extend_from_slice( out.extend_from_slice(&minify_attr_val(&encode_ampersands(value, true)));
&minify_attr_val(
&encode_ampersands(value, true),
),
);
}; };
}; }
if closing_tag == ElementClosingTag::SelfClosing { if closing_tag == ElementClosingTag::SelfClosing {
if last_attr == AttrType::Unquoted { if last_attr == AttrType::Unquoted {
out.push(b' '); out.push(b' ');
@ -72,7 +67,8 @@ pub fn minify_element(
minify_content(cfg, out, tag_name, children); minify_content(cfg, out, tag_name, children);
if closing_tag != ElementClosingTag::Present || (cfg.omit_closing_tags && can_omit_closing_tag) { if closing_tag != ElementClosingTag::Present || (cfg.omit_closing_tags && can_omit_closing_tag)
{
return; return;
}; };
out.extend_from_slice(b"</"); out.extend_from_slice(b"</");

View File

@ -1,11 +1,6 @@
use crate::cfg::Cfg; use crate::cfg::Cfg;
pub fn minify_instruction( pub fn minify_instruction(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8], ended: bool) -> () {
cfg: &Cfg,
out: &mut Vec<u8>,
code: &[u8],
ended: bool,
) -> () {
if !cfg.remove_processing_instructions { if !cfg.remove_processing_instructions {
out.extend_from_slice(b"<?"); out.extend_from_slice(b"<?");
out.extend_from_slice(&code); out.extend_from_slice(&code);

View File

@ -1,6 +1,6 @@
use crate::ast::NodeData; use crate::ast::NodeData;
use crate::Cfg;
use crate::parse::Code; use crate::parse::Code;
use crate::Cfg;
use memchr::memchr; use memchr::memchr;
pub fn parse_bang(cfg: &Cfg, code: &mut Code) -> NodeData { pub fn parse_bang(cfg: &Cfg, code: &mut Code) -> NodeData {

View File

@ -2,8 +2,8 @@ use aho_corasick::AhoCorasick;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use crate::ast::NodeData; use crate::ast::NodeData;
use crate::Cfg;
use crate::parse::Code; use crate::parse::Code;
use crate::Cfg;
lazy_static! { lazy_static! {
static ref COMMENT_END: AhoCorasick = AhoCorasick::new(&["-->"]); static ref COMMENT_END: AhoCorasick = AhoCorasick::new(&["-->"]);

View File

@ -3,17 +3,17 @@ use lazy_static::lazy_static;
use memchr::memrchr; use memchr::memrchr;
use crate::ast::NodeData; use crate::ast::NodeData;
use crate::Cfg;
use crate::parse::bang::parse_bang; use crate::parse::bang::parse_bang;
use crate::parse::Code;
use crate::parse::comment::parse_comment; use crate::parse::comment::parse_comment;
use crate::parse::content::ContentType::*; use crate::parse::content::ContentType::*;
use crate::parse::element::{parse_element, parse_tag, peek_tag_name}; use crate::parse::element::{parse_element, parse_tag, peek_tag_name};
use crate::parse::instruction::parse_instruction; use crate::parse::instruction::parse_instruction;
use crate::parse::Code;
use crate::spec::entity::decode::decode_entities; use crate::spec::entity::decode::decode_entities;
use crate::spec::tag::ns::Namespace; use crate::spec::tag::ns::Namespace;
use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node}; use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
use crate::spec::tag::void::VOID_TAGS; use crate::spec::tag::void::VOID_TAGS;
use crate::Cfg;
#[derive(Copy, Clone, Eq, PartialEq)] #[derive(Copy, Clone, Eq, PartialEq)]
enum ContentType { enum ContentType {
@ -43,7 +43,8 @@ lazy_static! {
} }
// Keep in sync with order of patterns in CONTENT_TYPE_PATTERN. // Keep in sync with order of patterns in CONTENT_TYPE_PATTERN.
static CONTENT_TYPE_FROM_PATTERN: &'static [ContentType] = &[OpeningTag, ClosingTag, Instruction, Bang, Comment]; static CONTENT_TYPE_FROM_PATTERN: &'static [ContentType] =
&[OpeningTag, ClosingTag, Instruction, Bang, Comment];
pub struct ParsedContent { pub struct ParsedContent {
pub children: Vec<NodeData>, pub children: Vec<NodeData>,
@ -51,7 +52,13 @@ pub struct ParsedContent {
} }
// Use empty slice for `grandparent` or `parent` if none. // Use empty slice for `grandparent` or `parent` if none.
pub fn parse_content(cfg: &Cfg, code: &mut Code, ns: Namespace, grandparent: &[u8], parent: &[u8]) -> ParsedContent { pub fn parse_content(
cfg: &Cfg,
code: &mut Code,
ns: Namespace,
grandparent: &[u8],
parent: &[u8],
) -> ParsedContent {
// We assume the closing tag has been omitted until we see one explicitly before EOF (or it has been omitted as per the spec). // We assume the closing tag has been omitted until we see one explicitly before EOF (or it has been omitted as per the spec).
let mut closing_tag_omitted = true; let mut closing_tag_omitted = true;
let mut nodes = Vec::<NodeData>::new(); let mut nodes = Vec::<NodeData>::new();
@ -80,7 +87,9 @@ pub fn parse_content(cfg: &Cfg, code: &mut Code, ns: Namespace, grandparent: &[u
if name.is_empty() { if name.is_empty() {
// Malformed code, drop until and including next `>`. // Malformed code, drop until and including next `>`.
typ = MalformedLeftChevronSlash; typ = MalformedLeftChevronSlash;
} else if grandparent == name.as_slice() && can_omit_as_last_node(grandparent, parent) { } else if grandparent == name.as_slice()
&& can_omit_as_last_node(grandparent, parent)
{
// The upcoming closing tag implicitly closes the current element e.g. `<tr><td>(current position)</tr>`. // The upcoming closing tag implicitly closes the current element e.g. `<tr><td>(current position)</tr>`.
// This DOESN'T handle when grandparent doesn't exist (represented by an empty slice). However, in that case it's irrelevant, as it would mean we would be at EOF, and our parser simply auto-closes everything anyway. (Normally we'd have to determine if `<p>Hello` is an error or allowed.) // This DOESN'T handle when grandparent doesn't exist (represented by an empty slice). However, in that case it's irrelevant, as it would mean we would be at EOF, and our parser simply auto-closes everything anyway. (Normally we'd have to determine if `<p>Hello` is an error or allowed.)
typ = OmittedClosingTag; typ = OmittedClosingTag;
@ -119,7 +128,7 @@ pub fn parse_content(cfg: &Cfg, code: &mut Code, ns: Namespace, grandparent: &[u
} }
ClosingTagForVoidElement => drop(parse_tag(code)), ClosingTagForVoidElement => drop(parse_tag(code)),
}; };
}; }
debug_assert_eq!(text_len, 0); debug_assert_eq!(text_len, 0);
ParsedContent { ParsedContent {
children: nodes, children: nodes,

View File

@ -1,17 +1,20 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::ast::{ElementClosingTag, NodeData, ScriptOrStyleLang}; use crate::ast::{ElementClosingTag, NodeData, ScriptOrStyleLang};
use crate::Cfg; use crate::gen::codepoints::{
use crate::gen::codepoints::{ATTR_QUOTE, DOUBLE_QUOTE, NOT_UNQUOTED_ATTR_VAL_CHAR, SINGLE_QUOTE, TAG_NAME_CHAR, WHITESPACE, WHITESPACE_OR_SLASH}; ATTR_QUOTE, DOUBLE_QUOTE, NOT_UNQUOTED_ATTR_VAL_CHAR, SINGLE_QUOTE, TAG_NAME_CHAR, WHITESPACE,
use crate::parse::Code; WHITESPACE_OR_SLASH,
};
use crate::parse::content::{parse_content, ParsedContent}; use crate::parse::content::{parse_content, ParsedContent};
use crate::parse::script::parse_script_content; use crate::parse::script::parse_script_content;
use crate::parse::style::parse_style_content; use crate::parse::style::parse_style_content;
use crate::parse::textarea::parse_textarea_content; use crate::parse::textarea::parse_textarea_content;
use crate::parse::Code;
use crate::spec::entity::decode::decode_entities; use crate::spec::entity::decode::decode_entities;
use crate::spec::script::JAVASCRIPT_MIME_TYPES; use crate::spec::script::JAVASCRIPT_MIME_TYPES;
use crate::spec::tag::ns::Namespace; use crate::spec::tag::ns::Namespace;
use crate::spec::tag::void::VOID_TAGS; use crate::spec::tag::void::VOID_TAGS;
use crate::Cfg;
fn parse_tag_name(code: &mut Code) -> Vec<u8> { fn parse_tag_name(code: &mut Code) -> Vec<u8> {
debug_assert!(code.str().starts_with(b"<")); debug_assert!(code.str().starts_with(b"<"));
@ -66,7 +69,10 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag {
None => NOT_UNQUOTED_ATTR_VAL_CHAR, None => NOT_UNQUOTED_ATTR_VAL_CHAR,
_ => unreachable!(), _ => unreachable!(),
}; };
let attr_value = decode_entities(code.slice_and_shift_while_not_in_lookup(attr_delim_pred), true); let attr_value = decode_entities(
code.slice_and_shift_while_not_in_lookup(attr_delim_pred),
true,
);
if let Some(c) = attr_delim { if let Some(c) = attr_delim {
// It might not be next if EOF (i.e. attribute value not closed). // It might not be next if EOF (i.e. attribute value not closed).
code.shift_if_next(c); code.shift_if_next(c);
@ -74,7 +80,7 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag {
attr_value attr_value
}; };
attributes.insert(attr_name, attr_value); attributes.insert(attr_name, attr_value);
}; }
ParsedTag { ParsedTag {
attributes, attributes,
name: elem_name, name: elem_name,
@ -121,12 +127,14 @@ pub fn parse_element(cfg: &Cfg, code: &mut Code, ns: Namespace, parent: &[u8]) -
} = match elem_name.as_slice() { } = match elem_name.as_slice() {
// TODO to_vec call allocates every time? // TODO to_vec call allocates every time?
b"script" => match attributes.get(&b"type".to_vec()) { b"script" => match attributes.get(&b"type".to_vec()) {
Some(mime) if !JAVASCRIPT_MIME_TYPES.contains(mime.as_slice()) => parse_script_content(cfg, code, ScriptOrStyleLang::Data), Some(mime) if !JAVASCRIPT_MIME_TYPES.contains(mime.as_slice()) => {
parse_script_content(cfg, code, ScriptOrStyleLang::Data)
}
_ => parse_script_content(cfg, code, ScriptOrStyleLang::JS), _ => parse_script_content(cfg, code, ScriptOrStyleLang::JS),
}, },
b"style" => parse_style_content(cfg, code), b"style" => parse_style_content(cfg, code),
b"textarea" => parse_textarea_content(cfg, code), b"textarea" => parse_textarea_content(cfg, code),
_ => parse_content(cfg, code, child_ns, parent, &elem_name) _ => parse_content(cfg, code, child_ns, parent, &elem_name),
}; };
if !closing_tag_omitted { if !closing_tag_omitted {

View File

@ -2,8 +2,8 @@ use aho_corasick::AhoCorasick;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use crate::ast::NodeData; use crate::ast::NodeData;
use crate::Cfg;
use crate::parse::Code; use crate::parse::Code;
use crate::Cfg;
lazy_static! { lazy_static! {
static ref INSTRUCTION_END: AhoCorasick = AhoCorasick::new(&["?>"]); static ref INSTRUCTION_END: AhoCorasick = AhoCorasick::new(&["?>"]);

View File

@ -19,10 +19,7 @@ pub struct Checkpoint(usize);
impl<'c> Code<'c> { impl<'c> Code<'c> {
pub fn new(code: &[u8]) -> Code { pub fn new(code: &[u8]) -> Code {
Code { Code { code, next: 0 }
code,
next: 0,
}
} }
pub fn str(&self) -> &[u8] { pub fn str(&self) -> &[u8] {
@ -59,7 +56,12 @@ impl<'c> Code<'c> {
} }
pub fn shift_if_next_seq(&mut self, seq: &'static [u8]) -> bool { pub fn shift_if_next_seq(&mut self, seq: &'static [u8]) -> bool {
if self.code.get(self.next..self.next + seq.len()).filter(|&n| n == seq).is_some() { if self
.code
.get(self.next..self.next + seq.len())
.filter(|&n| n == seq)
.is_some()
{
self.next += seq.len(); self.next += seq.len();
true true
} else { } else {
@ -88,7 +90,7 @@ impl<'c> Code<'c> {
Some(&c) if lookup[c] => len += 1, Some(&c) if lookup[c] => len += 1,
_ => break, _ => break,
}; };
}; }
self.copy_and_shift(len) self.copy_and_shift(len)
} }
@ -99,7 +101,7 @@ impl<'c> Code<'c> {
Some(&c) if !lookup[c] => len += 1, Some(&c) if !lookup[c] => len += 1,
_ => break, _ => break,
}; };
}; }
self.slice_and_shift(len) self.slice_and_shift(len)
} }
@ -118,7 +120,7 @@ impl<'c> Code<'c> {
} }
_ => break, _ => break,
}; };
}; }
last last
} }

View File

@ -3,9 +3,9 @@ use aho_corasick::AhoCorasickBuilder;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use crate::ast::{NodeData, ScriptOrStyleLang}; use crate::ast::{NodeData, ScriptOrStyleLang};
use crate::Cfg;
use crate::parse::Code;
use crate::parse::content::ParsedContent; use crate::parse::content::ParsedContent;
use crate::parse::Code;
use crate::Cfg;
lazy_static! { lazy_static! {
static ref END: AhoCorasick = AhoCorasickBuilder::new() static ref END: AhoCorasick = AhoCorasickBuilder::new()
@ -20,6 +20,9 @@ pub fn parse_script_content(cfg: &Cfg, code: &mut Code, lang: ScriptOrStyleLang)
}; };
ParsedContent { ParsedContent {
closing_tag_omitted, closing_tag_omitted,
children: vec![NodeData::ScriptOrStyleContent { code: code.copy_and_shift(len), lang }], children: vec![NodeData::ScriptOrStyleContent {
code: code.copy_and_shift(len),
lang,
}],
} }
} }

View File

@ -3,9 +3,9 @@ use aho_corasick::AhoCorasickBuilder;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use crate::ast::{NodeData, ScriptOrStyleLang}; use crate::ast::{NodeData, ScriptOrStyleLang};
use crate::Cfg;
use crate::parse::Code;
use crate::parse::content::ParsedContent; use crate::parse::content::ParsedContent;
use crate::parse::Code;
use crate::Cfg;
lazy_static! { lazy_static! {
static ref END: AhoCorasick = AhoCorasickBuilder::new() static ref END: AhoCorasick = AhoCorasickBuilder::new()
@ -20,11 +20,9 @@ pub fn parse_style_content(cfg: &Cfg, code: &mut Code) -> ParsedContent {
}; };
ParsedContent { ParsedContent {
closing_tag_omitted, closing_tag_omitted,
children: vec![ children: vec![NodeData::ScriptOrStyleContent {
NodeData::ScriptOrStyleContent { code: code.copy_and_shift(len),
code: code.copy_and_shift(len), lang: ScriptOrStyleLang::CSS,
lang: ScriptOrStyleLang::CSS, }],
},
],
} }
} }

View File

@ -3,10 +3,10 @@ use aho_corasick::AhoCorasickBuilder;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use crate::ast::NodeData; use crate::ast::NodeData;
use crate::Cfg;
use crate::parse::Code;
use crate::parse::content::ParsedContent; use crate::parse::content::ParsedContent;
use crate::parse::Code;
use crate::spec::entity::decode::decode_entities; use crate::spec::entity::decode::decode_entities;
use crate::Cfg;
lazy_static! { lazy_static! {
static ref END: AhoCorasick = AhoCorasickBuilder::new() static ref END: AhoCorasick = AhoCorasickBuilder::new()
@ -21,6 +21,8 @@ pub fn parse_textarea_content(cfg: &Cfg, code: &mut Code) -> ParsedContent {
}; };
ParsedContent { ParsedContent {
closing_tag_omitted, closing_tag_omitted,
children: vec![NodeData::Text { value: decode_entities(code.slice_and_shift(len), false) }], children: vec![NodeData::Text {
value: decode_entities(code.slice_and_shift(len), false),
}],
} }
} }

View File

@ -46,7 +46,7 @@ impl<V: 'static + Copy> TrieNode<V> {
if node.value.is_some() { if node.value.is_some() {
break; break;
}; };
}; }
(node, pos) (node, pos)
} }
@ -65,7 +65,7 @@ impl<V: 'static + Copy> TrieNode<V> {
Some(v) => value = Some(TrieNodeMatch::Found { len: pos, value: v }), Some(v) => value = Some(TrieNodeMatch::Found { len: pos, value: v }),
None => {} None => {}
}; };
}; }
value.unwrap_or(TrieNodeMatch::NotFound { reached: pos }) value.unwrap_or(TrieNodeMatch::NotFound { reached: pos })
} }
} }
@ -77,7 +77,10 @@ pub struct Replacer {
impl Replacer { impl Replacer {
pub fn new(searcher: AhoCorasick, replacements: Vec<Vec<u8>>) -> Replacer { pub fn new(searcher: AhoCorasick, replacements: Vec<Vec<u8>>) -> Replacer {
Replacer { searcher, replacements } Replacer {
searcher,
replacements,
}
} }
pub fn replace_all(&self, src: &[u8]) -> Vec<u8> { pub fn replace_all(&self, src: &[u8]) -> Vec<u8> {

View File

@ -17,8 +17,10 @@ use std::char::from_u32;
use memchr::memchr; use memchr::memchr;
use crate::gen::codepoints::{ALPHANUMERIC_OR_EQUALS, DIGIT, HEX_DIGIT, Lookup, LOWER_HEX_ALPHA, UPPER_HEX_ALPHA}; use crate::gen::codepoints::{
use crate::gen::entities::{ENTITY, EntityType}; Lookup, ALPHANUMERIC_OR_EQUALS, DIGIT, HEX_DIGIT, LOWER_HEX_ALPHA, UPPER_HEX_ALPHA,
};
use crate::gen::entities::{EntityType, ENTITY};
use crate::pattern::TrieNodeMatch; use crate::pattern::TrieNodeMatch;
enum Decoded { enum Decoded {
@ -44,7 +46,7 @@ fn parse_numeric_entity(
// Skip initial zeros. // Skip initial zeros.
while code.get(read_next).filter(|c| **c == b'0').is_some() { while code.get(read_next).filter(|c| **c == b'0').is_some() {
read_next += 1; read_next += 1;
}; }
// Browser will still continue to consume digits past max_digits. // Browser will still continue to consume digits past max_digits.
loop { loop {
match code.get(read_next) { match code.get(read_next) {
@ -56,7 +58,7 @@ fn parse_numeric_entity(
} }
_ => break, _ => break,
}; };
}; }
// Semicolon is required by spec but seems to be optional in actual browser behaviour. // Semicolon is required by spec but seems to be optional in actual browser behaviour.
if let Some(b';') = code.get(read_next) { if let Some(b';') = code.get(read_next) {
read_next += 1; read_next += 1;
@ -79,7 +81,10 @@ fn parse_entity(code: &[u8], in_attr_val: bool) -> ParsedEntity {
read_len: reached, read_len: reached,
decoded: Decoded::Ignored, decoded: Decoded::Ignored,
}, },
TrieNodeMatch::Found { len: match_len, value } => match value { TrieNodeMatch::Found {
len: match_len,
value,
} => match value {
EntityType::Dec => parse_numeric_entity( EntityType::Dec => parse_numeric_entity(
// Skip past '&#'. Note that match_len is 3 as it matches '&#[0-9]'. // Skip past '&#'. Note that match_len is 3 as it matches '&#[0-9]'.
&code[2..], &code[2..],
@ -91,16 +96,24 @@ fn parse_entity(code: &[u8], in_attr_val: bool) -> ParsedEntity {
// Skip past '&#x'. Note that match_len is 4 as it matches '&#x[0-9a-fA-F]'. // Skip past '&#x'. Note that match_len is 4 as it matches '&#x[0-9a-fA-F]'.
&code[3..], &code[3..],
HEX_DIGIT, HEX_DIGIT,
|value, c| value.wrapping_mul(16).wrapping_add(match c { |value, c| {
c if DIGIT[c] => (c - b'0') as u32, value.wrapping_mul(16).wrapping_add(match c {
c if LOWER_HEX_ALPHA[c] => 10 + (c - b'a') as u32, c if DIGIT[c] => (c - b'0') as u32,
c if UPPER_HEX_ALPHA[c] => 10 + (c - b'A') as u32, c if LOWER_HEX_ALPHA[c] => 10 + (c - b'a') as u32,
_ => unreachable!(), c if UPPER_HEX_ALPHA[c] => 10 + (c - b'A') as u32,
}), _ => unreachable!(),
})
},
6, 6,
), ),
EntityType::Named(decoded) => { EntityType::Named(decoded) => {
if in_attr_val && code[match_len - 1] != b';' && code.get(match_len).filter(|&&c| ALPHANUMERIC_OR_EQUALS[c]).is_some() { if in_attr_val
&& code[match_len - 1] != b';'
&& code
.get(match_len)
.filter(|&&c| ALPHANUMERIC_OR_EQUALS[c])
.is_some()
{
// Don't decode if named entity is inside an attribute value and doesn't end with semicolon but is followed by an alphanumeric or `=` character. // Don't decode if named entity is inside an attribute value and doesn't end with semicolon but is followed by an alphanumeric or `=` character.
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state. // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state.
ParsedEntity { ParsedEntity {
@ -129,10 +142,7 @@ pub fn decode_entities(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
res.extend_from_slice(&code[..before]); res.extend_from_slice(&code[..before]);
code = &code[before..]; code = &code[before..];
if matched { if matched {
let ParsedEntity { let ParsedEntity { decoded, read_len } = parse_entity(code, in_attr_val);
decoded,
read_len,
} = parse_entity(code, in_attr_val);
match decoded { match decoded {
Decoded::Numeric(c) => { Decoded::Numeric(c) => {
let mut encoded = [0u8; 4]; let mut encoded = [0u8; 4];
@ -140,10 +150,10 @@ pub fn decode_entities(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
res.extend_from_slice(&encoded); res.extend_from_slice(&encoded);
} }
Decoded::Ignored => res.extend_from_slice(&code[..read_len]), Decoded::Ignored => res.extend_from_slice(&code[..read_len]),
Decoded::Named(s) => res.extend_from_slice(s) Decoded::Named(s) => res.extend_from_slice(s),
}; };
code = &code[read_len..]; code = &code[read_len..];
}; };
}; }
res res
} }

View File

@ -1,7 +1,7 @@
use memchr::memchr; use memchr::memchr;
use crate::gen::codepoints::ALPHANUMERIC_OR_EQUALS; use crate::gen::codepoints::ALPHANUMERIC_OR_EQUALS;
use crate::gen::entities::{ENTITY, EntityType}; use crate::gen::entities::{EntityType, ENTITY};
use crate::pattern::TrieNodeMatch; use crate::pattern::TrieNodeMatch;
pub fn encode_ampersands(mut code: &[u8], in_attr_val: bool) -> Vec<u8> { pub fn encode_ampersands(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
@ -19,9 +19,14 @@ pub fn encode_ampersands(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
TrieNodeMatch::NotFound { reached } => reached, TrieNodeMatch::NotFound { reached } => reached,
TrieNodeMatch::Found { len, value } => { TrieNodeMatch::Found { len, value } => {
match value { match value {
EntityType::Named(_) if in_attr_val EntityType::Named(_)
&& code[len - 1] != b';' if in_attr_val
&& code.get(len).filter(|&&c| ALPHANUMERIC_OR_EQUALS[c]).is_some() => { && code[len - 1] != b';'
&& code
.get(len)
.filter(|&&c| ALPHANUMERIC_OR_EQUALS[c])
.is_some() =>
{
// A named entity inside an attribute value that doesn't end with semicolon but is followed by an alphanumeric or `=` character is not decoded, so we don't need to encode. // A named entity inside an attribute value that doesn't end with semicolon but is followed by an alphanumeric or `=` character is not decoded, so we don't need to encode.
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state. // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state.
} }
@ -36,6 +41,6 @@ pub fn encode_ampersands(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
res.extend_from_slice(&code[..len]); res.extend_from_slice(&code[..len]);
code = &code[len..]; code = &code[len..];
}; };
}; }
res res
} }

View File

@ -3,4 +3,4 @@ pub mod omission;
pub mod void; pub mod void;
pub mod whitespace; pub mod whitespace;
pub static EMPTY_TAG_NAME: &'static[u8] = &[]; pub static EMPTY_TAG_NAME: &'static [u8] = &[];

View File

@ -1,5 +1,5 @@
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::collections::{HashSet, HashMap}; use std::collections::{HashMap, HashSet};
// Rules sourced from https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission. // Rules sourced from https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission.
// TODO Opening tags // TODO Opening tags
@ -161,14 +161,15 @@ lazy_static! {
} }
lazy_static! { lazy_static! {
static ref OPTGROUP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { static ref OPTGROUP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule =
followed_by: { ClosingTagOmissionRule {
let mut s = HashSet::<&'static [u8]>::new(); followed_by: {
s.insert(b"optgroup"); let mut s = HashSet::<&'static [u8]>::new();
s s.insert(b"optgroup");
}, s
is_last: ClosingTagOmissionRuleIfLast::Always, },
}; is_last: ClosingTagOmissionRuleIfLast::Always,
};
} }
lazy_static! { lazy_static! {
@ -275,7 +276,8 @@ lazy_static! {
// Use an empty slice for `parent` if no parent. // Use an empty slice for `parent` if no parent.
pub fn can_omit_as_last_node(parent: &[u8], child: &[u8]) -> bool { pub fn can_omit_as_last_node(parent: &[u8], child: &[u8]) -> bool {
CLOSING_TAG_OMISSION_RULES.get(child) CLOSING_TAG_OMISSION_RULES
.get(child)
.filter(|r| match &r.is_last { .filter(|r| match &r.is_last {
ClosingTagOmissionRuleIfLast::Always => true, ClosingTagOmissionRuleIfLast::Always => true,
ClosingTagOmissionRuleIfLast::Never => false, ClosingTagOmissionRuleIfLast::Never => false,
@ -286,7 +288,8 @@ pub fn can_omit_as_last_node(parent: &[u8], child: &[u8]) -> bool {
// Use an empty slice for `before` if no previous sibling element. // Use an empty slice for `before` if no previous sibling element.
pub fn can_omit_as_before(before: &[u8], after: &[u8]) -> bool { pub fn can_omit_as_before(before: &[u8], after: &[u8]) -> bool {
CLOSING_TAG_OMISSION_RULES.get(before) CLOSING_TAG_OMISSION_RULES
.get(before)
.filter(|r| r.followed_by.contains(after)) .filter(|r| r.followed_by.contains(after))
.is_some() .is_some()
} }

View File

@ -166,7 +166,10 @@ lazy_static! {
} }
#[inline(always)] #[inline(always)]
pub fn get_whitespace_minification_for_tag(tag_name: Option<&[u8]>, descendant_of_pre: bool) -> &'static WhitespaceMinification { pub fn get_whitespace_minification_for_tag(
tag_name: Option<&[u8]>,
descendant_of_pre: bool,
) -> &'static WhitespaceMinification {
if descendant_of_pre { if descendant_of_pre {
WHITESPACE_SENSITIVE WHITESPACE_SENSITIVE
} else { } else {

View File

@ -3,9 +3,16 @@ fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) -> () {
let mut code = src.to_vec(); let mut code = src.to_vec();
match super::with_friendly_error(&mut code, cfg) { match super::with_friendly_error(&mut code, cfg) {
Ok(len) => { Ok(len) => {
assert_eq!(std::str::from_utf8(&code[..len]).unwrap(), std::str::from_utf8(expected).unwrap()); assert_eq!(
std::str::from_utf8(&code[..len]).unwrap(),
std::str::from_utf8(expected).unwrap()
);
} }
Err(super::FriendlyError { code_context, message, .. }) => { Err(super::FriendlyError {
code_context,
message,
..
}) => {
println!("{}", message); println!("{}", message);
println!("{}", code_context); println!("{}", code_context);
assert!(false); assert!(false);
@ -16,41 +23,60 @@ fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) -> () {
#[cfg(test)] #[cfg(test)]
fn _eval_error(src: &'static [u8], expected: ErrorType, cfg: &super::Cfg) -> () { fn _eval_error(src: &'static [u8], expected: ErrorType, cfg: &super::Cfg) -> () {
let mut code = src.to_vec(); let mut code = src.to_vec();
assert_eq!(super::in_place(&mut code, cfg).unwrap_err().error_type, expected); assert_eq!(
super::in_place(&mut code, cfg).unwrap_err().error_type,
expected
);
} }
#[cfg(test)] #[cfg(test)]
fn eval(src: &'static [u8], expected: &'static [u8]) -> () { fn eval(src: &'static [u8], expected: &'static [u8]) -> () {
_eval(src, expected, &super::Cfg { _eval(
minify_js: false, src,
minify_css: false, expected,
}); &super::Cfg {
minify_js: false,
minify_css: false,
},
);
} }
#[cfg(test)] #[cfg(test)]
fn eval_error(src: &'static [u8], expected: ErrorType) -> () { fn eval_error(src: &'static [u8], expected: ErrorType) -> () {
_eval_error(src, expected, &super::Cfg { _eval_error(
minify_js: false, src,
minify_css: false, expected,
}); &super::Cfg {
minify_js: false,
minify_css: false,
},
);
} }
#[cfg(test)] #[cfg(test)]
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () { fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
_eval(src, expected, &super::Cfg { _eval(
minify_js: true, src,
minify_css: false, expected,
}); &super::Cfg {
minify_js: true,
minify_css: false,
},
);
} }
#[cfg(test)] #[cfg(test)]
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () { fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
_eval(src, expected, &super::Cfg { _eval(
minify_js: false, src,
minify_css: true, expected,
}); &super::Cfg {
minify_js: false,
minify_css: true,
},
);
} }
#[test] #[test]
@ -75,7 +101,10 @@ fn test_collapse_destroy_whole_and_trim_whitespace() {
eval(b"<ul> \n&#32; </ul>", b"<ul></ul>"); eval(b"<ul> \n&#32; </ul>", b"<ul></ul>");
eval(b"<ul> \n&#32;a </ul>", b"<ul>a</ul>"); eval(b"<ul> \n&#32;a </ul>", b"<ul>a</ul>");
eval(b"<ul> \n&#32;a b </ul>", b"<ul>a b</ul>"); eval(b"<ul> \n&#32;a b </ul>", b"<ul>a b</ul>");
eval(b"<ul> \n&#32;a<pre></pre> <pre></pre>b </ul>", b"<ul>a<pre></pre><pre></pre>b</ul>"); eval(
b"<ul> \n&#32;a<pre></pre> <pre></pre>b </ul>",
b"<ul>a<pre></pre><pre></pre>b</ul>",
);
// Tag names should be case insensitive. // Tag names should be case insensitive.
eval(b"<uL> \n&#32;a b </UL>", b"<ul>a b</ul>"); eval(b"<uL> \n&#32;a b </UL>", b"<ul>a b</ul>");
} }
@ -83,25 +112,40 @@ fn test_collapse_destroy_whole_and_trim_whitespace() {
#[test] #[test]
fn test_no_whitespace_minification() { fn test_no_whitespace_minification() {
eval(b"<pre> \n&#32; \t </pre>", b"<pre> \n \t </pre>"); eval(b"<pre> \n&#32; \t </pre>", b"<pre> \n \t </pre>");
eval(b"<textarea> \n&#32; \t </textarea>", b"<textarea> \n \t </textarea>"); eval(
b"<textarea> \n&#32; \t </textarea>",
b"<textarea> \n \t </textarea>",
);
// Tag names should be case insensitive. // Tag names should be case insensitive.
eval(b"<pRe> \n&#32; \t </PRE>", b"<pre> \n \t </pre>"); eval(b"<pRe> \n&#32; \t </PRE>", b"<pre> \n \t </pre>");
eval(b"<pre> <span> 1 2 </span> </pre>", b"<pre> <span> 1 2 </span> </pre>"); eval(
eval(b"<pre> <span> 1 <pre>\n</pre> 2 </span> </pre>", b"<pre> <span> 1 <pre>\n</pre> 2 </span> </pre>"); b"<pre> <span> 1 2 </span> </pre>",
eval(b"<div> <pre> <span> 1 <pre>\n</pre> 2 </span> </pre> </div>", b"<div><pre> <span> 1 <pre>\n</pre> 2 </span> </pre></div>"); b"<pre> <span> 1 2 </span> </pre>",
eval(br#"<pre><code>fn main() { );
eval(
b"<pre> <span> 1 <pre>\n</pre> 2 </span> </pre>",
b"<pre> <span> 1 <pre>\n</pre> 2 </span> </pre>",
);
eval(
b"<div> <pre> <span> 1 <pre>\n</pre> 2 </span> </pre> </div>",
b"<div><pre> <span> 1 <pre>\n</pre> 2 </span> </pre></div>",
);
eval(
br#"<pre><code>fn main() {
println!("Hello, world!"); println!("Hello, world!");
<span>loop { <span>loop {
println!("Hello, world!"); println!("Hello, world!");
}</span> }</span>
} }
</code></pre>"#, br#"<pre><code>fn main() { </code></pre>"#,
br#"<pre><code>fn main() {
println!("Hello, world!"); println!("Hello, world!");
<span>loop { <span>loop {
println!("Hello, world!"); println!("Hello, world!");
}</span> }</span>
} }
</code></pre>"#); </code></pre>"#,
);
} }
#[test] #[test]
@ -109,7 +153,10 @@ fn test_parsing_omitted_closing_tag() {
eval(b"<html>", b"<html>"); eval(b"<html>", b"<html>");
eval(b" <html>\n", b"<html>"); eval(b" <html>\n", b"<html>");
eval(b" <!doctype html> <html>\n", b"<!doctype html><html>"); eval(b" <!doctype html> <html>\n", b"<!doctype html><html>");
eval(b"<!doctype html><html><div> <p>Foo</div></html>", b"<!doctype html><html><div><p>Foo</div>"); eval(
b"<!doctype html><html><div> <p>Foo</div></html>",
b"<!doctype html><html><div><p>Foo</div>",
);
} }
#[test] #[test]
@ -138,19 +185,50 @@ fn test_parsing_with_omitted_tags() {
fn test_unmatched_closing_tag() { fn test_unmatched_closing_tag() {
eval_error(b"Hello</p>Goodbye", ErrorType::UnexpectedClosingTag); eval_error(b"Hello</p>Goodbye", ErrorType::UnexpectedClosingTag);
eval_error(b"Hello<br></br>Goodbye", ErrorType::UnexpectedClosingTag); eval_error(b"Hello<br></br>Goodbye", ErrorType::UnexpectedClosingTag);
eval_error(b"<div>Hello</p>Goodbye", ErrorType::ClosingTagMismatch { expected: "div".to_string(), got: "p".to_string() }); eval_error(
eval_error(b"<ul><li>a</p>", ErrorType::ClosingTagMismatch { expected: "ul".to_string(), got: "p".to_string() }); b"<div>Hello</p>Goodbye",
eval_error(b"<ul><li><rt>a</p>", ErrorType::ClosingTagMismatch { expected: "ul".to_string(), got: "p".to_string() }); ErrorType::ClosingTagMismatch {
eval_error(b"<html><head><body><ul><li><rt>a</p>", ErrorType::ClosingTagMismatch { expected: "ul".to_string(), got: "p".to_string() }); expected: "div".to_string(),
got: "p".to_string(),
},
);
eval_error(
b"<ul><li>a</p>",
ErrorType::ClosingTagMismatch {
expected: "ul".to_string(),
got: "p".to_string(),
},
);
eval_error(
b"<ul><li><rt>a</p>",
ErrorType::ClosingTagMismatch {
expected: "ul".to_string(),
got: "p".to_string(),
},
);
eval_error(
b"<html><head><body><ul><li><rt>a</p>",
ErrorType::ClosingTagMismatch {
expected: "ul".to_string(),
got: "p".to_string(),
},
);
} }
#[test] #[test]
fn test_removal_of_optional_tags() { fn test_removal_of_optional_tags() {
eval(b"<ul><li>1</li><li>2</li><li>3</li></ul>", b"<ul><li>1<li>2<li>3</ul>"); eval(
b"<ul><li>1</li><li>2</li><li>3</li></ul>",
b"<ul><li>1<li>2<li>3</ul>",
);
eval(b"<rt></rt>", b"<rt>"); eval(b"<rt></rt>", b"<rt>");
eval(b"<rt></rt><rp>1</rp><div></div>", b"<rt><rp>1</rp><div></div>"); eval(
b"<rt></rt><rp>1</rp><div></div>",
b"<rt><rp>1</rp><div></div>",
);
eval(b"<div><rt></rt></div>", b"<div><rt></div>"); eval(b"<div><rt></rt></div>", b"<div><rt></div>");
eval(br#" eval(
br#"
<html> <html>
<head> <head>
</head> </head>
@ -158,7 +236,9 @@ fn test_removal_of_optional_tags() {
<body> <body>
</body> </body>
</html> </html>
"#, b"<html><head><body>"); "#,
b"<html><head><body>",
);
// Tag names should be case insensitive. // Tag names should be case insensitive.
eval(b"<RT></rt>", b"<rt>"); eval(b"<RT></rt>", b"<rt>");
} }
@ -168,7 +248,10 @@ fn test_removal_of_optional_closing_p_tag() {
eval(b"<p></p><address></address>", b"<p><address></address>"); eval(b"<p></p><address></address>", b"<p><address></address>");
eval(b"<p></p>", b"<p>"); eval(b"<p></p>", b"<p>");
eval(b"<map><p></p></map>", b"<map><p></p></map>"); eval(b"<map><p></p></map>", b"<map><p></p></map>");
eval(b"<map><p></p><address></address></map>", b"<map><p><address></address></map>"); eval(
b"<map><p></p><address></address></map>",
b"<map><p><address></address></map>",
);
} }
#[test] #[test]
@ -186,7 +269,10 @@ fn test_attr_single_quoted_value_minification() {
eval(b"<a b=\"&quot;hello\"></a>", b"<a b='\"hello'></a>"); eval(b"<a b=\"&quot;hello\"></a>", b"<a b='\"hello'></a>");
eval(b"<a b='\"hello'></a>", b"<a b='\"hello'></a>"); eval(b"<a b='\"hello'></a>", b"<a b='\"hello'></a>");
eval(b"<a b='/>a'></a>", b"<a b=\"/>a\"></a>"); eval(b"<a b='/>a'></a>", b"<a b=\"/>a\"></a>");
eval(b"<a b=&#x20;he&quot;llo&#x20;></a>", b"<a b=' he\"llo '></a>"); eval(
b"<a b=&#x20;he&quot;llo&#x20;></a>",
b"<a b=' he\"llo '></a>",
);
} }
#[test] #[test]
@ -203,7 +289,10 @@ fn test_attr_unquoted_value_minification() {
#[test] #[test]
fn test_class_attr_value_minification() { fn test_class_attr_value_minification() {
eval(b"<a class=&#x20;c></a>", b"<a class=c></a>"); eval(b"<a class=&#x20;c></a>", b"<a class=c></a>");
eval(b"<a class=&#x20;c&#x20&#x20;d&#x20></a>", b"<a class=\"c d\"></a>"); eval(
b"<a class=&#x20;c&#x20&#x20;d&#x20></a>",
b"<a class=\"c d\"></a>",
);
eval(b"<a class=&#x20&#x20&#x20;&#x20></a>", b"<a></a>"); eval(b"<a class=&#x20&#x20&#x20;&#x20></a>", b"<a></a>");
eval(b"<a class=\" c\n \n \"></a>", b"<a class=c></a>"); eval(b"<a class=\" c\n \n \"></a>", b"<a class=c></a>");
eval(b"<a class=\" c\n \nd \"></a>", b"<a class=\"c d\"></a>"); eval(b"<a class=\" c\n \nd \"></a>", b"<a class=\"c d\"></a>");
@ -218,13 +307,34 @@ fn test_class_attr_value_minification() {
#[test] #[test]
fn test_d_attr_value_minification() { fn test_d_attr_value_minification() {
eval(b"<svg><path d=&#x20;c /></svg>", b"<svg><path d=c /></svg>"); eval(b"<svg><path d=&#x20;c /></svg>", b"<svg><path d=c /></svg>");
eval(b"<svg><path d=&#x20;c&#x20&#x20;d&#x20 /></svg>", b"<svg><path d=\"c d\"/></svg>"); eval(
eval(b"<svg><path d=&#x20;&#x20&#x20&#x20 /></svg>", b"<svg><path/></svg>"); b"<svg><path d=&#x20;c&#x20&#x20;d&#x20 /></svg>",
eval(b"<svg><path d=\" c\n \n \" /></svg>", b"<svg><path d=c /></svg>"); b"<svg><path d=\"c d\"/></svg>",
eval(b"<svg><path d=\" c\n \nd \" /></svg>", b"<svg><path d=\"c d\"/></svg>"); );
eval(b"<svg><path d=\" \n \n \" /></svg>", b"<svg><path/></svg>"); eval(
eval(b"<svg><path d=' c\n \n ' /></svg>", b"<svg><path d=c /></svg>"); b"<svg><path d=&#x20;&#x20&#x20&#x20 /></svg>",
eval(b"<svg><path d=' c\n \nd ' /></svg>", b"<svg><path d=\"c d\"/></svg>"); b"<svg><path/></svg>",
);
eval(
b"<svg><path d=\" c\n \n \" /></svg>",
b"<svg><path d=c /></svg>",
);
eval(
b"<svg><path d=\" c\n \nd \" /></svg>",
b"<svg><path d=\"c d\"/></svg>",
);
eval(
b"<svg><path d=\" \n \n \" /></svg>",
b"<svg><path/></svg>",
);
eval(
b"<svg><path d=' c\n \n ' /></svg>",
b"<svg><path d=c /></svg>",
);
eval(
b"<svg><path d=' c\n \nd ' /></svg>",
b"<svg><path d=\"c d\"/></svg>",
);
eval(b"<svg><path d=' \n \n ' /></svg>", b"<svg><path/></svg>"); eval(b"<svg><path d=' \n \n ' /></svg>", b"<svg><path/></svg>");
// Attribute names should be case insensitive. // Attribute names should be case insensitive.
eval(b"<svg><path D=' \n \n ' /></svg>", b"<svg><path/></svg>"); eval(b"<svg><path D=' \n \n ' /></svg>", b"<svg><path/></svg>");
@ -263,12 +373,27 @@ fn test_default_attr_value_removal() {
#[test] #[test]
fn test_script_type_attr_value_removal() { fn test_script_type_attr_value_removal() {
eval(b"<script type=\"application/ecmascript\"></script>", b"<script></script>"); eval(
eval(b"<script type=\"application/javascript\"></script>", b"<script></script>"); b"<script type=\"application/ecmascript\"></script>",
eval(b"<script type=\"text/jscript\"></script>", b"<script></script>"); b"<script></script>",
eval(b"<script type=\"text/plain\"></script>", b"<script type=text/plain></script>"); );
eval(
b"<script type=\"application/javascript\"></script>",
b"<script></script>",
);
eval(
b"<script type=\"text/jscript\"></script>",
b"<script></script>",
);
eval(
b"<script type=\"text/plain\"></script>",
b"<script type=text/plain></script>",
);
// Tag and attribute names should be case insensitive. // Tag and attribute names should be case insensitive.
eval(b"<SCRipt TYPE=\"application/ecmascript\"></SCrIPT>", b"<script></script>"); eval(
b"<SCRipt TYPE=\"application/ecmascript\"></SCrIPT>",
b"<script></script>",
);
} }
#[test] #[test]
@ -282,9 +407,15 @@ fn test_empty_attr_value_removal() {
#[test] #[test]
fn test_space_between_attrs_minification() { fn test_space_between_attrs_minification() {
eval(b"<div a=\" \" b=\" \"></div>", b"<div a=\" \"b=\" \"></div>"); eval(
b"<div a=\" \" b=\" \"></div>",
b"<div a=\" \"b=\" \"></div>",
);
eval(b"<div a=' ' b=\" \"></div>", b"<div a=\" \"b=\" \"></div>"); eval(b"<div a=' ' b=\" \"></div>", b"<div a=\" \"b=\" \"></div>");
eval(b"<div a=&#x20 b=\" \"></div>", b"<div a=\" \"b=\" \"></div>"); eval(
b"<div a=&#x20 b=\" \"></div>",
b"<div a=\" \"b=\" \"></div>",
);
eval(b"<div a=\"1\" b=\" \"></div>", b"<div a=1 b=\" \"></div>"); eval(b"<div a=\"1\" b=\" \"></div>", b"<div a=1 b=\" \"></div>");
eval(b"<div a='1' b=\" \"></div>", b"<div a=1 b=\" \"></div>"); eval(b"<div a='1' b=\" \"></div>", b"<div a=1 b=\" \"></div>");
eval(b"<div a=\"a\"b=\"b\"></div>", b"<div a=a b=b></div>"); eval(b"<div a=\"a\"b=\"b\"></div>", b"<div a=a b=b></div>");
@ -304,7 +435,10 @@ fn test_hexadecimal_entity_decoding() {
eval(b"&#x000000000000000000000000000000000000000000030;", b"0"); eval(b"&#x000000000000000000000000000000000000000000030;", b"0");
eval(b"&#x1151;", b"\xe1\x85\x91"); eval(b"&#x1151;", b"\xe1\x85\x91");
eval(b"&#x11FFFF;", b"\xef\xbf\xbd"); eval(b"&#x11FFFF;", b"\xef\xbf\xbd");
eval(b"&#xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF;", b"\xef\xbf\xbd"); eval(
b"&#xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF;",
b"\xef\xbf\xbd",
);
} }
#[test] #[test]
@ -317,7 +451,10 @@ fn test_decimal_entity_decoding() {
eval(b"&#000000000000000000000000000000000000000000048;", b"0"); eval(b"&#000000000000000000000000000000000000000000048;", b"0");
eval(b"&#4433;", b"\xe1\x85\x91"); eval(b"&#4433;", b"\xe1\x85\x91");
eval(b"&#1114112;", b"\xef\xbf\xbd"); eval(b"&#1114112;", b"\xef\xbf\xbd");
eval(b"&#999999999999999999999999999999999999999999999;", b"\xef\xbf\xbd"); eval(
b"&#999999999999999999999999999999999999999999999;",
b"\xef\xbf\xbd",
);
} }
#[test] #[test]
@ -337,9 +474,18 @@ fn test_named_entity_decoding() {
// Named entities not ending with ';' in attr values are not decoded if immediately // Named entities not ending with ';' in attr values are not decoded if immediately
// followed by an alphanumeric or `=` character. (See parser for more details.) // followed by an alphanumeric or `=` character. (See parser for more details.)
eval(br#"<a href="exam ple?&gta=5"></a>"#, br#"<a href="exam ple?&gta=5"></a>"#); eval(
eval(br#"<a href="exam ple?&gt=5"></a>"#, br#"<a href="exam ple?&gt=5"></a>"#); br#"<a href="exam ple?&gta=5"></a>"#,
eval(br#"<a href="exam ple?&gt~5"></a>"#, br#"<a href="exam ple?>~5"></a>"#); br#"<a href="exam ple?&gta=5"></a>"#,
);
eval(
br#"<a href="exam ple?&gt=5"></a>"#,
br#"<a href="exam ple?&gt=5"></a>"#,
);
eval(
br#"<a href="exam ple?&gt~5"></a>"#,
br#"<a href="exam ple?>~5"></a>"#,
);
} }
#[test] #[test]
@ -419,9 +565,15 @@ fn test_left_chevron_in_content() {
#[test] #[test]
fn test_comments_removal() { fn test_comments_removal() {
eval(b"<pre>a <!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --> b</pre>", b"<pre>a b</pre>"); eval(
b"<pre>a <!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --> b</pre>",
b"<pre>a b</pre>",
);
eval(b"&a<!-- akd--sj\n <!-- \t\0f--ajk--df->lafj -->mp", b"&amp"); eval(b"&a<!-- akd--sj\n <!-- \t\0f--ajk--df->lafj -->mp", b"&amp");
eval(b"<script><!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --></script>", b"<script><!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --></script>"); eval(
b"<script><!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --></script>",
b"<script><!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --></script>",
);
} }
#[test] #[test]
@ -434,30 +586,54 @@ fn test_processing_instructions() {
#[test] #[test]
fn test_js_minification() { fn test_js_minification() {
eval_with_js_min(b"<script>let a = 1;</script>", b"<script>let a=1;</script>"); eval_with_js_min(b"<script>let a = 1;</script>", b"<script>let a=1;</script>");
eval_with_js_min(br#" eval_with_js_min(
br#"
<script>let a = 1;</script> <script>let a = 1;</script>
<script>let b = 2;</script> <script>let b = 2;</script>
"#, b"<script>let a=1;</script><script>let b=2;</script>"); "#,
eval_with_js_min(b"<scRIPt type=text/plain> alert(1.00000); </scripT>", b"<script type=text/plain> alert(1.00000); </script>"); b"<script>let a=1;</script><script>let b=2;</script>",
eval_with_js_min(br#" );
eval_with_js_min(
b"<scRIPt type=text/plain> alert(1.00000); </scripT>",
b"<script type=text/plain> alert(1.00000); </script>",
);
eval_with_js_min(
br#"
<script> <script>
// This is a comment. // This is a comment.
let a = 1; let a = 1;
</script> </script>
"#, b"<script>let a=1;</script>"); "#,
b"<script>let a=1;</script>",
);
} }
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
#[test] #[test]
fn test_js_minification_unintentional_closing_tag() { fn test_js_minification_unintentional_closing_tag() {
eval_with_js_min(br#"<script>let a = "</" + "script>";</script>"#, br#"<script>let a="<\/script>";</script>"#); eval_with_js_min(
eval_with_js_min(br#"<script>let a = "</S" + "cRiPT>";</script>"#, br#"<script>let a="<\/ScRiPT>";</script>"#); br#"<script>let a = "</" + "script>";</script>"#,
eval_with_js_min(br#"<script>let a = "\u003c/script>";</script>"#, br#"<script>let a="<\/script>";</script>"#); br#"<script>let a="<\/script>";</script>"#,
eval_with_js_min(br#"<script>let a = "\u003c/scrIPt>";</script>"#, br#"<script>let a="<\/scrIPt>";</script>"#); );
eval_with_js_min(
br#"<script>let a = "</S" + "cRiPT>";</script>"#,
br#"<script>let a="<\/ScRiPT>";</script>"#,
);
eval_with_js_min(
br#"<script>let a = "\u003c/script>";</script>"#,
br#"<script>let a="<\/script>";</script>"#,
);
eval_with_js_min(
br#"<script>let a = "\u003c/scrIPt>";</script>"#,
br#"<script>let a="<\/scrIPt>";</script>"#,
);
} }
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
#[test] #[test]
fn test_css_minification() { fn test_css_minification() {
eval_with_css_min(b"<style>div { color: yellow }</style>", b"<style>div{color:#ff0}</style>"); eval_with_css_min(
b"<style>div { color: yellow }</style>",
b"<style>div{color:#ff0}</style>",
);
} }