rustfmt
This commit is contained in:
parent
5a259a8ead
commit
1a930a170d
12
src/lib.rs
12
src/lib.rs
|
@ -1,9 +1,9 @@
|
|||
use crate::cfg::Cfg;
|
||||
use crate::minify::content::minify_content;
|
||||
use crate::parse::Code;
|
||||
use crate::parse::content::parse_content;
|
||||
use crate::spec::tag::EMPTY_TAG_NAME;
|
||||
use crate::parse::Code;
|
||||
use crate::spec::tag::ns::Namespace;
|
||||
use crate::spec::tag::EMPTY_TAG_NAME;
|
||||
|
||||
mod ast;
|
||||
mod cfg;
|
||||
|
@ -37,7 +37,13 @@ mod tests;
|
|||
/// ```
|
||||
pub fn minify(src: &[u8], cfg: &Cfg) -> Vec<u8> {
|
||||
let mut code = Code::new(src);
|
||||
let parsed = parse_content(cfg, &mut code, Namespace::Html, EMPTY_TAG_NAME, EMPTY_TAG_NAME);
|
||||
let parsed = parse_content(
|
||||
cfg,
|
||||
&mut code,
|
||||
Namespace::Html,
|
||||
EMPTY_TAG_NAME,
|
||||
EMPTY_TAG_NAME,
|
||||
);
|
||||
let mut out = Vec::with_capacity(src.len());
|
||||
minify_content(cfg, &mut out, EMPTY_TAG_NAME, &parsed.children);
|
||||
out
|
||||
|
|
|
@ -13,7 +13,7 @@ fn build_double_quoted_replacer() -> Replacer {
|
|||
for c in "0123456789;".bytes() {
|
||||
patterns.push(vec![b'"', c]);
|
||||
replacements.push(vec![b'&', b'#', b'3', b'4', b';', c]);
|
||||
};
|
||||
}
|
||||
patterns.push(b"\"".to_vec());
|
||||
replacements.push(b"&#34".to_vec());
|
||||
|
||||
|
@ -35,7 +35,7 @@ fn build_single_quoted_replacer() -> Replacer {
|
|||
for c in "0123456789;".bytes() {
|
||||
patterns.push(vec![b'\'', c]);
|
||||
replacements.push(vec![b'&', b'#', b'3', b'9', b';', c]);
|
||||
};
|
||||
}
|
||||
patterns.push(b"'".to_vec());
|
||||
replacements.push(b"&#39".to_vec());
|
||||
|
||||
|
@ -71,12 +71,12 @@ fn build_unquoted_replacer() -> Replacer {
|
|||
ent.push(c);
|
||||
ent
|
||||
});
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
for &(ws, rep) in WS {
|
||||
patterns.push(vec![ws]);
|
||||
replacements.push(rep.to_vec());
|
||||
};
|
||||
}
|
||||
|
||||
// Replace all `>` with `&GT`, unless the chevron is followed by a semicolon,
|
||||
// in which case add a semicolon to the encoded entity.
|
||||
|
@ -148,11 +148,7 @@ pub fn minify_attr_val(val: &[u8]) -> Vec<u8> {
|
|||
},
|
||||
_ => b"",
|
||||
};
|
||||
let start = if !first_char_encoded.is_empty() {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let start = if !first_char_encoded.is_empty() { 1 } else { 0 };
|
||||
MinifiedVal {
|
||||
prefix: b"",
|
||||
data: res,
|
||||
|
|
|
@ -1,11 +1,6 @@
|
|||
use crate::cfg::Cfg;
|
||||
|
||||
pub fn minify_bang(
|
||||
cfg: &Cfg,
|
||||
out: &mut Vec<u8>,
|
||||
code: &[u8],
|
||||
ended: bool,
|
||||
) -> () {
|
||||
pub fn minify_bang(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8], ended: bool) -> () {
|
||||
if !cfg.remove_bangs {
|
||||
out.extend_from_slice(b"<!");
|
||||
out.extend_from_slice(&code);
|
||||
|
|
|
@ -1,11 +1,6 @@
|
|||
use crate::cfg::Cfg;
|
||||
|
||||
pub fn minify_comment(
|
||||
cfg: &Cfg,
|
||||
out: &mut Vec<u8>,
|
||||
code: &[u8],
|
||||
ended: bool,
|
||||
) -> () {
|
||||
pub fn minify_comment(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8], ended: bool) -> () {
|
||||
if !cfg.remove_comments {
|
||||
out.extend_from_slice(b"<!--");
|
||||
out.extend_from_slice(&code);
|
||||
|
|
|
@ -24,7 +24,7 @@ fn build_chevron_replacer() -> Replacer {
|
|||
patterns.push(vec![b'<', c]);
|
||||
replacements.push(vec![b'&', b'L', b'T', c]);
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
Replacer::new(
|
||||
AhoCorasickBuilder::new()
|
||||
|
@ -52,7 +52,7 @@ pub fn minify_content(
|
|||
NodeData::Text { .. } | NodeData::Element { .. } => break,
|
||||
_ => index_of_last_text_or_elem_child -= 1,
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
let mut previous_sibling_element: &[u8] = EMPTY_TAG_NAME;
|
||||
for (i, c) in nodes.iter().enumerate() {
|
||||
|
@ -84,11 +84,8 @@ pub fn minify_content(
|
|||
ScriptOrStyleLang::Data => out.extend_from_slice(code),
|
||||
ScriptOrStyleLang::JS => minify_js(cfg, out, code),
|
||||
},
|
||||
NodeData::Text { value } => out.extend_from_slice(
|
||||
&CHEVRON_REPLACER.replace_all(
|
||||
&encode_ampersands(value, false)
|
||||
)
|
||||
),
|
||||
NodeData::Text { value } => out
|
||||
.extend_from_slice(&CHEVRON_REPLACER.replace_all(&encode_ampersands(value, false))),
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,8 +12,8 @@ use crate::minify::instruction::minify_instruction;
|
|||
use crate::minify::js::minify_js;
|
||||
use crate::pattern::Replacer;
|
||||
use crate::spec::entity::encode::encode_ampersands;
|
||||
use crate::spec::tag::EMPTY_TAG_NAME;
|
||||
use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
|
||||
use crate::spec::tag::EMPTY_TAG_NAME;
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
enum AttrType {
|
||||
|
@ -35,10 +35,9 @@ pub fn minify_element(
|
|||
closing_tag: ElementClosingTag,
|
||||
children: &[NodeData],
|
||||
) -> () {
|
||||
let can_omit_closing_tag = cfg.omit_closing_tags && (
|
||||
can_omit_as_before(previous_sibling_element, tag_name)
|
||||
|| (is_last_child_text_or_element_node && can_omit_as_last_node(parent, tag_name))
|
||||
);
|
||||
let can_omit_closing_tag = cfg.omit_closing_tags
|
||||
&& (can_omit_as_before(previous_sibling_element, tag_name)
|
||||
|| (is_last_child_text_or_element_node && can_omit_as_last_node(parent, tag_name)));
|
||||
|
||||
out.push(b'<');
|
||||
out.extend_from_slice(tag_name);
|
||||
|
@ -50,13 +49,9 @@ pub fn minify_element(
|
|||
out.extend_from_slice(name);
|
||||
if !value.is_empty() {
|
||||
out.push(b'=');
|
||||
out.extend_from_slice(
|
||||
&minify_attr_val(
|
||||
&encode_ampersands(value, true),
|
||||
),
|
||||
);
|
||||
out.extend_from_slice(&minify_attr_val(&encode_ampersands(value, true)));
|
||||
};
|
||||
};
|
||||
}
|
||||
if closing_tag == ElementClosingTag::SelfClosing {
|
||||
if last_attr == AttrType::Unquoted {
|
||||
out.push(b' ');
|
||||
|
@ -72,7 +67,8 @@ pub fn minify_element(
|
|||
|
||||
minify_content(cfg, out, tag_name, children);
|
||||
|
||||
if closing_tag != ElementClosingTag::Present || (cfg.omit_closing_tags && can_omit_closing_tag) {
|
||||
if closing_tag != ElementClosingTag::Present || (cfg.omit_closing_tags && can_omit_closing_tag)
|
||||
{
|
||||
return;
|
||||
};
|
||||
out.extend_from_slice(b"</");
|
||||
|
|
|
@ -1,11 +1,6 @@
|
|||
use crate::cfg::Cfg;
|
||||
|
||||
pub fn minify_instruction(
|
||||
cfg: &Cfg,
|
||||
out: &mut Vec<u8>,
|
||||
code: &[u8],
|
||||
ended: bool,
|
||||
) -> () {
|
||||
pub fn minify_instruction(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8], ended: bool) -> () {
|
||||
if !cfg.remove_processing_instructions {
|
||||
out.extend_from_slice(b"<?");
|
||||
out.extend_from_slice(&code);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use crate::ast::NodeData;
|
||||
use crate::Cfg;
|
||||
use crate::parse::Code;
|
||||
use crate::Cfg;
|
||||
use memchr::memchr;
|
||||
|
||||
pub fn parse_bang(cfg: &Cfg, code: &mut Code) -> NodeData {
|
||||
|
|
|
@ -2,8 +2,8 @@ use aho_corasick::AhoCorasick;
|
|||
use lazy_static::lazy_static;
|
||||
|
||||
use crate::ast::NodeData;
|
||||
use crate::Cfg;
|
||||
use crate::parse::Code;
|
||||
use crate::Cfg;
|
||||
|
||||
lazy_static! {
|
||||
static ref COMMENT_END: AhoCorasick = AhoCorasick::new(&["-->"]);
|
||||
|
|
|
@ -3,17 +3,17 @@ use lazy_static::lazy_static;
|
|||
use memchr::memrchr;
|
||||
|
||||
use crate::ast::NodeData;
|
||||
use crate::Cfg;
|
||||
use crate::parse::bang::parse_bang;
|
||||
use crate::parse::Code;
|
||||
use crate::parse::comment::parse_comment;
|
||||
use crate::parse::content::ContentType::*;
|
||||
use crate::parse::element::{parse_element, parse_tag, peek_tag_name};
|
||||
use crate::parse::instruction::parse_instruction;
|
||||
use crate::parse::Code;
|
||||
use crate::spec::entity::decode::decode_entities;
|
||||
use crate::spec::tag::ns::Namespace;
|
||||
use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
|
||||
use crate::spec::tag::void::VOID_TAGS;
|
||||
use crate::Cfg;
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
enum ContentType {
|
||||
|
@ -43,7 +43,8 @@ lazy_static! {
|
|||
}
|
||||
|
||||
// Keep in sync with order of patterns in CONTENT_TYPE_PATTERN.
|
||||
static CONTENT_TYPE_FROM_PATTERN: &'static [ContentType] = &[OpeningTag, ClosingTag, Instruction, Bang, Comment];
|
||||
static CONTENT_TYPE_FROM_PATTERN: &'static [ContentType] =
|
||||
&[OpeningTag, ClosingTag, Instruction, Bang, Comment];
|
||||
|
||||
pub struct ParsedContent {
|
||||
pub children: Vec<NodeData>,
|
||||
|
@ -51,7 +52,13 @@ pub struct ParsedContent {
|
|||
}
|
||||
|
||||
// Use empty slice for `grandparent` or `parent` if none.
|
||||
pub fn parse_content(cfg: &Cfg, code: &mut Code, ns: Namespace, grandparent: &[u8], parent: &[u8]) -> ParsedContent {
|
||||
pub fn parse_content(
|
||||
cfg: &Cfg,
|
||||
code: &mut Code,
|
||||
ns: Namespace,
|
||||
grandparent: &[u8],
|
||||
parent: &[u8],
|
||||
) -> ParsedContent {
|
||||
// We assume the closing tag has been omitted until we see one explicitly before EOF (or it has been omitted as per the spec).
|
||||
let mut closing_tag_omitted = true;
|
||||
let mut nodes = Vec::<NodeData>::new();
|
||||
|
@ -80,7 +87,9 @@ pub fn parse_content(cfg: &Cfg, code: &mut Code, ns: Namespace, grandparent: &[u
|
|||
if name.is_empty() {
|
||||
// Malformed code, drop until and including next `>`.
|
||||
typ = MalformedLeftChevronSlash;
|
||||
} else if grandparent == name.as_slice() && can_omit_as_last_node(grandparent, parent) {
|
||||
} else if grandparent == name.as_slice()
|
||||
&& can_omit_as_last_node(grandparent, parent)
|
||||
{
|
||||
// The upcoming closing tag implicitly closes the current element e.g. `<tr><td>(current position)</tr>`.
|
||||
// This DOESN'T handle when grandparent doesn't exist (represented by an empty slice). However, in that case it's irrelevant, as it would mean we would be at EOF, and our parser simply auto-closes everything anyway. (Normally we'd have to determine if `<p>Hello` is an error or allowed.)
|
||||
typ = OmittedClosingTag;
|
||||
|
@ -119,7 +128,7 @@ pub fn parse_content(cfg: &Cfg, code: &mut Code, ns: Namespace, grandparent: &[u
|
|||
}
|
||||
ClosingTagForVoidElement => drop(parse_tag(code)),
|
||||
};
|
||||
};
|
||||
}
|
||||
debug_assert_eq!(text_len, 0);
|
||||
ParsedContent {
|
||||
children: nodes,
|
||||
|
|
|
@ -1,17 +1,20 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use crate::ast::{ElementClosingTag, NodeData, ScriptOrStyleLang};
|
||||
use crate::Cfg;
|
||||
use crate::gen::codepoints::{ATTR_QUOTE, DOUBLE_QUOTE, NOT_UNQUOTED_ATTR_VAL_CHAR, SINGLE_QUOTE, TAG_NAME_CHAR, WHITESPACE, WHITESPACE_OR_SLASH};
|
||||
use crate::parse::Code;
|
||||
use crate::gen::codepoints::{
|
||||
ATTR_QUOTE, DOUBLE_QUOTE, NOT_UNQUOTED_ATTR_VAL_CHAR, SINGLE_QUOTE, TAG_NAME_CHAR, WHITESPACE,
|
||||
WHITESPACE_OR_SLASH,
|
||||
};
|
||||
use crate::parse::content::{parse_content, ParsedContent};
|
||||
use crate::parse::script::parse_script_content;
|
||||
use crate::parse::style::parse_style_content;
|
||||
use crate::parse::textarea::parse_textarea_content;
|
||||
use crate::parse::Code;
|
||||
use crate::spec::entity::decode::decode_entities;
|
||||
use crate::spec::script::JAVASCRIPT_MIME_TYPES;
|
||||
use crate::spec::tag::ns::Namespace;
|
||||
use crate::spec::tag::void::VOID_TAGS;
|
||||
use crate::Cfg;
|
||||
|
||||
fn parse_tag_name(code: &mut Code) -> Vec<u8> {
|
||||
debug_assert!(code.str().starts_with(b"<"));
|
||||
|
@ -66,7 +69,10 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag {
|
|||
None => NOT_UNQUOTED_ATTR_VAL_CHAR,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let attr_value = decode_entities(code.slice_and_shift_while_not_in_lookup(attr_delim_pred), true);
|
||||
let attr_value = decode_entities(
|
||||
code.slice_and_shift_while_not_in_lookup(attr_delim_pred),
|
||||
true,
|
||||
);
|
||||
if let Some(c) = attr_delim {
|
||||
// It might not be next if EOF (i.e. attribute value not closed).
|
||||
code.shift_if_next(c);
|
||||
|
@ -74,7 +80,7 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag {
|
|||
attr_value
|
||||
};
|
||||
attributes.insert(attr_name, attr_value);
|
||||
};
|
||||
}
|
||||
ParsedTag {
|
||||
attributes,
|
||||
name: elem_name,
|
||||
|
@ -121,12 +127,14 @@ pub fn parse_element(cfg: &Cfg, code: &mut Code, ns: Namespace, parent: &[u8]) -
|
|||
} = match elem_name.as_slice() {
|
||||
// TODO to_vec call allocates every time?
|
||||
b"script" => match attributes.get(&b"type".to_vec()) {
|
||||
Some(mime) if !JAVASCRIPT_MIME_TYPES.contains(mime.as_slice()) => parse_script_content(cfg, code, ScriptOrStyleLang::Data),
|
||||
Some(mime) if !JAVASCRIPT_MIME_TYPES.contains(mime.as_slice()) => {
|
||||
parse_script_content(cfg, code, ScriptOrStyleLang::Data)
|
||||
}
|
||||
_ => parse_script_content(cfg, code, ScriptOrStyleLang::JS),
|
||||
},
|
||||
b"style" => parse_style_content(cfg, code),
|
||||
b"textarea" => parse_textarea_content(cfg, code),
|
||||
_ => parse_content(cfg, code, child_ns, parent, &elem_name)
|
||||
_ => parse_content(cfg, code, child_ns, parent, &elem_name),
|
||||
};
|
||||
|
||||
if !closing_tag_omitted {
|
||||
|
|
|
@ -2,8 +2,8 @@ use aho_corasick::AhoCorasick;
|
|||
use lazy_static::lazy_static;
|
||||
|
||||
use crate::ast::NodeData;
|
||||
use crate::Cfg;
|
||||
use crate::parse::Code;
|
||||
use crate::Cfg;
|
||||
|
||||
lazy_static! {
|
||||
static ref INSTRUCTION_END: AhoCorasick = AhoCorasick::new(&["?>"]);
|
||||
|
|
|
@ -19,10 +19,7 @@ pub struct Checkpoint(usize);
|
|||
|
||||
impl<'c> Code<'c> {
|
||||
pub fn new(code: &[u8]) -> Code {
|
||||
Code {
|
||||
code,
|
||||
next: 0,
|
||||
}
|
||||
Code { code, next: 0 }
|
||||
}
|
||||
|
||||
pub fn str(&self) -> &[u8] {
|
||||
|
@ -59,7 +56,12 @@ impl<'c> Code<'c> {
|
|||
}
|
||||
|
||||
pub fn shift_if_next_seq(&mut self, seq: &'static [u8]) -> bool {
|
||||
if self.code.get(self.next..self.next + seq.len()).filter(|&n| n == seq).is_some() {
|
||||
if self
|
||||
.code
|
||||
.get(self.next..self.next + seq.len())
|
||||
.filter(|&n| n == seq)
|
||||
.is_some()
|
||||
{
|
||||
self.next += seq.len();
|
||||
true
|
||||
} else {
|
||||
|
@ -88,7 +90,7 @@ impl<'c> Code<'c> {
|
|||
Some(&c) if lookup[c] => len += 1,
|
||||
_ => break,
|
||||
};
|
||||
};
|
||||
}
|
||||
self.copy_and_shift(len)
|
||||
}
|
||||
|
||||
|
@ -99,7 +101,7 @@ impl<'c> Code<'c> {
|
|||
Some(&c) if !lookup[c] => len += 1,
|
||||
_ => break,
|
||||
};
|
||||
};
|
||||
}
|
||||
self.slice_and_shift(len)
|
||||
}
|
||||
|
||||
|
@ -118,7 +120,7 @@ impl<'c> Code<'c> {
|
|||
}
|
||||
_ => break,
|
||||
};
|
||||
};
|
||||
}
|
||||
last
|
||||
}
|
||||
|
||||
|
|
|
@ -3,9 +3,9 @@ use aho_corasick::AhoCorasickBuilder;
|
|||
use lazy_static::lazy_static;
|
||||
|
||||
use crate::ast::{NodeData, ScriptOrStyleLang};
|
||||
use crate::Cfg;
|
||||
use crate::parse::Code;
|
||||
use crate::parse::content::ParsedContent;
|
||||
use crate::parse::Code;
|
||||
use crate::Cfg;
|
||||
|
||||
lazy_static! {
|
||||
static ref END: AhoCorasick = AhoCorasickBuilder::new()
|
||||
|
@ -20,6 +20,9 @@ pub fn parse_script_content(cfg: &Cfg, code: &mut Code, lang: ScriptOrStyleLang)
|
|||
};
|
||||
ParsedContent {
|
||||
closing_tag_omitted,
|
||||
children: vec![NodeData::ScriptOrStyleContent { code: code.copy_and_shift(len), lang }],
|
||||
children: vec![NodeData::ScriptOrStyleContent {
|
||||
code: code.copy_and_shift(len),
|
||||
lang,
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,9 +3,9 @@ use aho_corasick::AhoCorasickBuilder;
|
|||
use lazy_static::lazy_static;
|
||||
|
||||
use crate::ast::{NodeData, ScriptOrStyleLang};
|
||||
use crate::Cfg;
|
||||
use crate::parse::Code;
|
||||
use crate::parse::content::ParsedContent;
|
||||
use crate::parse::Code;
|
||||
use crate::Cfg;
|
||||
|
||||
lazy_static! {
|
||||
static ref END: AhoCorasick = AhoCorasickBuilder::new()
|
||||
|
@ -20,11 +20,9 @@ pub fn parse_style_content(cfg: &Cfg, code: &mut Code) -> ParsedContent {
|
|||
};
|
||||
ParsedContent {
|
||||
closing_tag_omitted,
|
||||
children: vec![
|
||||
NodeData::ScriptOrStyleContent {
|
||||
code: code.copy_and_shift(len),
|
||||
lang: ScriptOrStyleLang::CSS,
|
||||
},
|
||||
],
|
||||
children: vec![NodeData::ScriptOrStyleContent {
|
||||
code: code.copy_and_shift(len),
|
||||
lang: ScriptOrStyleLang::CSS,
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,10 +3,10 @@ use aho_corasick::AhoCorasickBuilder;
|
|||
use lazy_static::lazy_static;
|
||||
|
||||
use crate::ast::NodeData;
|
||||
use crate::Cfg;
|
||||
use crate::parse::Code;
|
||||
use crate::parse::content::ParsedContent;
|
||||
use crate::parse::Code;
|
||||
use crate::spec::entity::decode::decode_entities;
|
||||
use crate::Cfg;
|
||||
|
||||
lazy_static! {
|
||||
static ref END: AhoCorasick = AhoCorasickBuilder::new()
|
||||
|
@ -21,6 +21,8 @@ pub fn parse_textarea_content(cfg: &Cfg, code: &mut Code) -> ParsedContent {
|
|||
};
|
||||
ParsedContent {
|
||||
closing_tag_omitted,
|
||||
children: vec![NodeData::Text { value: decode_entities(code.slice_and_shift(len), false) }],
|
||||
children: vec![NodeData::Text {
|
||||
value: decode_entities(code.slice_and_shift(len), false),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ impl<V: 'static + Copy> TrieNode<V> {
|
|||
if node.value.is_some() {
|
||||
break;
|
||||
};
|
||||
};
|
||||
}
|
||||
(node, pos)
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,7 @@ impl<V: 'static + Copy> TrieNode<V> {
|
|||
Some(v) => value = Some(TrieNodeMatch::Found { len: pos, value: v }),
|
||||
None => {}
|
||||
};
|
||||
};
|
||||
}
|
||||
value.unwrap_or(TrieNodeMatch::NotFound { reached: pos })
|
||||
}
|
||||
}
|
||||
|
@ -77,7 +77,10 @@ pub struct Replacer {
|
|||
|
||||
impl Replacer {
|
||||
pub fn new(searcher: AhoCorasick, replacements: Vec<Vec<u8>>) -> Replacer {
|
||||
Replacer { searcher, replacements }
|
||||
Replacer {
|
||||
searcher,
|
||||
replacements,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn replace_all(&self, src: &[u8]) -> Vec<u8> {
|
||||
|
|
|
@ -17,8 +17,10 @@ use std::char::from_u32;
|
|||
|
||||
use memchr::memchr;
|
||||
|
||||
use crate::gen::codepoints::{ALPHANUMERIC_OR_EQUALS, DIGIT, HEX_DIGIT, Lookup, LOWER_HEX_ALPHA, UPPER_HEX_ALPHA};
|
||||
use crate::gen::entities::{ENTITY, EntityType};
|
||||
use crate::gen::codepoints::{
|
||||
Lookup, ALPHANUMERIC_OR_EQUALS, DIGIT, HEX_DIGIT, LOWER_HEX_ALPHA, UPPER_HEX_ALPHA,
|
||||
};
|
||||
use crate::gen::entities::{EntityType, ENTITY};
|
||||
use crate::pattern::TrieNodeMatch;
|
||||
|
||||
enum Decoded {
|
||||
|
@ -44,7 +46,7 @@ fn parse_numeric_entity(
|
|||
// Skip initial zeros.
|
||||
while code.get(read_next).filter(|c| **c == b'0').is_some() {
|
||||
read_next += 1;
|
||||
};
|
||||
}
|
||||
// Browser will still continue to consume digits past max_digits.
|
||||
loop {
|
||||
match code.get(read_next) {
|
||||
|
@ -56,7 +58,7 @@ fn parse_numeric_entity(
|
|||
}
|
||||
_ => break,
|
||||
};
|
||||
};
|
||||
}
|
||||
// Semicolon is required by spec but seems to be optional in actual browser behaviour.
|
||||
if let Some(b';') = code.get(read_next) {
|
||||
read_next += 1;
|
||||
|
@ -79,7 +81,10 @@ fn parse_entity(code: &[u8], in_attr_val: bool) -> ParsedEntity {
|
|||
read_len: reached,
|
||||
decoded: Decoded::Ignored,
|
||||
},
|
||||
TrieNodeMatch::Found { len: match_len, value } => match value {
|
||||
TrieNodeMatch::Found {
|
||||
len: match_len,
|
||||
value,
|
||||
} => match value {
|
||||
EntityType::Dec => parse_numeric_entity(
|
||||
// Skip past '&#'. Note that match_len is 3 as it matches '&#[0-9]'.
|
||||
&code[2..],
|
||||
|
@ -91,16 +96,24 @@ fn parse_entity(code: &[u8], in_attr_val: bool) -> ParsedEntity {
|
|||
// Skip past '&#x'. Note that match_len is 4 as it matches '&#x[0-9a-fA-F]'.
|
||||
&code[3..],
|
||||
HEX_DIGIT,
|
||||
|value, c| value.wrapping_mul(16).wrapping_add(match c {
|
||||
c if DIGIT[c] => (c - b'0') as u32,
|
||||
c if LOWER_HEX_ALPHA[c] => 10 + (c - b'a') as u32,
|
||||
c if UPPER_HEX_ALPHA[c] => 10 + (c - b'A') as u32,
|
||||
_ => unreachable!(),
|
||||
}),
|
||||
|value, c| {
|
||||
value.wrapping_mul(16).wrapping_add(match c {
|
||||
c if DIGIT[c] => (c - b'0') as u32,
|
||||
c if LOWER_HEX_ALPHA[c] => 10 + (c - b'a') as u32,
|
||||
c if UPPER_HEX_ALPHA[c] => 10 + (c - b'A') as u32,
|
||||
_ => unreachable!(),
|
||||
})
|
||||
},
|
||||
6,
|
||||
),
|
||||
EntityType::Named(decoded) => {
|
||||
if in_attr_val && code[match_len - 1] != b';' && code.get(match_len).filter(|&&c| ALPHANUMERIC_OR_EQUALS[c]).is_some() {
|
||||
if in_attr_val
|
||||
&& code[match_len - 1] != b';'
|
||||
&& code
|
||||
.get(match_len)
|
||||
.filter(|&&c| ALPHANUMERIC_OR_EQUALS[c])
|
||||
.is_some()
|
||||
{
|
||||
// Don't decode if named entity is inside an attribute value and doesn't end with semicolon but is followed by an alphanumeric or `=` character.
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state.
|
||||
ParsedEntity {
|
||||
|
@ -129,10 +142,7 @@ pub fn decode_entities(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
|
|||
res.extend_from_slice(&code[..before]);
|
||||
code = &code[before..];
|
||||
if matched {
|
||||
let ParsedEntity {
|
||||
decoded,
|
||||
read_len,
|
||||
} = parse_entity(code, in_attr_val);
|
||||
let ParsedEntity { decoded, read_len } = parse_entity(code, in_attr_val);
|
||||
match decoded {
|
||||
Decoded::Numeric(c) => {
|
||||
let mut encoded = [0u8; 4];
|
||||
|
@ -140,10 +150,10 @@ pub fn decode_entities(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
|
|||
res.extend_from_slice(&encoded);
|
||||
}
|
||||
Decoded::Ignored => res.extend_from_slice(&code[..read_len]),
|
||||
Decoded::Named(s) => res.extend_from_slice(s)
|
||||
Decoded::Named(s) => res.extend_from_slice(s),
|
||||
};
|
||||
code = &code[read_len..];
|
||||
};
|
||||
};
|
||||
}
|
||||
res
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use memchr::memchr;
|
||||
|
||||
use crate::gen::codepoints::ALPHANUMERIC_OR_EQUALS;
|
||||
use crate::gen::entities::{ENTITY, EntityType};
|
||||
use crate::gen::entities::{EntityType, ENTITY};
|
||||
use crate::pattern::TrieNodeMatch;
|
||||
|
||||
pub fn encode_ampersands(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
|
||||
|
@ -19,9 +19,14 @@ pub fn encode_ampersands(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
|
|||
TrieNodeMatch::NotFound { reached } => reached,
|
||||
TrieNodeMatch::Found { len, value } => {
|
||||
match value {
|
||||
EntityType::Named(_) if in_attr_val
|
||||
&& code[len - 1] != b';'
|
||||
&& code.get(len).filter(|&&c| ALPHANUMERIC_OR_EQUALS[c]).is_some() => {
|
||||
EntityType::Named(_)
|
||||
if in_attr_val
|
||||
&& code[len - 1] != b';'
|
||||
&& code
|
||||
.get(len)
|
||||
.filter(|&&c| ALPHANUMERIC_OR_EQUALS[c])
|
||||
.is_some() =>
|
||||
{
|
||||
// A named entity inside an attribute value that doesn't end with semicolon but is followed by an alphanumeric or `=` character is not decoded, so we don't need to encode.
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state.
|
||||
}
|
||||
|
@ -36,6 +41,6 @@ pub fn encode_ampersands(mut code: &[u8], in_attr_val: bool) -> Vec<u8> {
|
|||
res.extend_from_slice(&code[..len]);
|
||||
code = &code[len..];
|
||||
};
|
||||
};
|
||||
}
|
||||
res
|
||||
}
|
||||
|
|
|
@ -3,4 +3,4 @@ pub mod omission;
|
|||
pub mod void;
|
||||
pub mod whitespace;
|
||||
|
||||
pub static EMPTY_TAG_NAME: &'static[u8] = &[];
|
||||
pub static EMPTY_TAG_NAME: &'static [u8] = &[];
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use lazy_static::lazy_static;
|
||||
use std::collections::{HashSet, HashMap};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
// Rules sourced from https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission.
|
||||
// TODO Opening tags
|
||||
|
@ -161,14 +161,15 @@ lazy_static! {
|
|||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref OPTGROUP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
|
||||
followed_by: {
|
||||
let mut s = HashSet::<&'static [u8]>::new();
|
||||
s.insert(b"optgroup");
|
||||
s
|
||||
},
|
||||
is_last: ClosingTagOmissionRuleIfLast::Always,
|
||||
};
|
||||
static ref OPTGROUP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule =
|
||||
ClosingTagOmissionRule {
|
||||
followed_by: {
|
||||
let mut s = HashSet::<&'static [u8]>::new();
|
||||
s.insert(b"optgroup");
|
||||
s
|
||||
},
|
||||
is_last: ClosingTagOmissionRuleIfLast::Always,
|
||||
};
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
|
@ -275,7 +276,8 @@ lazy_static! {
|
|||
|
||||
// Use an empty slice for `parent` if no parent.
|
||||
pub fn can_omit_as_last_node(parent: &[u8], child: &[u8]) -> bool {
|
||||
CLOSING_TAG_OMISSION_RULES.get(child)
|
||||
CLOSING_TAG_OMISSION_RULES
|
||||
.get(child)
|
||||
.filter(|r| match &r.is_last {
|
||||
ClosingTagOmissionRuleIfLast::Always => true,
|
||||
ClosingTagOmissionRuleIfLast::Never => false,
|
||||
|
@ -286,7 +288,8 @@ pub fn can_omit_as_last_node(parent: &[u8], child: &[u8]) -> bool {
|
|||
|
||||
// Use an empty slice for `before` if no previous sibling element.
|
||||
pub fn can_omit_as_before(before: &[u8], after: &[u8]) -> bool {
|
||||
CLOSING_TAG_OMISSION_RULES.get(before)
|
||||
CLOSING_TAG_OMISSION_RULES
|
||||
.get(before)
|
||||
.filter(|r| r.followed_by.contains(after))
|
||||
.is_some()
|
||||
}
|
||||
|
|
|
@ -166,7 +166,10 @@ lazy_static! {
|
|||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn get_whitespace_minification_for_tag(tag_name: Option<&[u8]>, descendant_of_pre: bool) -> &'static WhitespaceMinification {
|
||||
pub fn get_whitespace_minification_for_tag(
|
||||
tag_name: Option<&[u8]>,
|
||||
descendant_of_pre: bool,
|
||||
) -> &'static WhitespaceMinification {
|
||||
if descendant_of_pre {
|
||||
WHITESPACE_SENSITIVE
|
||||
} else {
|
||||
|
|
316
src/tests/mod.rs
316
src/tests/mod.rs
|
@ -3,9 +3,16 @@ fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) -> () {
|
|||
let mut code = src.to_vec();
|
||||
match super::with_friendly_error(&mut code, cfg) {
|
||||
Ok(len) => {
|
||||
assert_eq!(std::str::from_utf8(&code[..len]).unwrap(), std::str::from_utf8(expected).unwrap());
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&code[..len]).unwrap(),
|
||||
std::str::from_utf8(expected).unwrap()
|
||||
);
|
||||
}
|
||||
Err(super::FriendlyError { code_context, message, .. }) => {
|
||||
Err(super::FriendlyError {
|
||||
code_context,
|
||||
message,
|
||||
..
|
||||
}) => {
|
||||
println!("{}", message);
|
||||
println!("{}", code_context);
|
||||
assert!(false);
|
||||
|
@ -16,41 +23,60 @@ fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) -> () {
|
|||
#[cfg(test)]
|
||||
fn _eval_error(src: &'static [u8], expected: ErrorType, cfg: &super::Cfg) -> () {
|
||||
let mut code = src.to_vec();
|
||||
assert_eq!(super::in_place(&mut code, cfg).unwrap_err().error_type, expected);
|
||||
assert_eq!(
|
||||
super::in_place(&mut code, cfg).unwrap_err().error_type,
|
||||
expected
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn eval(src: &'static [u8], expected: &'static [u8]) -> () {
|
||||
_eval(src, expected, &super::Cfg {
|
||||
minify_js: false,
|
||||
minify_css: false,
|
||||
});
|
||||
_eval(
|
||||
src,
|
||||
expected,
|
||||
&super::Cfg {
|
||||
minify_js: false,
|
||||
minify_css: false,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn eval_error(src: &'static [u8], expected: ErrorType) -> () {
|
||||
_eval_error(src, expected, &super::Cfg {
|
||||
minify_js: false,
|
||||
minify_css: false,
|
||||
});
|
||||
_eval_error(
|
||||
src,
|
||||
expected,
|
||||
&super::Cfg {
|
||||
minify_js: false,
|
||||
minify_css: false,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
|
||||
_eval(src, expected, &super::Cfg {
|
||||
minify_js: true,
|
||||
minify_css: false,
|
||||
});
|
||||
_eval(
|
||||
src,
|
||||
expected,
|
||||
&super::Cfg {
|
||||
minify_js: true,
|
||||
minify_css: false,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
|
||||
_eval(src, expected, &super::Cfg {
|
||||
minify_js: false,
|
||||
minify_css: true,
|
||||
});
|
||||
_eval(
|
||||
src,
|
||||
expected,
|
||||
&super::Cfg {
|
||||
minify_js: false,
|
||||
minify_css: true,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -75,7 +101,10 @@ fn test_collapse_destroy_whole_and_trim_whitespace() {
|
|||
eval(b"<ul> \n  </ul>", b"<ul></ul>");
|
||||
eval(b"<ul> \n a </ul>", b"<ul>a</ul>");
|
||||
eval(b"<ul> \n a b </ul>", b"<ul>a b</ul>");
|
||||
eval(b"<ul> \n a<pre></pre> <pre></pre>b </ul>", b"<ul>a<pre></pre><pre></pre>b</ul>");
|
||||
eval(
|
||||
b"<ul> \n a<pre></pre> <pre></pre>b </ul>",
|
||||
b"<ul>a<pre></pre><pre></pre>b</ul>",
|
||||
);
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<uL> \n a b </UL>", b"<ul>a b</ul>");
|
||||
}
|
||||
|
@ -83,25 +112,40 @@ fn test_collapse_destroy_whole_and_trim_whitespace() {
|
|||
#[test]
|
||||
fn test_no_whitespace_minification() {
|
||||
eval(b"<pre> \n  \t </pre>", b"<pre> \n \t </pre>");
|
||||
eval(b"<textarea> \n  \t </textarea>", b"<textarea> \n \t </textarea>");
|
||||
eval(
|
||||
b"<textarea> \n  \t </textarea>",
|
||||
b"<textarea> \n \t </textarea>",
|
||||
);
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<pRe> \n  \t </PRE>", b"<pre> \n \t </pre>");
|
||||
eval(b"<pre> <span> 1 2 </span> </pre>", b"<pre> <span> 1 2 </span> </pre>");
|
||||
eval(b"<pre> <span> 1 <pre>\n</pre> 2 </span> </pre>", b"<pre> <span> 1 <pre>\n</pre> 2 </span> </pre>");
|
||||
eval(b"<div> <pre> <span> 1 <pre>\n</pre> 2 </span> </pre> </div>", b"<div><pre> <span> 1 <pre>\n</pre> 2 </span> </pre></div>");
|
||||
eval(br#"<pre><code>fn main() {
|
||||
eval(
|
||||
b"<pre> <span> 1 2 </span> </pre>",
|
||||
b"<pre> <span> 1 2 </span> </pre>",
|
||||
);
|
||||
eval(
|
||||
b"<pre> <span> 1 <pre>\n</pre> 2 </span> </pre>",
|
||||
b"<pre> <span> 1 <pre>\n</pre> 2 </span> </pre>",
|
||||
);
|
||||
eval(
|
||||
b"<div> <pre> <span> 1 <pre>\n</pre> 2 </span> </pre> </div>",
|
||||
b"<div><pre> <span> 1 <pre>\n</pre> 2 </span> </pre></div>",
|
||||
);
|
||||
eval(
|
||||
br#"<pre><code>fn main() {
|
||||
println!("Hello, world!");
|
||||
<span>loop {
|
||||
println!("Hello, world!");
|
||||
}</span>
|
||||
}
|
||||
</code></pre>"#, br#"<pre><code>fn main() {
|
||||
</code></pre>"#,
|
||||
br#"<pre><code>fn main() {
|
||||
println!("Hello, world!");
|
||||
<span>loop {
|
||||
println!("Hello, world!");
|
||||
}</span>
|
||||
}
|
||||
</code></pre>"#);
|
||||
</code></pre>"#,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -109,7 +153,10 @@ fn test_parsing_omitted_closing_tag() {
|
|||
eval(b"<html>", b"<html>");
|
||||
eval(b" <html>\n", b"<html>");
|
||||
eval(b" <!doctype html> <html>\n", b"<!doctype html><html>");
|
||||
eval(b"<!doctype html><html><div> <p>Foo</div></html>", b"<!doctype html><html><div><p>Foo</div>");
|
||||
eval(
|
||||
b"<!doctype html><html><div> <p>Foo</div></html>",
|
||||
b"<!doctype html><html><div><p>Foo</div>",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -138,19 +185,50 @@ fn test_parsing_with_omitted_tags() {
|
|||
fn test_unmatched_closing_tag() {
|
||||
eval_error(b"Hello</p>Goodbye", ErrorType::UnexpectedClosingTag);
|
||||
eval_error(b"Hello<br></br>Goodbye", ErrorType::UnexpectedClosingTag);
|
||||
eval_error(b"<div>Hello</p>Goodbye", ErrorType::ClosingTagMismatch { expected: "div".to_string(), got: "p".to_string() });
|
||||
eval_error(b"<ul><li>a</p>", ErrorType::ClosingTagMismatch { expected: "ul".to_string(), got: "p".to_string() });
|
||||
eval_error(b"<ul><li><rt>a</p>", ErrorType::ClosingTagMismatch { expected: "ul".to_string(), got: "p".to_string() });
|
||||
eval_error(b"<html><head><body><ul><li><rt>a</p>", ErrorType::ClosingTagMismatch { expected: "ul".to_string(), got: "p".to_string() });
|
||||
eval_error(
|
||||
b"<div>Hello</p>Goodbye",
|
||||
ErrorType::ClosingTagMismatch {
|
||||
expected: "div".to_string(),
|
||||
got: "p".to_string(),
|
||||
},
|
||||
);
|
||||
eval_error(
|
||||
b"<ul><li>a</p>",
|
||||
ErrorType::ClosingTagMismatch {
|
||||
expected: "ul".to_string(),
|
||||
got: "p".to_string(),
|
||||
},
|
||||
);
|
||||
eval_error(
|
||||
b"<ul><li><rt>a</p>",
|
||||
ErrorType::ClosingTagMismatch {
|
||||
expected: "ul".to_string(),
|
||||
got: "p".to_string(),
|
||||
},
|
||||
);
|
||||
eval_error(
|
||||
b"<html><head><body><ul><li><rt>a</p>",
|
||||
ErrorType::ClosingTagMismatch {
|
||||
expected: "ul".to_string(),
|
||||
got: "p".to_string(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_removal_of_optional_tags() {
|
||||
eval(b"<ul><li>1</li><li>2</li><li>3</li></ul>", b"<ul><li>1<li>2<li>3</ul>");
|
||||
eval(
|
||||
b"<ul><li>1</li><li>2</li><li>3</li></ul>",
|
||||
b"<ul><li>1<li>2<li>3</ul>",
|
||||
);
|
||||
eval(b"<rt></rt>", b"<rt>");
|
||||
eval(b"<rt></rt><rp>1</rp><div></div>", b"<rt><rp>1</rp><div></div>");
|
||||
eval(
|
||||
b"<rt></rt><rp>1</rp><div></div>",
|
||||
b"<rt><rp>1</rp><div></div>",
|
||||
);
|
||||
eval(b"<div><rt></rt></div>", b"<div><rt></div>");
|
||||
eval(br#"
|
||||
eval(
|
||||
br#"
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
|
@ -158,7 +236,9 @@ fn test_removal_of_optional_tags() {
|
|||
<body>
|
||||
</body>
|
||||
</html>
|
||||
"#, b"<html><head><body>");
|
||||
"#,
|
||||
b"<html><head><body>",
|
||||
);
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<RT></rt>", b"<rt>");
|
||||
}
|
||||
|
@ -168,7 +248,10 @@ fn test_removal_of_optional_closing_p_tag() {
|
|||
eval(b"<p></p><address></address>", b"<p><address></address>");
|
||||
eval(b"<p></p>", b"<p>");
|
||||
eval(b"<map><p></p></map>", b"<map><p></p></map>");
|
||||
eval(b"<map><p></p><address></address></map>", b"<map><p><address></address></map>");
|
||||
eval(
|
||||
b"<map><p></p><address></address></map>",
|
||||
b"<map><p><address></address></map>",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -186,7 +269,10 @@ fn test_attr_single_quoted_value_minification() {
|
|||
eval(b"<a b=\""hello\"></a>", b"<a b='\"hello'></a>");
|
||||
eval(b"<a b='\"hello'></a>", b"<a b='\"hello'></a>");
|
||||
eval(b"<a b='/>a'></a>", b"<a b=\"/>a\"></a>");
|
||||
eval(b"<a b= he"llo ></a>", b"<a b=' he\"llo '></a>");
|
||||
eval(
|
||||
b"<a b= he"llo ></a>",
|
||||
b"<a b=' he\"llo '></a>",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -203,7 +289,10 @@ fn test_attr_unquoted_value_minification() {
|
|||
#[test]
|
||||
fn test_class_attr_value_minification() {
|
||||
eval(b"<a class= c></a>", b"<a class=c></a>");
|
||||
eval(b"<a class= c  d ></a>", b"<a class=\"c d\"></a>");
|
||||
eval(
|
||||
b"<a class= c  d ></a>",
|
||||
b"<a class=\"c d\"></a>",
|
||||
);
|
||||
eval(b"<a class=    ></a>", b"<a></a>");
|
||||
eval(b"<a class=\" c\n \n \"></a>", b"<a class=c></a>");
|
||||
eval(b"<a class=\" c\n \nd \"></a>", b"<a class=\"c d\"></a>");
|
||||
|
@ -218,13 +307,34 @@ fn test_class_attr_value_minification() {
|
|||
#[test]
|
||||
fn test_d_attr_value_minification() {
|
||||
eval(b"<svg><path d= c /></svg>", b"<svg><path d=c /></svg>");
|
||||
eval(b"<svg><path d= c  d  /></svg>", b"<svg><path d=\"c d\"/></svg>");
|
||||
eval(b"<svg><path d=     /></svg>", b"<svg><path/></svg>");
|
||||
eval(b"<svg><path d=\" c\n \n \" /></svg>", b"<svg><path d=c /></svg>");
|
||||
eval(b"<svg><path d=\" c\n \nd \" /></svg>", b"<svg><path d=\"c d\"/></svg>");
|
||||
eval(b"<svg><path d=\" \n \n \" /></svg>", b"<svg><path/></svg>");
|
||||
eval(b"<svg><path d=' c\n \n ' /></svg>", b"<svg><path d=c /></svg>");
|
||||
eval(b"<svg><path d=' c\n \nd ' /></svg>", b"<svg><path d=\"c d\"/></svg>");
|
||||
eval(
|
||||
b"<svg><path d= c  d  /></svg>",
|
||||
b"<svg><path d=\"c d\"/></svg>",
|
||||
);
|
||||
eval(
|
||||
b"<svg><path d=     /></svg>",
|
||||
b"<svg><path/></svg>",
|
||||
);
|
||||
eval(
|
||||
b"<svg><path d=\" c\n \n \" /></svg>",
|
||||
b"<svg><path d=c /></svg>",
|
||||
);
|
||||
eval(
|
||||
b"<svg><path d=\" c\n \nd \" /></svg>",
|
||||
b"<svg><path d=\"c d\"/></svg>",
|
||||
);
|
||||
eval(
|
||||
b"<svg><path d=\" \n \n \" /></svg>",
|
||||
b"<svg><path/></svg>",
|
||||
);
|
||||
eval(
|
||||
b"<svg><path d=' c\n \n ' /></svg>",
|
||||
b"<svg><path d=c /></svg>",
|
||||
);
|
||||
eval(
|
||||
b"<svg><path d=' c\n \nd ' /></svg>",
|
||||
b"<svg><path d=\"c d\"/></svg>",
|
||||
);
|
||||
eval(b"<svg><path d=' \n \n ' /></svg>", b"<svg><path/></svg>");
|
||||
// Attribute names should be case insensitive.
|
||||
eval(b"<svg><path D=' \n \n ' /></svg>", b"<svg><path/></svg>");
|
||||
|
@ -263,12 +373,27 @@ fn test_default_attr_value_removal() {
|
|||
|
||||
#[test]
|
||||
fn test_script_type_attr_value_removal() {
|
||||
eval(b"<script type=\"application/ecmascript\"></script>", b"<script></script>");
|
||||
eval(b"<script type=\"application/javascript\"></script>", b"<script></script>");
|
||||
eval(b"<script type=\"text/jscript\"></script>", b"<script></script>");
|
||||
eval(b"<script type=\"text/plain\"></script>", b"<script type=text/plain></script>");
|
||||
eval(
|
||||
b"<script type=\"application/ecmascript\"></script>",
|
||||
b"<script></script>",
|
||||
);
|
||||
eval(
|
||||
b"<script type=\"application/javascript\"></script>",
|
||||
b"<script></script>",
|
||||
);
|
||||
eval(
|
||||
b"<script type=\"text/jscript\"></script>",
|
||||
b"<script></script>",
|
||||
);
|
||||
eval(
|
||||
b"<script type=\"text/plain\"></script>",
|
||||
b"<script type=text/plain></script>",
|
||||
);
|
||||
// Tag and attribute names should be case insensitive.
|
||||
eval(b"<SCRipt TYPE=\"application/ecmascript\"></SCrIPT>", b"<script></script>");
|
||||
eval(
|
||||
b"<SCRipt TYPE=\"application/ecmascript\"></SCrIPT>",
|
||||
b"<script></script>",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -282,9 +407,15 @@ fn test_empty_attr_value_removal() {
|
|||
|
||||
#[test]
|
||||
fn test_space_between_attrs_minification() {
|
||||
eval(b"<div a=\" \" b=\" \"></div>", b"<div a=\" \"b=\" \"></div>");
|
||||
eval(
|
||||
b"<div a=\" \" b=\" \"></div>",
|
||||
b"<div a=\" \"b=\" \"></div>",
|
||||
);
|
||||
eval(b"<div a=' ' b=\" \"></div>", b"<div a=\" \"b=\" \"></div>");
|
||||
eval(b"<div a=  b=\" \"></div>", b"<div a=\" \"b=\" \"></div>");
|
||||
eval(
|
||||
b"<div a=  b=\" \"></div>",
|
||||
b"<div a=\" \"b=\" \"></div>",
|
||||
);
|
||||
eval(b"<div a=\"1\" b=\" \"></div>", b"<div a=1 b=\" \"></div>");
|
||||
eval(b"<div a='1' b=\" \"></div>", b"<div a=1 b=\" \"></div>");
|
||||
eval(b"<div a=\"a\"b=\"b\"></div>", b"<div a=a b=b></div>");
|
||||
|
@ -304,7 +435,10 @@ fn test_hexadecimal_entity_decoding() {
|
|||
eval(b"0", b"0");
|
||||
eval(b"ᅑ", b"\xe1\x85\x91");
|
||||
eval(b"�", b"\xef\xbf\xbd");
|
||||
eval(b"�", b"\xef\xbf\xbd");
|
||||
eval(
|
||||
b"�",
|
||||
b"\xef\xbf\xbd",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -317,7 +451,10 @@ fn test_decimal_entity_decoding() {
|
|||
eval(b"0", b"0");
|
||||
eval(b"ᅑ", b"\xe1\x85\x91");
|
||||
eval(b"�", b"\xef\xbf\xbd");
|
||||
eval(b"�", b"\xef\xbf\xbd");
|
||||
eval(
|
||||
b"�",
|
||||
b"\xef\xbf\xbd",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -337,9 +474,18 @@ fn test_named_entity_decoding() {
|
|||
|
||||
// Named entities not ending with ';' in attr values are not decoded if immediately
|
||||
// followed by an alphanumeric or `=` character. (See parser for more details.)
|
||||
eval(br#"<a href="exam ple?>a=5"></a>"#, br#"<a href="exam ple?>a=5"></a>"#);
|
||||
eval(br#"<a href="exam ple?>=5"></a>"#, br#"<a href="exam ple?>=5"></a>"#);
|
||||
eval(br#"<a href="exam ple?>~5"></a>"#, br#"<a href="exam ple?>~5"></a>"#);
|
||||
eval(
|
||||
br#"<a href="exam ple?>a=5"></a>"#,
|
||||
br#"<a href="exam ple?>a=5"></a>"#,
|
||||
);
|
||||
eval(
|
||||
br#"<a href="exam ple?>=5"></a>"#,
|
||||
br#"<a href="exam ple?>=5"></a>"#,
|
||||
);
|
||||
eval(
|
||||
br#"<a href="exam ple?>~5"></a>"#,
|
||||
br#"<a href="exam ple?>~5"></a>"#,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -419,9 +565,15 @@ fn test_left_chevron_in_content() {
|
|||
|
||||
#[test]
|
||||
fn test_comments_removal() {
|
||||
eval(b"<pre>a <!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --> b</pre>", b"<pre>a b</pre>");
|
||||
eval(
|
||||
b"<pre>a <!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --> b</pre>",
|
||||
b"<pre>a b</pre>",
|
||||
);
|
||||
eval(b"&a<!-- akd--sj\n <!-- \t\0f--ajk--df->lafj -->mp", b"&");
|
||||
eval(b"<script><!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --></script>", b"<script><!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --></script>");
|
||||
eval(
|
||||
b"<script><!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --></script>",
|
||||
b"<script><!-- akd--sj\n <!-- \t\0f--ajk--df->lafj --></script>",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -434,30 +586,54 @@ fn test_processing_instructions() {
|
|||
#[test]
|
||||
fn test_js_minification() {
|
||||
eval_with_js_min(b"<script>let a = 1;</script>", b"<script>let a=1;</script>");
|
||||
eval_with_js_min(br#"
|
||||
eval_with_js_min(
|
||||
br#"
|
||||
<script>let a = 1;</script>
|
||||
<script>let b = 2;</script>
|
||||
"#, b"<script>let a=1;</script><script>let b=2;</script>");
|
||||
eval_with_js_min(b"<scRIPt type=text/plain> alert(1.00000); </scripT>", b"<script type=text/plain> alert(1.00000); </script>");
|
||||
eval_with_js_min(br#"
|
||||
"#,
|
||||
b"<script>let a=1;</script><script>let b=2;</script>",
|
||||
);
|
||||
eval_with_js_min(
|
||||
b"<scRIPt type=text/plain> alert(1.00000); </scripT>",
|
||||
b"<script type=text/plain> alert(1.00000); </script>",
|
||||
);
|
||||
eval_with_js_min(
|
||||
br#"
|
||||
<script>
|
||||
// This is a comment.
|
||||
let a = 1;
|
||||
</script>
|
||||
"#, b"<script>let a=1;</script>");
|
||||
"#,
|
||||
b"<script>let a=1;</script>",
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
#[test]
|
||||
fn test_js_minification_unintentional_closing_tag() {
|
||||
eval_with_js_min(br#"<script>let a = "</" + "script>";</script>"#, br#"<script>let a="<\/script>";</script>"#);
|
||||
eval_with_js_min(br#"<script>let a = "</S" + "cRiPT>";</script>"#, br#"<script>let a="<\/ScRiPT>";</script>"#);
|
||||
eval_with_js_min(br#"<script>let a = "\u003c/script>";</script>"#, br#"<script>let a="<\/script>";</script>"#);
|
||||
eval_with_js_min(br#"<script>let a = "\u003c/scrIPt>";</script>"#, br#"<script>let a="<\/scrIPt>";</script>"#);
|
||||
eval_with_js_min(
|
||||
br#"<script>let a = "</" + "script>";</script>"#,
|
||||
br#"<script>let a="<\/script>";</script>"#,
|
||||
);
|
||||
eval_with_js_min(
|
||||
br#"<script>let a = "</S" + "cRiPT>";</script>"#,
|
||||
br#"<script>let a="<\/ScRiPT>";</script>"#,
|
||||
);
|
||||
eval_with_js_min(
|
||||
br#"<script>let a = "\u003c/script>";</script>"#,
|
||||
br#"<script>let a="<\/script>";</script>"#,
|
||||
);
|
||||
eval_with_js_min(
|
||||
br#"<script>let a = "\u003c/scrIPt>";</script>"#,
|
||||
br#"<script>let a="<\/scrIPt>";</script>"#,
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
#[test]
|
||||
fn test_css_minification() {
|
||||
eval_with_css_min(b"<style>div { color: yellow }</style>", b"<style>div{color:#ff0}</style>");
|
||||
eval_with_css_min(
|
||||
b"<style>div { color: yellow }</style>",
|
||||
b"<style>div{color:#ff0}</style>",
|
||||
);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue