Implement c14n; minify doctypes; minify viewport tags

This commit is contained in:
Wilson Lin 2021-08-10 02:56:48 +10:00
parent e6637b2495
commit d427d2753a
24 changed files with 405 additions and 57 deletions

View File

@ -15,19 +15,25 @@ module.exports = {
code = `x{${code}}`;
}
code = esbuild.transformSync(code, {
charset: "utf8",
legalComments: "none",
loader: "css",
minify: true,
sourcemap: false,
}).code;
if (type === "inline") {
code = code.slice(2, -1);
code = code.trim().slice(2, -1);
}
return code;
},
esbuildJs: (code) =>
esbuild.transformSync(code, {
charset: "utf8",
legalComments: "none",
loader: "js",
minify: true,
sourcemap: false,
}).code,
run: (minifierFn) => {

2
debug/diff/c14n/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/Cargo.lock
/target/

View File

@ -0,0 +1,8 @@
[package]
publish = false
name = "c14n"
version = "0.0.1"
edition = "2018"
[dependencies]
minify-html = { path = "../../../rust/main" }

View File

@ -0,0 +1,7 @@
# c14n
Parse HTML from stdin and write a canonical HTML document to stdout. This is useful for preprocessing documents before diffing them. The canonical form is produced as follows:
- Sort attributes by name.
- Decode all entities, then re-encode only special characters consistently.
- Make tag and attribute names lowercase.

View File

@ -0,0 +1,9 @@
use std::io::{stdin, stdout, Read};
use minify_html::canonicalise;
/// Reads an entire HTML document from stdin and writes its canonical form to stdout.
///
/// Panics with a descriptive message if stdin cannot be read or stdout cannot be written.
fn main() {
    let mut src = Vec::new();
    stdin()
        .read_to_end(&mut src)
        .expect("failed to read HTML from stdin");
    // The serialiser issues many small writes, so take the stdout lock once up front instead of
    // re-acquiring it on every write.
    let handle = stdout();
    let mut out = handle.lock();
    canonicalise(&mut out, &src).expect("failed to write canonical HTML to stdout");
}

14
debug/diff/canonicalise Executable file
View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash

# Canonicalise every file matching outputs/*/* in place using the c14n tool.

set -Eeuo pipefail

pushd "$(dirname "$0")" >/dev/null

cargo build --manifest-path c14n/Cargo.toml --release

# Canonicalise into a scratch file first and only then overwrite the original. Reading the file
# through `$(cat ...)` and a here-string would strip trailing newlines, drop NUL bytes and append
# an extra newline, and redirecting straight onto "$f" would truncate it before c14n has run, so
# a c14n failure would destroy the file.
tmp=$(mktemp)
trap 'rm -f "$tmp"' EXIT

for f in outputs/*/*; do
  # Skip anything that is not a regular file (including the literal pattern when nothing matches).
  [[ -f "$f" ]] || continue
  c14n/target/release/c14n < "$f" > "$tmp"
  # Copy the bytes back rather than moving the scratch file so that "$f" keeps its permissions.
  cat "$tmp" > "$f"
done

popd >/dev/null

1
format
View File

@ -10,6 +10,7 @@ for dir in \
bench/runners/minify-html \
bench/runners/minify-html-onepass \
cli \
debug/diff/c14n \
fuzz \
fuzz/process \
java \

View File

@ -1,5 +1,5 @@
<!DOCTYPE html>
<html>
<!doctypehtml>
<html=1>
<head>
<meta charset="utf-8">
<title><title></titl></title>

View File

@ -76,10 +76,10 @@ fn test_no_whitespace_minification() {
fn test_parsing_omitted_closing_tag() {
eval(b"<html>", b"<html>");
eval(b" <html>\n", b"<html>");
eval(b" <!doctype html> <html>\n", b"<!doctype html><html>");
eval(b" <!doctypehtml> <html>\n", b"<!doctypehtml><html>");
eval(
b"<!doctype html><html><div> <p>Foo</div></html>",
b"<!doctype html><html><div><p>Foo</div>",
b"<!doctypehtml><html><div> <p>Foo</div></html>",
b"<!doctypehtml><html><div><p>Foo</div>",
);
}

View File

@ -37,6 +37,20 @@ pub fn collapse_whitespace(val: &mut Vec<u8>) {
val.truncate(write);
}
/// Removes, in place, every byte of `val` that the `WHITESPACE` lookup marks as whitespace.
///
/// The remaining bytes keep their original relative order.
pub fn remove_all_whitespace(val: &mut Vec<u8>) {
    val.retain(|&byte| !WHITESPACE[byte]);
}
pub fn is_all_whitespace(val: &[u8]) -> bool {
for &c in val {
if !WHITESPACE[c] {

140
rust/main/src/ast/c14n.rs Normal file
View File

@ -0,0 +1,140 @@
use std::io::Write;
use aho_corasick::{AhoCorasickBuilder, MatchKind};
use lazy_static::lazy_static;
use crate::ast::{ElementClosingTag, NodeData};
use crate::common::pattern::Replacer;
// Replacers used when serialising the canonical form. In each one, the N-th pattern passed to
// `build` is paired with the N-th entry of the replacement list that follows it, so the two
// lists must be kept in the same order.
lazy_static! {
// Text nodes: only `&` and `<` are re-encoded.
static ref TEXT_REPLACER: Replacer = Replacer::new(
AhoCorasickBuilder::new()
.dfa(true)
.match_kind(MatchKind::LeftmostLongest)
.build(vec![b"&".to_vec(), b"<".to_vec(),]),
vec![b"&amp;".to_vec(), b"&lt;".to_vec(),],
);
// Attribute values delimited by `"`: `&` and the delimiter itself are re-encoded.
static ref DOUBLE_QUOTED_REPLACER: Replacer = Replacer::new(
AhoCorasickBuilder::new()
.dfa(true)
.match_kind(MatchKind::LeftmostLongest)
.build(vec![b"&".to_vec(), b"\"".to_vec(),]),
vec![b"&amp;".to_vec(), b"&#34;".to_vec(),],
);
// Attribute values delimited by `'`: `&` and the delimiter itself are re-encoded.
static ref SINGLE_QUOTED_REPLACER: Replacer = Replacer::new(
AhoCorasickBuilder::new()
.dfa(true)
.match_kind(MatchKind::LeftmostLongest)
.build(vec![b"&".to_vec(), b"'".to_vec(),]),
vec![b"&amp;".to_vec(), b"&#39;".to_vec(),],
);
// Unquoted attribute values: `&`, `>`, both quote characters, and the tab, line feed, form feed,
// carriage return and space bytes are re-encoded.
static ref UNQUOTED_REPLACER: Replacer = Replacer::new(
AhoCorasickBuilder::new()
.dfa(true)
.match_kind(MatchKind::LeftmostLongest)
.build(vec![
b"&".to_vec(),
b">".to_vec(),
b"\"".to_vec(),
b"'".to_vec(),
b"\x09".to_vec(),
b"\x0a".to_vec(),
b"\x0c".to_vec(),
b"\x0d".to_vec(),
b"\x20".to_vec(),
]),
vec![
b"&amp;".to_vec(),
b"&gt;".to_vec(),
b"&#34;".to_vec(),
b"&#39;".to_vec(),
b"&#9;".to_vec(),
b"&#10;".to_vec(),
b"&#12;".to_vec(),
b"&#13;".to_vec(),
b"&#32;".to_vec(),
],
);
}
/// Writes the canonical serialisation of `node` to `out`, recursing into the children of
/// elements.
///
/// - Bangs, comments, doctypes and instructions are always written with their closing delimiter.
/// - Element attributes are written sorted by name. An attribute whose value is empty is written
///   as a bare name; otherwise the value is re-encoded and wrapped in the quote recorded on the
///   attribute value (or left unquoted when none was recorded).
/// - A closing tag is written only when the element's closing tag is `Present`.
/// - Script/style content is written verbatim; text has `&` and `<` re-encoded.
///
/// Returns the first I/O error reported by `out`, if any.
pub fn c14n_serialise_ast<T: Write>(out: &mut T, node: &NodeData) -> std::io::Result<()> {
    match node {
        NodeData::Bang { code, .. } => write_delimited(out, b"<!", code, b">"),
        NodeData::Comment { code, .. } => write_delimited(out, b"<!--", code, b"-->"),
        NodeData::Doctype { legacy, .. } => {
            out.write_all(b"<!DOCTYPE html")?;
            if !legacy.is_empty() {
                out.write_all(b" ")?;
                out.write_all(legacy)?;
            }
            out.write_all(b">")
        }
        NodeData::Element {
            attributes,
            closing_tag,
            children,
            name,
            ..
        } => {
            out.write_all(b"<")?;
            out.write_all(name)?;

            // The attribute map has no defined iteration order, so sort by name for stable output.
            let mut ordered: Vec<_> = attributes.iter().collect();
            ordered.sort_unstable_by(|lhs, rhs| lhs.0.cmp(rhs.0));
            for (attr_name, attr_val) in ordered {
                out.write_all(b" ")?;
                out.write_all(attr_name)?;
                if attr_val.value.is_empty() {
                    // Empty values are written as a bare attribute name.
                    continue;
                }
                out.write_all(b"=")?;
                match attr_val.quote {
                    Some(b'"') => write_delimited(
                        out,
                        b"\"",
                        &DOUBLE_QUOTED_REPLACER.replace_all(&attr_val.value),
                        b"\"",
                    )?,
                    Some(b'\'') => write_delimited(
                        out,
                        b"'",
                        &SINGLE_QUOTED_REPLACER.replace_all(&attr_val.value),
                        b"'",
                    )?,
                    None => out.write_all(&UNQUOTED_REPLACER.replace_all(&attr_val.value))?,
                    _ => unreachable!(),
                }
            }

            if matches!(closing_tag, ElementClosingTag::SelfClosing) {
                out.write_all(b" /")?;
            }
            out.write_all(b">")?;

            for child in children {
                c14n_serialise_ast(out, child)?;
            }

            if matches!(closing_tag, ElementClosingTag::Present) {
                write_delimited(out, b"</", name, b">")?;
            }
            Ok(())
        }
        NodeData::Instruction { code, .. } => write_delimited(out, b"<?", code, b"?>"),
        NodeData::ScriptOrStyleContent { code, .. } => out.write_all(code),
        NodeData::Text { value } => out.write_all(&TEXT_REPLACER.replace_all(value)),
    }
}

/// Writes `open`, then `body`, then `close` to `out`, stopping at the first I/O error.
fn write_delimited<T: Write>(
    out: &mut T,
    open: &[u8],
    body: &[u8],
    close: &[u8],
) -> std::io::Result<()> {
    out.write_all(open)?;
    out.write_all(body)?;
    out.write_all(close)
}

View File

@ -4,6 +4,8 @@ use std::str::from_utf8;
use crate::common::spec::tag::ns::Namespace;
pub mod c14n;
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum ElementClosingTag {
Omitted,
@ -19,6 +21,32 @@ pub enum ScriptOrStyleLang {
JS,
}
/// The value of an element attribute, together with the quote byte recorded for it.
pub struct AttrVal {
    // For serialisation only, not used for equality or value.
    pub quote: Option<u8>,
    pub value: Vec<u8>,
}

impl AttrVal {
    /// Borrows the raw bytes of the value.
    pub fn as_slice(&self) -> &[u8] {
        self.value.as_slice()
    }
}

impl Debug for AttrVal {
    /// Writes the value as text; `quote` is not included.
    ///
    /// The value is arbitrary bytes, so it is decoded lossily: invalid UTF-8 sequences are shown
    /// as U+FFFD instead of making debug formatting panic.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(&String::from_utf8_lossy(&self.value))
    }
}

impl PartialEq for AttrVal {
    /// Two values are equal when their bytes are equal; `quote` is deliberately ignored.
    fn eq(&self, other: &Self) -> bool {
        self.value == other.value
    }
}

impl Eq for AttrVal {}
// Derive Eq for testing.
#[derive(Eq, PartialEq)]
pub enum NodeData {
@ -32,8 +60,13 @@ pub enum NodeData {
// If the source unexpectedly ended before `-->`, we can't add it, as otherwise output could be longer than source.
ended: bool,
},
Doctype {
legacy: Vec<u8>,
// If the source unexpectedly ended before `>`, we can't add it, as otherwise output could be longer than source.
ended: bool,
},
Element {
attributes: HashMap<Vec<u8>, Vec<u8>>,
attributes: HashMap<Vec<u8>, AttrVal>,
children: Vec<NodeData>,
// If the source doesn't have a closing tag, then we can't add one, as otherwise output could be longer than source.
closing_tag: ElementClosingTag,
@ -59,10 +92,6 @@ pub enum NodeData {
},
}
fn str(bytes: &[u8]) -> &str {
from_utf8(bytes).unwrap()
}
impl Debug for NodeData {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
@ -76,6 +105,11 @@ impl Debug for NodeData {
.field("code", &from_utf8(code).unwrap().to_string())
.field("ended", ended)
.finish(),
NodeData::Doctype { legacy, ended } => f
.debug_struct("Doctype")
.field("legacy", &from_utf8(legacy).unwrap().to_string())
.field("ended", ended)
.finish(),
NodeData::Element {
attributes,
children,
@ -86,9 +120,9 @@ impl Debug for NodeData {
} => f
.debug_struct("Element")
.field("tag", &{
let mut out = format!("{:?}:{}", namespace, str(name));
let mut out = format!("{:?}:{}", namespace, from_utf8(name).unwrap());
for (n, v) in attributes {
out.push_str(format!(" {}={}", str(n), str(v)).as_str());
out.push_str(format!(" {}={:?}", from_utf8(n).unwrap(), v).as_str());
}
out
})
@ -109,7 +143,7 @@ impl Debug for NodeData {
.field("code", &from_utf8(code).unwrap().to_string())
.field("lang", lang)
.finish(),
NodeData::Text { value } => f.write_str(str(value)),
NodeData::Text { value } => f.write_str(from_utf8(value).unwrap()),
}
}
}

View File

@ -1,3 +1,6 @@
use std::io::Write;
use crate::ast::c14n::c14n_serialise_ast;
pub use crate::cfg::Cfg;
use crate::common::spec::tag::ns::Namespace;
use crate::common::spec::tag::EMPTY_SLICE;
@ -39,3 +42,12 @@ pub fn minify(src: &[u8], cfg: &Cfg) -> Vec<u8> {
minify_content(cfg, &mut out, false, EMPTY_SLICE, parsed.children);
out
}
/// Parses `src` as HTML content and writes the canonical serialisation of every top-level node
/// to `out`, in document order.
///
/// Returns the first I/O error reported by `out`, if any.
pub fn canonicalise<T: Write>(out: &mut T, src: &[u8]) -> std::io::Result<()> {
    let mut code = Code::new(src);
    let root = parse_content(&mut code, Namespace::Html, EMPTY_SLICE, EMPTY_SLICE);
    root.children
        .iter()
        .try_for_each(|node| c14n_serialise_ast(out, node))
}

View File

@ -11,7 +11,9 @@ use crate::common::gen::codepoints::DIGIT;
use crate::common::pattern::Replacer;
use crate::common::spec::script::JAVASCRIPT_MIME_TYPES;
use crate::common::spec::tag::ns::Namespace;
use crate::common::whitespace::{collapse_whitespace, left_trim, right_trim};
use crate::common::whitespace::{
collapse_whitespace, left_trim, remove_all_whitespace, right_trim,
};
use crate::entity::encode::encode_entities;
use crate::Cfg;
@ -184,8 +186,8 @@ fn build_whatwg_unquoted_replacer() -> Replacer {
lazy_static! {
static ref DOUBLE_QUOTED_REPLACER: Replacer = build_double_quoted_replacer();
static ref SINGLE_QUOTED_REPLACER: Replacer = build_single_quoted_replacer();
static ref UNQUOTED_QUOTED_REPLACER: Replacer = build_unquoted_replacer();
static ref WHATWG_UNQUOTED_QUOTED_REPLACER: Replacer = build_whatwg_unquoted_replacer();
static ref UNQUOTED_REPLACER: Replacer = build_unquoted_replacer();
static ref WHATWG_UNQUOTED_REPLACER: Replacer = build_whatwg_unquoted_replacer();
}
pub struct AttrMinifiedValue {
@ -244,12 +246,12 @@ pub fn encode_unquoted(val: &[u8], whatwg: bool) -> AttrMinifiedValue {
AttrMinifiedValue {
quoted: false,
prefix: b"",
data: WHATWG_UNQUOTED_QUOTED_REPLACER.replace_all(val),
data: WHATWG_UNQUOTED_REPLACER.replace_all(val),
start: 0,
suffix: b"",
}
} else {
let data = UNQUOTED_QUOTED_REPLACER.replace_all(val);
let data = UNQUOTED_REPLACER.replace_all(val);
let prefix: &'static [u8] = match data.get(0) {
Some(b'"') => match data.get(1) {
Some(&c2) if DIGIT[c2] || c2 == b';' => b"&#34;",
@ -282,6 +284,8 @@ pub fn minify_attr(
cfg: &Cfg,
ns: Namespace,
tag: &[u8],
// True if element is <meta> and has an attribute `name` equal to `viewport`.
is_meta_viewport: bool,
name: &[u8],
mut value_raw: Vec<u8>,
) -> AttrMinified {
@ -293,6 +297,10 @@ pub fn minify_attr(
let redundant_if_empty = attr_cfg.filter(|attr| attr.redundant_if_empty).is_some();
let default_value = attr_cfg.and_then(|attr| attr.default_value);
if is_meta_viewport {
remove_all_whitespace(&mut value_raw);
};
// Trim before checking is_boolean as the entire attribute could be redundant post-minification.
if should_collapse_and_trim {
right_trim(&mut value_raw);

View File

@ -13,6 +13,7 @@ use crate::entity::encode::encode_entities;
use crate::minify::bang::minify_bang;
use crate::minify::comment::minify_comment;
use crate::minify::css::minify_css;
use crate::minify::doctype::minify_doctype;
use crate::minify::element::minify_element;
use crate::minify::instruction::minify_instruction;
use crate::minify::js::minify_js;
@ -117,6 +118,7 @@ pub fn minify_content(
match c {
NodeData::Bang { code, ended } => minify_bang(cfg, out, &code, ended),
NodeData::Comment { code, ended } => minify_comment(cfg, out, &code, ended),
NodeData::Doctype { legacy, ended } => minify_doctype(cfg, out, &legacy, ended),
NodeData::Element {
attributes,
children,

View File

@ -0,0 +1,12 @@
use crate::cfg::Cfg;
/// Appends a minified doctype to `out`.
///
/// The output always starts with `<!doctypehtml`. If `legacy` is non-empty it follows after a
/// single space. The closing `>` is written only when `ended` is true, i.e. when the source
/// doctype was itself closed.
pub fn minify_doctype(_cfg: &Cfg, out: &mut Vec<u8>, legacy: &[u8], ended: bool) {
    const PREFIX: &[u8] = b"<!doctypehtml";

    out.extend_from_slice(PREFIX);
    match legacy {
        [] => {}
        rest => {
            out.push(b' ');
            out.extend_from_slice(rest);
        }
    }
    if ended {
        out.push(b'>');
    }
}

View File

@ -1,6 +1,6 @@
use std::collections::HashMap;
use crate::ast::{ElementClosingTag, NodeData};
use crate::ast::{AttrVal, ElementClosingTag, NodeData};
use crate::cfg::Cfg;
use crate::common::spec::tag::ns::Namespace;
use crate::common::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
@ -19,7 +19,7 @@ pub fn minify_element(
// If the last node of the parent is an element and it's this one.
is_last_child_text_or_element_node: bool,
tag_name: &[u8],
attributes: HashMap<Vec<u8>, Vec<u8>>,
attributes: HashMap<Vec<u8>, AttrVal>,
closing_tag: ElementClosingTag,
children: Vec<NodeData>,
) {
@ -27,8 +27,14 @@ pub fn minify_element(
let mut quoted = Vec::new();
let mut unquoted = Vec::new();
let is_meta_viewport = tag_name == b"meta"
&& attributes
.get(b"name".as_ref())
.filter(|a| a.value.eq_ignore_ascii_case(b"viewport"))
.is_some();
for (name, value) in attributes {
match minify_attr(cfg, ns, tag_name, &name, value) {
match minify_attr(cfg, ns, tag_name, is_meta_viewport, &name, value.value) {
AttrMinified::Redundant => {}
a @ AttrMinified::NoValue => unquoted.push((name, a)),
AttrMinified::Value(v) => {

View File

@ -3,6 +3,7 @@ pub mod bang;
pub mod comment;
pub mod content;
pub mod css;
pub mod doctype;
pub mod element;
pub mod esbuild;
pub mod instruction;

View File

@ -11,21 +11,23 @@ use crate::entity::decode::decode_entities;
use crate::parse::bang::parse_bang;
use crate::parse::comment::parse_comment;
use crate::parse::content::ContentType::*;
use crate::parse::doctype::parse_doctype;
use crate::parse::element::{parse_element, parse_tag, peek_tag_name};
use crate::parse::instruction::parse_instruction;
use crate::parse::Code;
#[derive(Copy, Clone, Eq, PartialEq)]
enum ContentType {
Text,
OpeningTag,
ClosingTag,
Instruction,
Bang,
ClosingTag,
Comment,
Doctype,
IgnoredTag,
Instruction,
MalformedLeftChevronSlash,
OmittedClosingTag,
IgnoredTag,
OpeningTag,
Text,
}
fn maybe_ignore_html_head_body(
@ -94,6 +96,9 @@ fn build_content_type_matcher() -> (AhoCorasick, Vec<ContentType>) {
patterns.push(b"<?".to_vec());
types.push(ContentType::Instruction);
patterns.push(b"<!doctype".to_vec());
types.push(ContentType::Doctype);
patterns.push(b"<!".to_vec());
types.push(ContentType::Bang);
@ -102,6 +107,7 @@ fn build_content_type_matcher() -> (AhoCorasick, Vec<ContentType>) {
(
AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.dfa(true)
.match_kind(MatchKind::LeftmostLongest)
// Keep in sync with order of CONTENT_TYPE_FROM_PATTERN.
@ -182,6 +188,7 @@ pub fn parse_content(
Instruction => nodes.push(parse_instruction(code)),
Bang => nodes.push(parse_bang(code)),
Comment => nodes.push(parse_comment(code)),
Doctype => nodes.push(parse_doctype(code)),
MalformedLeftChevronSlash => code.shift(match memrchr(b'>', code.as_slice()) {
Some(m) => m + 1,
None => code.rem(),

View File

@ -0,0 +1,24 @@
use memchr::memchr;
use crate::ast::NodeData;
use crate::common::gen::codepoints::WHITESPACE;
use crate::parse::Code;
/// Parses a doctype starting at the current position of `code`, which must be at `<!doctype`
/// (any ASCII case).
///
/// Skips `<!doctype`, optional whitespace, an optional case-insensitive `html`, and more optional
/// whitespace. Everything from there up to (not including) the next `>` is kept as `legacy`.
/// `ended` is false when no `>` was found, in which case the rest of the input becomes `legacy`.
pub fn parse_doctype(code: &mut Code) -> NodeData {
    debug_assert!(code.as_slice()[..9].eq_ignore_ascii_case(b"<!doctype"));
    code.shift(9);
    code.shift_while_in_lookup(WHITESPACE);
    code.shift_if_next_seq_case_insensitive(b"html");
    code.shift_while_in_lookup(WHITESPACE);

    let closing = memchr(b'>', code.as_slice());
    let ended = closing.is_some();
    // Without a closing `>` the doctype runs to the end of the input.
    let legacy_len = closing.unwrap_or_else(|| code.rem());
    let legacy = code.copy_and_shift(legacy_len);
    // Step over the `>` only if there was one; it might be EOF.
    code.shift(usize::from(ended));

    NodeData::Doctype { legacy, ended }
}

View File

@ -1,6 +1,6 @@
use std::collections::HashMap;
use crate::ast::{ElementClosingTag, NodeData, ScriptOrStyleLang};
use crate::ast::{AttrVal, ElementClosingTag, NodeData, ScriptOrStyleLang};
use crate::common::gen::codepoints::{
ATTR_QUOTE, DOUBLE_QUOTE, NOT_UNQUOTED_ATTR_VAL_CHAR, SINGLE_QUOTE, TAG_NAME_CHAR, WHITESPACE,
WHITESPACE_OR_SLASH, WHITESPACE_OR_SLASH_OR_EQUALS_OR_RIGHT_CHEVRON,
@ -37,7 +37,7 @@ pub fn peek_tag_name(code: &mut Code) -> Vec<u8> {
// Derive Eq for testing.
#[derive(Eq, PartialEq)]
pub struct ParsedTag {
pub attributes: HashMap<Vec<u8>, Vec<u8>>,
pub attributes: HashMap<Vec<u8>, AttrVal>,
pub name: Vec<u8>,
pub self_closing: bool,
}
@ -48,11 +48,7 @@ impl Debug for ParsedTag {
let mut attrs = self.attributes.iter().collect::<Vec<_>>();
attrs.sort_unstable_by(|a, b| a.0.cmp(b.0));
for (n, v) in attrs {
f.write_fmt(format_args!(
" {}={}",
from_utf8(n).unwrap(),
from_utf8(v).unwrap()
))?;
f.write_fmt(format_args!(" {}={:?}", from_utf8(n).unwrap(), v))?;
}
if self.self_closing {
f.write_str(" />")?;
@ -65,7 +61,7 @@ impl Debug for ParsedTag {
// TODO Use generics to create version that doesn't create a HashMap.
pub fn parse_tag(code: &mut Code) -> ParsedTag {
let elem_name = parse_tag_name(code);
let mut attributes = HashMap::<Vec<u8>, Vec<u8>>::new();
let mut attributes = HashMap::new();
let self_closing;
loop {
// At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one).
@ -92,7 +88,10 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag {
let has_value = code.shift_if_next(b'=');
code.shift_while_in_lookup(WHITESPACE);
let attr_value = if !has_value {
Vec::new()
AttrVal {
quote: None,
value: Vec::new(),
}
} else {
// TODO Replace ATTR_QUOTE with direct comparison.
let attr_delim = code.shift_if_next_in_lookup(ATTR_QUOTE);
@ -111,7 +110,10 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag {
// It might not be next if EOF (i.e. attribute value not closed).
code.shift_if_next(c);
};
attr_value
AttrVal {
quote: attr_delim,
value: attr_value,
}
};
attributes.insert(attr_name, attr_value);
}

View File

@ -3,6 +3,7 @@ use crate::common::gen::codepoints::Lookup;
pub mod bang;
pub mod comment;
pub mod content;
pub mod doctype;
pub mod element;
pub mod instruction;
pub mod script;
@ -63,6 +64,20 @@ impl<'c> Code<'c> {
}
}
/// If the upcoming bytes equal `seq`, ignoring ASCII case, advances past them and returns true.
///
/// Returns false and leaves the position unchanged otherwise, including when fewer than
/// `seq.len()` bytes remain.
pub fn shift_if_next_seq_case_insensitive(&mut self, seq: &[u8]) -> bool {
    let end = self.next + seq.len();
    let is_match = match self.code.get(self.next..end) {
        Some(candidate) => candidate.eq_ignore_ascii_case(seq),
        None => false,
    };
    if is_match {
        self.next = end;
    }
    is_match
}
pub fn shift_if_next_in_lookup(&mut self, lookup: &'static Lookup) -> Option<u8> {
let c = self.code.get(self.next).filter(|&&n| lookup[n]).copied();
if c.is_some() {

View File

@ -1,11 +1,18 @@
use std::collections::HashMap;
use crate::ast::{ElementClosingTag, NodeData};
use crate::ast::{AttrVal, ElementClosingTag, NodeData};
use crate::common::spec::tag::ns::Namespace;
use crate::common::spec::tag::EMPTY_SLICE;
use crate::parse::element::{parse_element, parse_tag, ParsedTag};
use crate::parse::Code;
/// Builds an expected attribute value from `v` with no quote recorded.
///
/// `AttrVal` equality only compares the bytes, so the quote chosen here does not affect the
/// assertions below.
fn val(v: &[u8]) -> AttrVal {
    let value = Vec::from(v);
    AttrVal { quote: None, value }
}
#[test]
fn test_parse_tag() {
let mut code = Code::new(
@ -20,20 +27,20 @@ fn test_parse_tag() {
tag,
ParsedTag {
attributes: {
let mut map = HashMap::<Vec<u8>, Vec<u8>>::new();
map.insert(b"type".to_vec(), b"password".to_vec());
map.insert(b"\"a\"".to_vec(), b" b ".to_vec());
map.insert(b":cd".to_vec(), b"".to_vec());
map.insert(b"e".to_vec(), b"".to_vec());
map.insert(b"=fg".to_vec(), b"/\\h".to_vec());
map.insert(b"i".to_vec(), b"".to_vec());
map.insert(b"j".to_vec(), b"".to_vec());
map.insert(b"k".to_vec(), b"".to_vec());
map.insert(b"l".to_vec(), b"".to_vec());
map.insert(b"m".to_vec(), b"n=o".to_vec());
map.insert(b"q".to_vec(), b"=\\r/s/".to_vec());
map.insert(b"t]".to_vec(), b"/u".to_vec());
map.insert(b"w".to_vec(), b"//".to_vec());
let mut map = HashMap::<Vec<u8>, AttrVal>::new();
map.insert(b"type".to_vec(), val(b"password"));
map.insert(b"\"a\"".to_vec(), val(b" b "));
map.insert(b":cd".to_vec(), val(b""));
map.insert(b"e".to_vec(), val(b""));
map.insert(b"=fg".to_vec(), val(b"/\\h"));
map.insert(b"i".to_vec(), val(b""));
map.insert(b"j".to_vec(), val(b""));
map.insert(b"k".to_vec(), val(b""));
map.insert(b"l".to_vec(), val(b""));
map.insert(b"m".to_vec(), val(b"n=o"));
map.insert(b"q".to_vec(), val(b"=\\r/s/"));
map.insert(b"t]".to_vec(), val(b"/u"));
map.insert(b"w".to_vec(), val(b"//"));
map
},
name: b"input".to_vec(),
@ -50,8 +57,8 @@ fn test_parse_element() {
elem,
NodeData::Element {
attributes: {
let mut map = HashMap::<Vec<u8>, Vec<u8>>::new();
map.insert(b"b".to_vec(), br#"\"c\""#.to_vec());
let mut map = HashMap::<Vec<u8>, AttrVal>::new();
map.insert(b"b".to_vec(), val(br#"\"c\""#));
map
},
children: vec![],

View File

@ -33,6 +33,15 @@ fn eval_without_keep_html_head(src: &'static [u8], expected: &'static [u8]) -> (
eval_with_cfg(src, expected, &Cfg::new());
}
// Checks that a doctype is rewritten to the shortened `<!doctypehtml` form, and that anything
// following `html` in the source doctype is kept after a single space.
#[test]
fn test_minification_of_doctype() {
// Plain HTML5 doctype.
eval(b"<!DOCTYPE html><html>", b"<!doctypehtml><html>");
// Doctype carrying a legacy string after `html`.
eval(
b"<!DOCTYPE html SYSTEM 'about:legacy-compat'><html>",
b"<!doctypehtml SYSTEM 'about:legacy-compat'><html>",
);
}
#[test]
fn test_parsing_extra_head_tag() {
// Extra `<head>` in `<label>` should be dropped, so whitespace around `<head>` should be joined and therefore trimmed due to `<label>` whitespace rules.
@ -57,17 +66,17 @@ fn test_removal_of_html_and_head_opening_tags() {
// Even though `<head>` is dropped, it's still parsed, so its content is still subject to `<head>` whitespace minification rules.
eval_without_keep_html_head(
b"<!DOCTYPE html><html><head> <meta> <body>",
b"<!DOCTYPE html><meta><body>",
b"<!doctypehtml><meta><body>",
);
// The tag should not be dropped if it has attributes.
eval_without_keep_html_head(
b"<!DOCTYPE html><html lang=en><head> <meta> <body>",
b"<!DOCTYPE html><html lang=en><meta><body>",
b"<!doctypehtml><html lang=en><meta><body>",
);
// The tag should be dropped if it has no attributes after minification.
eval_without_keep_html_head(
b"<!DOCTYPE html><html style=' '><head> <meta> <body>",
b"<!DOCTYPE html><meta><body>",
b"<!doctypehtml><meta><body>",
);
}
@ -113,6 +122,14 @@ fn test_attr_whatwg_unquoted_value_minification() {
);
}
// Checks that whitespace inside the `content` value of a `<meta name=viewport>` tag is removed,
// which leaves a value that is expected to be written without quotes.
#[test]
fn test_viewport_attr_minification() {
eval(
b"<meta name=viewport content='width=device-width, initial-scale=1'>",
b"<meta content=width=device-width,initial-scale=1 name=viewport>",
);
}
#[cfg(feature = "js-esbuild")]
#[test]
fn test_style_attr_minification() {