Implement tests

This commit is contained in:
Wilson Lin 2021-08-09 23:11:24 +10:00
parent de3df3b0fb
commit 73a9b2cfb6
10 changed files with 291 additions and 142 deletions

View File

@ -1,4 +1,6 @@
pub mod gen;
pub mod pattern;
pub mod spec;
#[cfg(test)]
mod tests;
pub mod whitespace;

View File

@ -1,35 +1,7 @@
fn eval_with_cfg(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) {
let mut code = src.to_vec();
let min = super::minify(&mut code, cfg);
assert_eq!(
std::str::from_utf8(&min).unwrap(),
std::str::from_utf8(expected).unwrap(),
);
}
fn eval(src: &'static [u8], expected: &'static [u8]) {
eval_with_cfg(src, expected, &super::Cfg::new());
}
fn eval_with_keep_html_head(src: &'static [u8], expected: &'static [u8]) -> () {
let mut cfg = super::Cfg::new();
cfg.keep_html_and_head_opening_tags = true;
eval_with_cfg(src, expected, &cfg);
}
use crate::tests::eval;
#[cfg(feature = "js-esbuild")]
fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
let mut cfg = super::Cfg::new();
cfg.minify_js = true;
eval_with_cfg(src, expected, &cfg);
}
#[cfg(feature = "js-esbuild")]
fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
let mut cfg = super::Cfg::new();
cfg.minify_css = true;
eval_with_cfg(src, expected, &cfg);
}
use crate::tests::{eval_with_css_min, eval_with_js_min};
#[test]
fn test_collapse_whitespace() {
@ -100,31 +72,12 @@ fn test_no_whitespace_minification() {
);
}
#[test]
fn test_parsing_extra_head_tag() {
// Extra `<head>` in `<label>` should be dropped, so whitespace around `<head>` should be joined and therefore trimmed due to `<label>` whitespace rules.
eval_with_keep_html_head(
b"<html><head><meta><head><link><head><body><label> <pre> </pre> <head> </label>",
b"<html><head><meta><link><body><label><pre> </pre></label>",
);
// Same as above except it's a `</head>`, which should get reinterpreted as a `<head>`.
eval_with_keep_html_head(
b"<html><head><meta><head><link><head><body><label> <pre> </pre> </head> </label>",
b"<html><head><meta><link><body><label><pre> </pre></label>",
);
// `<head>` gets implicitly closed by `<body>`, so any following `</head>` should be ignored. (They should be anyway, since `</head>` would not be a valid closing tag.)
eval_with_keep_html_head(
b"<html><head><body><label> </head> </label>",
b"<html><head><body><label></label>",
);
}
#[test]
fn test_parsing_omitted_closing_tag() {
eval_with_keep_html_head(b"<html>", b"<html>");
eval_with_keep_html_head(b" <html>\n", b"<html>");
eval_with_keep_html_head(b" <!doctype html> <html>\n", b"<!doctype html><html>");
eval_with_keep_html_head(
eval(b"<html>", b"<html>");
eval(b" <html>\n", b"<html>");
eval(b" <!doctype html> <html>\n", b"<!doctype html><html>");
eval(
b"<!doctype html><html><div> <p>Foo</div></html>",
b"<!doctype html><html><div><p>Foo</div>",
);
@ -142,56 +95,29 @@ fn test_self_closing_svg_tag_whitespace_removal() {
#[test]
fn test_parsing_with_omitted_tags() {
eval_with_keep_html_head(b"<ul><li>1<li>2<li>3</ul>", b"<ul><li>1<li>2<li>3</ul>");
eval_with_keep_html_head(b"<rt>", b"<rt>");
eval_with_keep_html_head(b"<rt><rp>1</rp><div></div>", b"<rt><rp>1</rp><div></div>");
eval_with_keep_html_head(b"<div><rt></div>", b"<div><rt></div>");
eval_with_keep_html_head(b"<html><head><body>", b"<html><head><body>");
eval_with_keep_html_head(b"<html><head><body>", b"<html><head><body>");
eval(b"<ul><li>1<li>2<li>3</ul>", b"<ul><li>1<li>2<li>3</ul>");
eval(b"<rt>", b"<rt>");
eval(b"<rt><rp>1</rp><div></div>", b"<rt><rp>1</rp><div></div>");
eval(b"<div><rt></div>", b"<div><rt></div>");
eval(b"<html><head><body>", b"<html><head><body>");
eval(b"<html><head><body>", b"<html><head><body>");
// Tag names should be case insensitive.
eval_with_keep_html_head(b"<rt>", b"<rt>");
}
#[test]
fn test_unmatched_closing_tag() {
eval_with_keep_html_head(b"Hello</p>Goodbye", b"Hello<p>Goodbye");
eval_with_keep_html_head(b"Hello<br></br>Goodbye", b"Hello<br>Goodbye");
eval_with_keep_html_head(b"<div>Hello</p>Goodbye", b"<div>Hello<p>Goodbye");
eval_with_keep_html_head(b"<ul><li>a</p>", b"<ul><li>a<p>");
eval_with_keep_html_head(b"<ul><li><rt>a</p>", b"<ul><li><rt>a<p>");
eval_with_keep_html_head(
b"<html><head><body><ul><li><rt>a</p>",
b"<html><head><body><ul><li><rt>a<p>",
);
}
#[test]
fn test_removal_of_html_and_head_opening_tags() {
// Even though `<head>` is dropped, it's still parsed, so its content is still subject to `<head>` whitespace minification rules.
eval(
b"<!DOCTYPE html><html><head> <meta> <body>",
b"<!DOCTYPE html><meta><body>",
);
// The tag should not be dropped if it has attributes.
eval(
b"<!DOCTYPE html><html lang=en><head> <meta> <body>",
b"<!DOCTYPE html><html lang=en><meta><body>",
);
eval(b"<rt>", b"<rt>");
}
#[test]
fn test_removal_of_optional_tags() {
eval_with_keep_html_head(
eval(
b"<ul><li>1</li><li>2</li><li>3</li></ul>",
b"<ul><li>1<li>2<li>3</ul>",
);
eval_with_keep_html_head(b"<rt></rt>", b"<rt>");
eval_with_keep_html_head(
eval(b"<rt></rt>", b"<rt>");
eval(
b"<rt></rt><rp>1</rp><div></div>",
b"<rt><rp>1</rp><div></div>",
);
eval_with_keep_html_head(b"<div><rt></rt></div>", b"<div><rt></div>");
eval_with_keep_html_head(
eval(b"<div><rt></rt></div>", b"<div><rt></div>");
eval(
br#"
<html>
<head>
@ -204,7 +130,7 @@ fn test_removal_of_optional_tags() {
b"<html><head><body>",
);
// Tag names should be case insensitive.
eval_with_keep_html_head(b"<RT></rt>", b"<rt>");
eval(b"<RT></rt>", b"<rt>");
}
#[test]
@ -250,18 +176,6 @@ fn test_attr_unquoted_value_minification() {
eval(b"<a b=hello></a>", b"<a b=hello></a>");
}
#[test]
fn test_attr_whatwg_unquoted_value_minification() {
let mut cfg = super::Cfg::new();
cfg.ensure_spec_compliant_unquoted_attribute_values = true;
eval_with_cfg(b"<a b==></a>", br#"<a b="="></a>"#, &cfg);
eval_with_cfg(
br#"<a b=`'"<<==/`/></a>"#,
br#"<a b="`'&#34<<==/`/"></a>"#,
&cfg,
);
}
#[test]
fn test_class_attr_value_minification() {
eval(b"<a class=&#x20;c></a>", b"<a class=c></a>");
@ -381,22 +295,6 @@ fn test_empty_attr_value_removal() {
eval(b"<div a></div>", b"<div a></div>");
}
#[test]
fn test_space_between_attrs_minification() {
eval(
b"<div a=\" \" b=\" \"></div>",
b"<div a=\" \"b=\" \"></div>",
);
eval(b"<div a=' ' b=\" \"></div>", b"<div a=\" \"b=\" \"></div>");
eval(
b"<div a=&#x20 b=\" \"></div>",
b"<div a=\" \"b=\" \"></div>",
);
eval(b"<div a=\"1\" b=\" \"></div>", b"<div a=1 b=\" \"></div>");
eval(b"<div a='1' b=\" \"></div>", b"<div a=1 b=\" \"></div>");
eval(b"<div a=\"a\"b=\"b\"></div>", b"<div a=a b=b></div>");
}
#[test]
fn test_hexadecimal_entity_decoding() {
eval(b"&#x2E", b".");
@ -591,33 +489,28 @@ fn test_js_minification_unintentional_closing_tag() {
br#"<script>let a = "</" + "script>";</script>"#,
br#"<script>let a="<\/script>";</script>"#,
);
eval_with_js_min(
br#"<script>let a = "</S" + "cRiPT>";</script>"#,
br#"<script>let a="<\/ScRiPT>";</script>"#,
);
// TODO Reenable once esbuild handles closing tags case insensitively (evanw/esbuild#1509).
// eval_with_js_min(
// br#"<script>let a = "</S" + "cRiPT>";</script>"#,
// br#"<script>let a="<\/ScRiPT>";</script>"#,
// );
eval_with_js_min(
br#"<script>let a = "\u003c/script>";</script>"#,
br#"<script>let a="<\/script>";</script>"#,
);
eval_with_js_min(
br#"<script>let a = "\u003c/scrIPt>";</script>"#,
br#"<script>let a="<\/scrIPt>";</script>"#,
);
// TODO Reenable once esbuild handles closing tags case insensitively (evanw/esbuild#1509).
// eval_with_js_min(
// br#"<script>let a = "\u003c/scrIPt>";</script>"#,
// br#"<script>let a="<\/scrIPt>";</script>"#,
// );
}
#[cfg(feature = "js-esbuild")]
#[test]
fn test_css_minification() {
fn test_style_element_minification() {
// `<style>` contents.
eval_with_css_min(
b"<style>div { color: yellow }</style>",
b"<style>div{color:#ff0}</style>",
);
// `style` attributes.
eval_with_css_min(
br#"<div style="color: yellow;"></div>"#,
br#"<div style=color:#ff0></div>"#,
);
// `style` attributes are removed if fully minified away.
eval_with_css_min(br#"<div style=" /* */ "></div>"#, br#"<div></div>"#);
}

View File

@ -11,6 +11,8 @@ mod common;
mod entity;
mod minify;
mod parse;
#[cfg(test)]
mod tests;
/// Minifies UTF-8 HTML code, represented as an array of bytes.
///

View File

@ -42,6 +42,10 @@ pub fn minify_element(
};
}
// Determinism.
quoted.sort_unstable_by(|a, b| a.0.cmp(&b.0));
unquoted.sort_unstable_by(|a, b| a.0.cmp(&b.0));
// Attributes list could become empty after minification, so check opening tag omission eligibility after attributes minification.
let can_omit_opening_tag = (tag_name == b"html" || tag_name == b"head")
&& quoted.len() + unquoted.len() == 0
@ -77,9 +81,8 @@ pub fn minify_element(
}
if closing_tag == ElementClosingTag::SelfClosing {
// Write a space after the tag name if there are no attributes,
// or the last attribute is unquoted.
if unquoted.len() > 0 || unquoted.len() + quoted.len() == 0 {
// Write a space only if the last attribute is unquoted.
if unquoted.len() > 0 {
out.push(b' ');
};
out.push(b'/');

125
rust/main/src/tests/mod.rs Normal file
View File

@ -0,0 +1,125 @@
use std::str::from_utf8;
use crate::cfg::Cfg;
use crate::minify;
/// Minifies `src` using `cfg` and asserts that the output equals `expected`.
///
/// Both sides are decoded as UTF-8 before comparison, so a failing assertion prints text
/// instead of byte values.
///
/// # Panics
///
/// Panics if the minified output or `expected` is not valid UTF-8, or if the two differ.
pub fn eval_with_cfg(src: &'static [u8], expected: &'static [u8], cfg: &Cfg) {
    // `src` is already a byte-slice reference, so it is passed through without re-borrowing.
    let min = minify(src, cfg);
    assert_eq!(
        from_utf8(&min).expect("minified output is not valid UTF-8"),
        from_utf8(expected).expect("expected output is not valid UTF-8"),
    );
}
/// Runs [`eval_with_cfg`] with a fresh configuration whose `minify_js` flag is switched on.
///
/// Only compiled when the `js-esbuild` feature is enabled.
#[cfg(feature = "js-esbuild")]
pub fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) {
    let mut cfg = Cfg::new();
    cfg.minify_js = true;
    eval_with_cfg(src, expected, &cfg);
}
/// Runs [`eval_with_cfg`] with a fresh configuration whose `minify_css` flag is switched on.
///
/// Only compiled when the `js-esbuild` feature is enabled.
#[cfg(feature = "js-esbuild")]
pub fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) {
    let mut cfg = Cfg::new();
    cfg.minify_css = true;
    eval_with_cfg(src, expected, &cfg);
}
/// Default test entry point: checks that minifying `src` produces `expected`.
///
/// The configuration is `Cfg::new()` with `keep_html_and_head_opening_tags` enabled.
pub fn eval(src: &'static [u8], expected: &'static [u8]) {
    let cfg = {
        let mut base = Cfg::new();
        // The bulk of the shared tests are written on the assumption that `<html>` and
        // `<head>` opening tags are left in place.
        base.keep_html_and_head_opening_tags = true;
        base
    };
    eval_with_cfg(src, expected, &cfg);
}
/// Checks `src` against `expected` using an unmodified `Cfg::new()` configuration, i.e.
/// without the `keep_html_and_head_opening_tags` override that `eval` applies.
fn eval_without_keep_html_head(src: &'static [u8], expected: &'static [u8]) {
    eval_with_cfg(src, expected, &Cfg::new());
}
#[test]
fn test_parsing_extra_head_tag() {
    // Each entry is `(input, expected minified output)`.
    let cases: [(&'static [u8], &'static [u8]); 3] = [
        // A stray `<head>` inside `<label>` is discarded, which joins the whitespace on either
        // side of it; the joined whitespace is then trimmed under `<label>` whitespace rules.
        (
            b"<html><head><meta><head><link><head><body><label> <pre> </pre> <head> </label>",
            b"<html><head><meta><link><body><label><pre> </pre></label>",
        ),
        // Identical to the previous case but with `</head>`, which is expected to be
        // reinterpreted as `<head>`.
        (
            b"<html><head><meta><head><link><head><body><label> <pre> </pre> </head> </label>",
            b"<html><head><meta><link><body><label><pre> </pre></label>",
        ),
        // `<body>` implicitly closes `<head>`, so a later `</head>` is ignored. (It would be
        // ignored regardless, because `</head>` is not a valid closing tag at that point.)
        (
            b"<html><head><body><label> </head> </label>",
            b"<html><head><body><label></label>",
        ),
    ];
    for &(src, expected) in cases.iter() {
        eval(src, expected);
    }
}
#[test]
fn test_removal_of_html_and_head_opening_tags() {
    // Each entry is `(input, expected minified output)`.
    let cases: [(&'static [u8], &'static [u8]); 3] = [
        // `<head>` is removed from the output but is still parsed, so `<head>` whitespace
        // minification rules continue to apply to its content.
        (
            b"<!DOCTYPE html><html><head> <meta> <body>",
            b"<!DOCTYPE html><meta><body>",
        ),
        // An opening tag carrying attributes must be kept.
        (
            b"<!DOCTYPE html><html lang=en><head> <meta> <body>",
            b"<!DOCTYPE html><html lang=en><meta><body>",
        ),
        // An opening tag whose attributes all disappear during minification is removed.
        (
            b"<!DOCTYPE html><html style=' '><head> <meta> <body>",
            b"<!DOCTYPE html><meta><body>",
        ),
    ];
    for &(src, expected) in cases.iter() {
        eval_without_keep_html_head(src, expected);
    }
}
#[test]
fn test_unmatched_closing_tag() {
    // Each entry is `(input, expected minified output)`.
    let cases: [(&'static [u8], &'static [u8]); 6] = [
        (b"Hello</p>Goodbye", b"Hello<p>Goodbye"),
        (b"Hello<br></br>Goodbye", b"Hello<br>Goodbye"),
        (b"<div>Hello</p>Goodbye", b"<div>Hello<p>Goodbye"),
        (b"<ul><li>a</p>", b"<ul><li>a<p>"),
        (b"<ul><li><rt>a</p>", b"<ul><li><rt>a<p>"),
        (
            b"<html><head><body><ul><li><rt>a</p>",
            b"<html><head><body><ul><li><rt>a<p>",
        ),
    ];
    for &(src, expected) in cases.iter() {
        eval(src, expected);
    }
}
#[test]
// NOTE: The inputs here must stay in sync with the onepass variant. The expected outputs differ
// because the main variant reorders attributes.
fn test_space_between_attrs_minification() {
    // Each entry is `(input, expected minified output)`.
    let cases: [(&'static [u8], &'static [u8]); 6] = [
        (
            b"<div a=\" \" b=\" \"></div>",
            b"<div a=\" \"b=\" \"></div>",
        ),
        (b"<div a=' ' b=\" \"></div>", b"<div a=\" \"b=\" \"></div>"),
        (
            b"<div a=&#x20 b=\" \"></div>",
            b"<div a=\" \"b=\" \"></div>",
        ),
        (b"<div a=\"1\" b=\" \"></div>", b"<div b=\" \"a=1></div>"),
        (b"<div a='1' b=\" \"></div>", b"<div b=\" \"a=1></div>"),
        (b"<div a=\"a\"b=\"b\"></div>", b"<div a=a b=b></div>"),
    ];
    for &(src, expected) in cases.iter() {
        eval(src, expected);
    }
}
#[test]
fn test_attr_whatwg_unquoted_value_minification() {
    // Start from `Cfg::new()` and enable only the spec-compliant unquoted-value option.
    let cfg = {
        let mut base = Cfg::new();
        base.ensure_spec_compliant_unquoted_attribute_values = true;
        base
    };
    // Each entry is `(input, expected minified output)`.
    let cases: [(&'static [u8], &'static [u8]); 2] = [
        (b"<a b==></a>", br#"<a b="="></a>"#),
        (
            br#"<a b=`'"<<==/`/></a>"#,
            br#"<a b="`'&#34<<==/`/"></a>"#,
        ),
    ];
    for &(src, expected) in cases.iter() {
        eval_with_cfg(src, expected, &cfg);
    }
}
#[cfg(feature = "js-esbuild")]
#[test]
fn test_style_attr_minification() {
    // Each entry is `(input, expected minified output)`.
    let cases: [(&'static [u8], &'static [u8]); 2] = [
        (
            br#"<div style="color: yellow;"></div>"#,
            br#"<div style=color:#ff0></div>"#,
        ),
        // A `style` attribute that minifies down to nothing is dropped entirely.
        (br#"<div style=" /* */ "></div>"#, br#"<div></div>"#),
    ];
    for &(src, expected) in cases.iter() {
        eval_with_css_min(src, expected);
    }
}

View File

@ -15,3 +15,12 @@ pub struct Cfg {
/// enabled; otherwise, this value has no effect.
pub minify_css: bool,
}
impl Cfg {
pub fn new() -> Cfg {
Cfg {
minify_css: false,
minify_js: false,
}
}
}

View File

@ -10,6 +10,8 @@ mod common;
mod err;
#[macro_use]
mod proc;
#[cfg(test)]
mod tests;
mod unit;
/// Minifies a slice in-place and returns the new minified length.

View File

@ -114,8 +114,7 @@ fn parse_entity(code: &mut [u8], read_pos: usize, write_pos: usize, in_attr_val:
),
EntityType::Named(decoded) => {
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state.
// TODO Generated trie no longer contains encoded values, even if longer.
if decoded[0] == b'&' && decoded.len() > 1
if decoded.len() > match_len
|| in_attr_val
&& *code.get(read_pos + match_len - 1).unwrap() != b';'
&& code

View File

@ -0,0 +1,111 @@
use std::str::from_utf8;
use crate::cfg::Cfg;
use crate::err::ErrorType;
use crate::{in_place, with_friendly_error};
/// Minifies a copy of `src` with `with_friendly_error` and asserts that the first `len`
/// bytes of the result (the returned length) equal `expected`.
///
/// # Panics
///
/// Panics if minification returns an error (the panic message carries the error message and
/// its code context), if either side is not valid UTF-8, or if the output differs from
/// `expected`.
pub fn eval_with_cfg(src: &'static [u8], expected: &'static [u8], cfg: &Cfg) {
    // `with_friendly_error` takes a mutable buffer, so work on an owned copy of the input.
    let mut code = src.to_vec();
    match with_friendly_error(&mut code, cfg) {
        Ok(len) => assert_eq!(
            from_utf8(&code[..len]).unwrap(),
            from_utf8(expected).unwrap()
        ),
        Err(super::FriendlyError {
            code_context,
            message,
            ..
        }) => {
            // Fail with the diagnostic in the panic message itself instead of printing it and
            // then asserting on a constant `false`.
            panic!("minification failed: {}\n{}", message, code_context);
        }
    }
}
/// Runs [`eval_with_cfg`] with a fresh configuration whose `minify_js` flag is switched on.
///
/// Only compiled when the `js-esbuild` feature is enabled.
#[cfg(feature = "js-esbuild")]
pub fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) {
    let mut cfg = Cfg::new();
    cfg.minify_js = true;
    eval_with_cfg(src, expected, &cfg);
}
/// Runs [`eval_with_cfg`] with a fresh configuration whose `minify_css` flag is switched on.
///
/// Only compiled when the `js-esbuild` feature is enabled.
#[cfg(feature = "js-esbuild")]
pub fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) {
    let mut cfg = Cfg::new();
    cfg.minify_css = true;
    eval_with_cfg(src, expected, &cfg);
}
/// Checks that minifying `src` with an unmodified `Cfg::new()` configuration yields `expected`.
pub fn eval(src: &'static [u8], expected: &'static [u8]) {
    let cfg = Cfg::new();
    eval_with_cfg(src, expected, &cfg);
}
fn eval_error(src: &'static [u8], expected: ErrorType) -> () {
let mut code = src.to_vec();
assert_eq!(
in_place(
&mut code,
&Cfg {
minify_js: false,
minify_css: false,
}
)
.unwrap_err()
.error_type,
expected
);
}
#[test]
// NOTE: The inputs here must stay in sync with the main variant. The expected outputs differ
// because the main variant reorders attributes.
fn test_space_between_attrs_minification() {
    // Each entry is `(input, expected minified output)`.
    let cases: [(&'static [u8], &'static [u8]); 6] = [
        (
            b"<div a=\" \" b=\" \"></div>",
            b"<div a=\" \"b=\" \"></div>",
        ),
        (b"<div a=' ' b=\" \"></div>", b"<div a=\" \"b=\" \"></div>"),
        (
            b"<div a=&#x20 b=\" \"></div>",
            b"<div a=\" \"b=\" \"></div>",
        ),
        (b"<div a=\"1\" b=\" \"></div>", b"<div a=1 b=\" \"></div>"),
        (b"<div a='1' b=\" \"></div>", b"<div a=1 b=\" \"></div>"),
        (b"<div a=\"a\"b=\"b\"></div>", b"<div a=a b=b></div>"),
    ];
    for &(src, expected) in cases.iter() {
        eval(src, expected);
    }
}
#[test]
fn test_unmatched_closing_tag() {
    // Builds the `ClosingTagMismatch` error for a given expected/actual tag-name pair.
    let mismatch = |expected: &str, got: &str| ErrorType::ClosingTagMismatch {
        expected: expected.to_string(),
        got: got.to_string(),
    };

    eval_error(b"Hello</p>Goodbye", ErrorType::UnexpectedClosingTag);
    eval_error(b"Hello<br></br>Goodbye", ErrorType::UnexpectedClosingTag);
    eval_error(b"<div>Hello</p>Goodbye", mismatch("div", "p"));
    eval_error(b"<ul><li>a</p>", mismatch("ul", "p"));
    eval_error(b"<ul><li><rt>a</p>", mismatch("ul", "p"));
    eval_error(b"<html><head><body><ul><li><rt>a</p>", mismatch("ul", "p"));
}

View File

@ -93,6 +93,9 @@ if (
cmd("git", "pull");
cmd("bash", "./prebuild.sh");
cmd("cargo", "test", "--features", "js-esbuild", { workingDir: RUST_MAIN_DIR });
cmd("cargo", "test", "--features", "js-esbuild", {
workingDir: RUST_ONEPASS_DIR,
});
for (const f of [
`${RUST_MAIN_DIR}/Cargo.toml`,