Implement JS and CSS minification; add tests

This commit is contained in:
Wilson Lin 2021-08-07 18:51:22 +10:00
parent 6650d94485
commit 4c0eb3ed28
7 changed files with 154 additions and 39 deletions

View File

@ -22,6 +22,6 @@ js-esbuild = ["crossbeam", "esbuild-rs"]
[dependencies]
aho-corasick = "0.7"
crossbeam = { version = "0.7", optional = true }
esbuild-rs = { version = "0.8.30", optional = true }
esbuild-rs = { version = "0.12.18", optional = true }
lazy_static = "1.4"
memchr = "2"

View File

@ -1,6 +1,39 @@
#[cfg(feature = "js-esbuild")]
use {
crate::minify::esbuild::minify_using_esbuild,
aho_corasick::{AhoCorasick, AhoCorasickBuilder},
esbuild_rs::{Loader, TransformOptions, TransformOptionsBuilder},
lazy_static::lazy_static,
std::sync::Arc,
};
use crate::cfg::Cfg;
pub fn minify_css(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
// TODO
out.extend_from_slice(code);
#[cfg(feature = "js-esbuild")]
// Lazily initialised statics used by the esbuild-backed `minify_css`.
lazy_static! {
// Matcher for the literal `</style`, ignoring ASCII case.
static ref STYLE_END: AhoCorasick = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(&["</style"]);
// esbuild transform options for CSS input with identifier, syntax and whitespace minification
// all switched on.
static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
let mut builder = TransformOptionsBuilder::new();
builder.loader = Loader::CSS;
builder.minify_identifiers = true;
builder.minify_syntax = true;
builder.minify_whitespace = true;
builder.build()
};
}
#[cfg(not(feature = "js-esbuild"))]
// Fallback used when the `js-esbuild` feature is disabled: no minifier is available, so `code` is
// appended to `out` unchanged and `_cfg` is ignored.
pub fn minify_css(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
    // `code` is already a slice reference; pass it through without re-borrowing.
    out.extend_from_slice(code);
}
#[cfg(feature = "js-esbuild")]
// Appends the CSS in `code` to `out`. When `cfg.minify_css` is set the code is run through
// esbuild (see `minify_using_esbuild`), with matches of `STYLE_END` escaped in the result;
// otherwise it is copied verbatim.
pub fn minify_css(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
    if cfg.minify_css {
        // Borrow the shared options straight from the static: cloning the `Arc` only to take a
        // reference to the clone costs a refcount round-trip on every call for no benefit.
        minify_using_esbuild(out, code, &TRANSFORM_OPTIONS, &STYLE_END);
    } else {
        out.extend_from_slice(code);
    }
}

42
src/minify/esbuild.rs Normal file
View File

@ -0,0 +1,42 @@
#[cfg(feature = "js-esbuild")]
use {aho_corasick::AhoCorasick, crossbeam::sync::WaitGroup, esbuild_rs::TransformOptions};
#[cfg(feature = "js-esbuild")]
// Runs `code` through esbuild with `transform_options` and appends the trimmed result to `out`.
// Every match of `tag_to_escape` (e.g. `</script`) in the result is written as `<\/` followed by
// the rest of the match, so the matched text no longer appears verbatim in the output.
//
// Blocks the calling thread until the esbuild result callback has finished.
//
// TODO The use of WG is ugly and we don't want to be multi-threaded; wait for Rust port esbuild-transform-rs.
// `tag_to_escape` must be case insensitive.
// NOTE(review): `result.code` is used without looking at any error information on `result`;
// confirm what esbuild hands back for code it fails to parse.
pub fn minify_using_esbuild(
out: &mut Vec<u8>,
code: &[u8],
transform_options: &TransformOptions,
tag_to_escape: &'static AhoCorasick,
) {
let wg = WaitGroup::new();
// A clone of the wait group is moved into the callback and dropped as its last step, and
// `wg.wait()` below blocks until then, so this function does not return while the callback may
// still be writing to `out`.
// NOTE(review): presumably this is what makes the unmanaged call sound (the borrows of `code`,
// `transform_options` and `out` outlive the callback) — confirm against the esbuild-rs docs.
unsafe {
let wg = wg.clone();
esbuild_rs::transform_direct_unmanaged(code, transform_options, move |result| {
// TODO (JS) Handle other forms:
// 1 < /script/.exec(a).length
// ` ${` ${a</script/} `} `
// // </script>
// /* </script>
// Considerations:
// - Need to parse strings (e.g. "", '', ``) so syntax within strings isn't mistakenly interpreted as code.
// - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex.
// - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
// - `/</script` or `/</ script` are not valid JS so don't need to be handled.
// TODO (CSS) Are there other places that can have unintentional closing tags?
tag_to_escape.replace_all_with_bytes(
result.code.as_str().trim().as_bytes(),
out,
|_, orig, dst| {
dst.extend(b"<\\/");
// Keep original case.
// The first two matched bytes (`</`) were replaced by the `<\/` written above.
dst.extend(&orig[2..]);
// NOTE(review): presumably `true` tells the matcher to carry on with later matches —
// confirm against the aho-corasick docs.
true
},
);
// Signal completion to the `wg.wait()` in the enclosing function.
drop(wg);
});
};
wg.wait();
}

View File

@ -1,6 +1,38 @@
use crate::cfg::Cfg;
#[cfg(feature = "js-esbuild")]
use {
crate::minify::esbuild::minify_using_esbuild,
aho_corasick::{AhoCorasick, AhoCorasickBuilder},
esbuild_rs::{TransformOptions, TransformOptionsBuilder},
lazy_static::lazy_static,
std::sync::Arc,
};
pub fn minify_js(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
// TODO
out.extend_from_slice(code);
use crate::Cfg;
#[cfg(feature = "js-esbuild")]
// Lazily initialised statics used by the esbuild-backed `minify_js`.
lazy_static! {
// Matcher for the literal `</script`, ignoring ASCII case.
static ref SCRIPT_END: AhoCorasick = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(&["</script"]);
// esbuild transform options with identifier, syntax and whitespace minification all switched
// on; the loader is left at the builder's default.
static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
let mut builder = TransformOptionsBuilder::new();
builder.minify_identifiers = true;
builder.minify_syntax = true;
builder.minify_whitespace = true;
builder.build()
};
}
#[cfg(not(feature = "js-esbuild"))]
// Fallback used when the `js-esbuild` feature is disabled: no minifier is available, so `code` is
// appended to `out` unchanged and `_cfg` is ignored.
pub fn minify_js(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
    // `code` is already a slice reference; pass it through without re-borrowing.
    out.extend_from_slice(code);
}
#[cfg(feature = "js-esbuild")]
// Appends the JavaScript in `code` to `out`. When `cfg.minify_js` is set the code is run through
// esbuild (see `minify_using_esbuild`), with matches of `SCRIPT_END` escaped in the result;
// otherwise it is copied verbatim.
pub fn minify_js(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
    if cfg.minify_js {
        // Borrow the shared options straight from the static: cloning the `Arc` only to take a
        // reference to the clone costs a refcount round-trip on every call for no benefit.
        minify_using_esbuild(out, code, &TRANSFORM_OPTIONS, &SCRIPT_END);
    } else {
        out.extend_from_slice(code);
    }
}

View File

@ -4,6 +4,7 @@ pub mod comment;
pub mod content;
pub mod css;
pub mod element;
pub mod esbuild;
pub mod instruction;
pub mod js;
#[cfg(test)]

View File

@ -15,6 +15,7 @@ pub enum TrieNodeMatch<V: 'static + Copy> {
NotFound { reached: usize },
}
#[allow(dead_code)]
impl<V: 'static + Copy> TrieNode<V> {
// Find the node that matches the shortest prefix of {@param text} that:
// - has a value (except the start node if it has a value);
@ -32,7 +33,7 @@ impl<V: 'static + Copy> TrieNode<V> {
// - "&amx" will return node `m`.
// - "&ax" will return node `a`.
// - "+ax" will return itself.
// - "" will return the itself.
// - "" will return itself.
pub fn shortest_matching_prefix(&self, text: &[u8], from: usize) -> (&TrieNode<V>, usize) {
let mut node: &TrieNode<V> = self;
let mut pos = from;

View File

@ -1,4 +1,4 @@
fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) {
fn eval_with_cfg(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) {
let mut code = src.to_vec();
let min = super::minify(&mut code, cfg);
assert_eq!(
@ -8,21 +8,27 @@ fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) {
}
fn eval(src: &'static [u8], expected: &'static [u8]) {
_eval(src, expected, &super::Cfg::new());
eval_with_cfg(src, expected, &super::Cfg::new());
}
// Test helper: minifies `src` with `keep_html_and_head_opening_tags` enabled on an otherwise
// default configuration and checks the result against `expected` via `eval_with_cfg`.
fn eval_with_keep_html_head(src: &'static [u8], expected: &'static [u8]) {
    let mut cfg = super::Cfg::new();
    cfg.keep_html_and_head_opening_tags = true;
    eval_with_cfg(src, expected, &cfg);
}
#[cfg(feature = "js-esbuild")]
fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
let mut cfg = super::Cfg::new();
cfg.minify_js = true;
_eval(src, expected, &cfg);
eval_with_cfg(src, expected, &cfg);
}
#[cfg(feature = "js-esbuild")]
fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
let mut cfg = super::Cfg::new();
cfg.minify_css = true;
_eval(src, expected, &cfg);
eval_with_cfg(src, expected, &cfg);
}
#[test]
@ -97,17 +103,17 @@ fn test_no_whitespace_minification() {
#[test]
fn test_parsing_extra_head_tag() {
// Extra `<head>` in `<label>` should be dropped, so whitespace around `<head>` should be joined and therefore trimmed due to `<label>` whitespace rules.
eval(
eval_with_keep_html_head(
b"<html><head><meta><head><link><head><body><label> <pre> </pre> <head> </label>",
b"<html><head><meta><link><body><label><pre> </pre></label>",
);
// Same as above except it's a `</head>`, which should get reinterpreted as a `<head>`.
eval(
eval_with_keep_html_head(
b"<html><head><meta><head><link><head><body><label> <pre> </pre> </head> </label>",
b"<html><head><meta><link><body><label><pre> </pre></label>",
);
// `<head>` gets implicitly closed by `<body>`, so any following `</head>` should be ignored. (They should be anyway, since `</head>` would not be a valid closing tag.)
eval(
eval_with_keep_html_head(
b"<html><head><body><label> </head> </label>",
b"<html><head><body><label></label>",
);
@ -115,10 +121,10 @@ fn test_parsing_extra_head_tag() {
#[test]
fn test_parsing_omitted_closing_tag() {
eval(b"<html>", b"<html>");
eval(b" <html>\n", b"<html>");
eval(b" <!doctype html> <html>\n", b"<!doctype html><html>");
eval(
eval_with_keep_html_head(b"<html>", b"<html>");
eval_with_keep_html_head(b" <html>\n", b"<html>");
eval_with_keep_html_head(b" <!doctype html> <html>\n", b"<!doctype html><html>");
eval_with_keep_html_head(
b"<!doctype html><html><div> <p>Foo</div></html>",
b"<!doctype html><html><div><p>Foo</div>",
);
@ -136,24 +142,24 @@ fn test_self_closing_svg_tag_whitespace_removal() {
#[test]
fn test_parsing_with_omitted_tags() {
eval(b"<ul><li>1<li>2<li>3</ul>", b"<ul><li>1<li>2<li>3</ul>");
eval(b"<rt>", b"<rt>");
eval(b"<rt><rp>1</rp><div></div>", b"<rt><rp>1</rp><div></div>");
eval(b"<div><rt></div>", b"<div><rt></div>");
eval(b"<html><head><body>", b"<html><head><body>");
eval(b"<html><head><body>", b"<html><head><body>");
eval_with_keep_html_head(b"<ul><li>1<li>2<li>3</ul>", b"<ul><li>1<li>2<li>3</ul>");
eval_with_keep_html_head(b"<rt>", b"<rt>");
eval_with_keep_html_head(b"<rt><rp>1</rp><div></div>", b"<rt><rp>1</rp><div></div>");
eval_with_keep_html_head(b"<div><rt></div>", b"<div><rt></div>");
eval_with_keep_html_head(b"<html><head><body>", b"<html><head><body>");
eval_with_keep_html_head(b"<html><head><body>", b"<html><head><body>");
// Tag names should be case insensitive.
eval(b"<rt>", b"<rt>");
eval_with_keep_html_head(b"<rt>", b"<rt>");
}
#[test]
fn test_unmatched_closing_tag() {
eval(b"Hello</p>Goodbye", b"Hello<p>Goodbye");
eval(b"Hello<br></br>Goodbye", b"Hello<br>Goodbye");
eval(b"<div>Hello</p>Goodbye", b"<div>Hello<p>Goodbye");
eval(b"<ul><li>a</p>", b"<ul><li>a<p>");
eval(b"<ul><li><rt>a</p>", b"<ul><li><rt>a<p>");
eval(
eval_with_keep_html_head(b"Hello</p>Goodbye", b"Hello<p>Goodbye");
eval_with_keep_html_head(b"Hello<br></br>Goodbye", b"Hello<br>Goodbye");
eval_with_keep_html_head(b"<div>Hello</p>Goodbye", b"<div>Hello<p>Goodbye");
eval_with_keep_html_head(b"<ul><li>a</p>", b"<ul><li>a<p>");
eval_with_keep_html_head(b"<ul><li><rt>a</p>", b"<ul><li><rt>a<p>");
eval_with_keep_html_head(
b"<html><head><body><ul><li><rt>a</p>",
b"<html><head><body><ul><li><rt>a<p>",
);
@ -175,17 +181,17 @@ fn test_removal_of_html_and_head_opening_tags() {
#[test]
fn test_removal_of_optional_tags() {
eval(
eval_with_keep_html_head(
b"<ul><li>1</li><li>2</li><li>3</li></ul>",
b"<ul><li>1<li>2<li>3</ul>",
);
eval(b"<rt></rt>", b"<rt>");
eval(
eval_with_keep_html_head(b"<rt></rt>", b"<rt>");
eval_with_keep_html_head(
b"<rt></rt><rp>1</rp><div></div>",
b"<rt><rp>1</rp><div></div>",
);
eval(b"<div><rt></rt></div>", b"<div><rt></div>");
eval(
eval_with_keep_html_head(b"<div><rt></rt></div>", b"<div><rt></div>");
eval_with_keep_html_head(
br#"
<html>
<head>
@ -198,7 +204,7 @@ fn test_removal_of_optional_tags() {
b"<html><head><body>",
);
// Tag names should be case insensitive.
eval(b"<RT></rt>", b"<rt>");
eval_with_keep_html_head(b"<RT></rt>", b"<rt>");
}
#[test]