Implement JS and CSS minification; add tests
This commit is contained in:
parent
6650d94485
commit
4c0eb3ed28
|
@ -22,6 +22,6 @@ js-esbuild = ["crossbeam", "esbuild-rs"]
|
|||
[dependencies]
|
||||
aho-corasick = "0.7"
|
||||
crossbeam = { version = "0.7", optional = true }
|
||||
esbuild-rs = { version = "0.8.30", optional = true }
|
||||
esbuild-rs = { version = "0.12.18", optional = true }
|
||||
lazy_static = "1.4"
|
||||
memchr = "2"
|
||||
|
|
|
@ -1,6 +1,39 @@
|
|||
#[cfg(feature = "js-esbuild")]
|
||||
use {
|
||||
crate::minify::esbuild::minify_using_esbuild,
|
||||
aho_corasick::{AhoCorasick, AhoCorasickBuilder},
|
||||
esbuild_rs::{Loader, TransformOptions, TransformOptionsBuilder},
|
||||
lazy_static::lazy_static,
|
||||
std::sync::Arc,
|
||||
};
|
||||
|
||||
use crate::cfg::Cfg;
|
||||
|
||||
pub fn minify_css(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
|
||||
// TODO
|
||||
out.extend_from_slice(code);
|
||||
#[cfg(feature = "js-esbuild")]
lazy_static! {
    // ASCII case-insensitive matcher for the start of a closing `</style` tag; passed to
    // `minify_using_esbuild` as the tag to escape in minified CSS.
    static ref STYLE_END: AhoCorasick = {
        let mut matcher = AhoCorasickBuilder::new();
        matcher.ascii_case_insensitive(true);
        matcher.build(&["</style"])
    };
    // esbuild transform options shared by every CSS minification call: CSS loader with
    // identifier, syntax and whitespace minification all enabled.
    static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
        let mut options = TransformOptionsBuilder::new();
        options.loader = Loader::CSS;
        options.minify_identifiers = true;
        options.minify_syntax = true;
        options.minify_whitespace = true;
        options.build()
    };
}
|
||||
|
||||
#[cfg(not(feature = "js-esbuild"))]
|
||||
pub fn minify_css(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
|
||||
out.extend_from_slice(&code);
|
||||
}
|
||||
|
||||
/// Appends the CSS in `code` to `out`, minified with esbuild when `cfg.minify_css` is set.
///
/// When `cfg.minify_css` is `false`, `code` is appended unchanged. Otherwise the work is
/// delegated to `minify_using_esbuild`, which is also given `STYLE_END` as the tag to escape.
#[cfg(feature = "js-esbuild")]
pub fn minify_css(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
    if !cfg.minify_css {
        out.extend_from_slice(code);
        return;
    }
    // Deref coercion turns `&TRANSFORM_OPTIONS` into `&TransformOptions`, so the shared
    // `Arc` does not have to be cloned (and its refcount bumped) on every call.
    minify_using_esbuild(out, code, &TRANSFORM_OPTIONS, &STYLE_END);
}
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#[cfg(feature = "js-esbuild")]
|
||||
use {aho_corasick::AhoCorasick, crossbeam::sync::WaitGroup, esbuild_rs::TransformOptions};
|
||||
|
||||
/// Transforms `code` with esbuild using `transform_options` and appends the trimmed result to
/// `out`, escaping every match of `tag_to_escape` along the way.
///
/// Each match has its first two bytes replaced by `<\/` while the remaining bytes keep their
/// original case; the callers in this crate pass patterns such as `</script` and `</style`.
///
/// `tag_to_escape` must be case insensitive.
///
/// This function blocks until the esbuild callback has finished.
// TODO The use of WG is ugly and we don't want to be multi-threaded; wait for Rust port esbuild-transform-rs.
#[cfg(feature = "js-esbuild")]
pub fn minify_using_esbuild(
    out: &mut Vec<u8>,
    code: &[u8],
    transform_options: &TransformOptions,
    tag_to_escape: &'static AhoCorasick,
) {
    let wg = WaitGroup::new();
    // The callback owns this clone and drops it once it is done, which releases `wg.wait()`.
    let done = wg.clone();
    // SAFETY: NOTE(review) presumably the "unmanaged" API requires `code`, `transform_options`
    // and everything captured by the callback (here `out`) to stay alive until the callback
    // has run -- confirm against the esbuild-rs documentation. This function does not return
    // before `wg.wait()` below, which only completes after the callback has dropped `done`,
    // so those borrows remain valid for that whole period.
    unsafe {
        esbuild_rs::transform_direct_unmanaged(code, transform_options, move |result| {
            // TODO (JS) Handle other forms:
            // 1 < /script/.exec(a).length
            // ` ${` ${a</script/} `} `
            // // </script>
            // /* </script>
            // Considerations:
            // - Need to parse strings (e.g. "", '', ``) so syntax within strings isn't mistakenly interpreted as code.
            // - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex.
            // - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
            // - `/</script` or `/</ script` are not valid JS so don't need to be handled.
            // TODO (CSS) Are there other places that can have unintentional closing tags?
            let minified = result.code.as_str().trim().as_bytes();
            tag_to_escape.replace_all_with_bytes(minified, out, |_, matched, dst| {
                dst.extend_from_slice(b"<\\/");
                // Keep original case of everything after the leading two bytes.
                dst.extend_from_slice(&matched[2..]);
                true
            });
            drop(done);
        });
    }
    wg.wait();
}
|
|
@ -1,6 +1,38 @@
|
|||
use crate::cfg::Cfg;
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use {
|
||||
crate::minify::esbuild::minify_using_esbuild,
|
||||
aho_corasick::{AhoCorasick, AhoCorasickBuilder},
|
||||
esbuild_rs::{TransformOptions, TransformOptionsBuilder},
|
||||
lazy_static::lazy_static,
|
||||
std::sync::Arc,
|
||||
};
|
||||
|
||||
pub fn minify_js(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
|
||||
// TODO
|
||||
out.extend_from_slice(code);
|
||||
use crate::Cfg;
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
lazy_static! {
    // ASCII case-insensitive matcher for the start of a closing `</script` tag; passed to
    // `minify_using_esbuild` as the tag to escape in minified JS.
    static ref SCRIPT_END: AhoCorasick = {
        let mut matcher = AhoCorasickBuilder::new();
        matcher.ascii_case_insensitive(true);
        matcher.build(&["</script"])
    };
    // esbuild transform options shared by every JS minification call: identifier, syntax
    // and whitespace minification all enabled, everything else left at the builder's defaults.
    static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
        let mut options = TransformOptionsBuilder::new();
        options.minify_identifiers = true;
        options.minify_syntax = true;
        options.minify_whitespace = true;
        options.build()
    };
}
|
||||
|
||||
#[cfg(not(feature = "js-esbuild"))]
|
||||
pub fn minify_js(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
|
||||
out.extend_from_slice(&code);
|
||||
}
|
||||
|
||||
/// Appends the JS in `code` to `out`, minified with esbuild when `cfg.minify_js` is set.
///
/// When `cfg.minify_js` is `false`, `code` is appended unchanged. Otherwise the work is
/// delegated to `minify_using_esbuild`, which is also given `SCRIPT_END` as the tag to escape.
#[cfg(feature = "js-esbuild")]
pub fn minify_js(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
    if !cfg.minify_js {
        out.extend_from_slice(code);
        return;
    }
    // Deref coercion turns `&TRANSFORM_OPTIONS` into `&TransformOptions`, so the shared
    // `Arc` does not have to be cloned (and its refcount bumped) on every call.
    minify_using_esbuild(out, code, &TRANSFORM_OPTIONS, &SCRIPT_END);
}
|
||||
|
|
|
@ -4,6 +4,7 @@ pub mod comment;
|
|||
pub mod content;
|
||||
pub mod css;
|
||||
pub mod element;
|
||||
pub mod esbuild;
|
||||
pub mod instruction;
|
||||
pub mod js;
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -15,6 +15,7 @@ pub enum TrieNodeMatch<V: 'static + Copy> {
|
|||
NotFound { reached: usize },
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl<V: 'static + Copy> TrieNode<V> {
|
||||
// Find the node that matches the shortest prefix of {@param text} that:
|
||||
// - has a value (except the start node if it has a value);
|
||||
|
@ -32,7 +33,7 @@ impl<V: 'static + Copy> TrieNode<V> {
|
|||
// - "&amx" will return node `m`.
|
||||
// - "&ax" will return node `a`.
|
||||
// - "+ax" will return itself.
|
||||
// - "" will return the itself.
|
||||
// - "" will return itself.
|
||||
pub fn shortest_matching_prefix(&self, text: &[u8], from: usize) -> (&TrieNode<V>, usize) {
|
||||
let mut node: &TrieNode<V> = self;
|
||||
let mut pos = from;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) {
|
||||
fn eval_with_cfg(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) {
|
||||
let mut code = src.to_vec();
|
||||
let min = super::minify(&mut code, cfg);
|
||||
assert_eq!(
|
||||
|
@ -8,21 +8,27 @@ fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) {
|
|||
}
|
||||
|
||||
/// Runs `eval_with_cfg` on `src` and `expected` with the default `Cfg::new()` configuration.
fn eval(src: &'static [u8], expected: &'static [u8]) {
    // The helper was renamed from `_eval` to `eval_with_cfg`; only the new name is called.
    eval_with_cfg(src, expected, &super::Cfg::new());
}
|
||||
|
||||
fn eval_with_keep_html_head(src: &'static [u8], expected: &'static [u8]) -> () {
|
||||
let mut cfg = super::Cfg::new();
|
||||
cfg.keep_html_and_head_opening_tags = true;
|
||||
eval_with_cfg(src, expected, &cfg);
|
||||
}
|
||||
|
||||
/// Runs `eval_with_cfg` on `src` and `expected` with a configuration that has `minify_js`
/// enabled. Only compiled when the `js-esbuild` feature is on.
#[cfg(feature = "js-esbuild")]
fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) {
    let mut cfg = super::Cfg::new();
    cfg.minify_js = true;
    // The helper was renamed from `_eval` to `eval_with_cfg`; only the new name is called.
    eval_with_cfg(src, expected, &cfg);
}
|
||||
|
||||
/// Runs `eval_with_cfg` on `src` and `expected` with a configuration that has `minify_css`
/// enabled. Only compiled when the `js-esbuild` feature is on.
#[cfg(feature = "js-esbuild")]
fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) {
    let mut cfg = super::Cfg::new();
    cfg.minify_css = true;
    // The helper was renamed from `_eval` to `eval_with_cfg`; only the new name is called.
    eval_with_cfg(src, expected, &cfg);
}
|
||||
|
||||
#[test]
|
||||
|
@ -97,17 +103,17 @@ fn test_no_whitespace_minification() {
|
|||
#[test]
|
||||
fn test_parsing_extra_head_tag() {
|
||||
// Extra `<head>` in `<label>` should be dropped, so whitespace around `<head>` should be joined and therefore trimmed due to `<label>` whitespace rules.
|
||||
eval(
|
||||
eval_with_keep_html_head(
|
||||
b"<html><head><meta><head><link><head><body><label> <pre> </pre> <head> </label>",
|
||||
b"<html><head><meta><link><body><label><pre> </pre></label>",
|
||||
);
|
||||
// Same as above except it's a `</head>`, which should get reinterpreted as a `<head>`.
|
||||
eval(
|
||||
eval_with_keep_html_head(
|
||||
b"<html><head><meta><head><link><head><body><label> <pre> </pre> </head> </label>",
|
||||
b"<html><head><meta><link><body><label><pre> </pre></label>",
|
||||
);
|
||||
// `<head>` gets implicitly closed by `<body>`, so any following `</head>` should be ignored. (They should be anyway, since `</head>` would not be a valid closing tag.)
|
||||
eval(
|
||||
eval_with_keep_html_head(
|
||||
b"<html><head><body><label> </head> </label>",
|
||||
b"<html><head><body><label></label>",
|
||||
);
|
||||
|
@ -115,10 +121,10 @@ fn test_parsing_extra_head_tag() {
|
|||
|
||||
#[test]
|
||||
fn test_parsing_omitted_closing_tag() {
|
||||
eval(b"<html>", b"<html>");
|
||||
eval(b" <html>\n", b"<html>");
|
||||
eval(b" <!doctype html> <html>\n", b"<!doctype html><html>");
|
||||
eval(
|
||||
eval_with_keep_html_head(b"<html>", b"<html>");
|
||||
eval_with_keep_html_head(b" <html>\n", b"<html>");
|
||||
eval_with_keep_html_head(b" <!doctype html> <html>\n", b"<!doctype html><html>");
|
||||
eval_with_keep_html_head(
|
||||
b"<!doctype html><html><div> <p>Foo</div></html>",
|
||||
b"<!doctype html><html><div><p>Foo</div>",
|
||||
);
|
||||
|
@ -136,24 +142,24 @@ fn test_self_closing_svg_tag_whitespace_removal() {
|
|||
|
||||
#[test]
|
||||
fn test_parsing_with_omitted_tags() {
|
||||
eval(b"<ul><li>1<li>2<li>3</ul>", b"<ul><li>1<li>2<li>3</ul>");
|
||||
eval(b"<rt>", b"<rt>");
|
||||
eval(b"<rt><rp>1</rp><div></div>", b"<rt><rp>1</rp><div></div>");
|
||||
eval(b"<div><rt></div>", b"<div><rt></div>");
|
||||
eval(b"<html><head><body>", b"<html><head><body>");
|
||||
eval(b"<html><head><body>", b"<html><head><body>");
|
||||
eval_with_keep_html_head(b"<ul><li>1<li>2<li>3</ul>", b"<ul><li>1<li>2<li>3</ul>");
|
||||
eval_with_keep_html_head(b"<rt>", b"<rt>");
|
||||
eval_with_keep_html_head(b"<rt><rp>1</rp><div></div>", b"<rt><rp>1</rp><div></div>");
|
||||
eval_with_keep_html_head(b"<div><rt></div>", b"<div><rt></div>");
|
||||
eval_with_keep_html_head(b"<html><head><body>", b"<html><head><body>");
|
||||
eval_with_keep_html_head(b"<html><head><body>", b"<html><head><body>");
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<rt>", b"<rt>");
|
||||
eval_with_keep_html_head(b"<rt>", b"<rt>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unmatched_closing_tag() {
|
||||
eval(b"Hello</p>Goodbye", b"Hello<p>Goodbye");
|
||||
eval(b"Hello<br></br>Goodbye", b"Hello<br>Goodbye");
|
||||
eval(b"<div>Hello</p>Goodbye", b"<div>Hello<p>Goodbye");
|
||||
eval(b"<ul><li>a</p>", b"<ul><li>a<p>");
|
||||
eval(b"<ul><li><rt>a</p>", b"<ul><li><rt>a<p>");
|
||||
eval(
|
||||
eval_with_keep_html_head(b"Hello</p>Goodbye", b"Hello<p>Goodbye");
|
||||
eval_with_keep_html_head(b"Hello<br></br>Goodbye", b"Hello<br>Goodbye");
|
||||
eval_with_keep_html_head(b"<div>Hello</p>Goodbye", b"<div>Hello<p>Goodbye");
|
||||
eval_with_keep_html_head(b"<ul><li>a</p>", b"<ul><li>a<p>");
|
||||
eval_with_keep_html_head(b"<ul><li><rt>a</p>", b"<ul><li><rt>a<p>");
|
||||
eval_with_keep_html_head(
|
||||
b"<html><head><body><ul><li><rt>a</p>",
|
||||
b"<html><head><body><ul><li><rt>a<p>",
|
||||
);
|
||||
|
@ -175,17 +181,17 @@ fn test_removal_of_html_and_head_opening_tags() {
|
|||
|
||||
#[test]
|
||||
fn test_removal_of_optional_tags() {
|
||||
eval(
|
||||
eval_with_keep_html_head(
|
||||
b"<ul><li>1</li><li>2</li><li>3</li></ul>",
|
||||
b"<ul><li>1<li>2<li>3</ul>",
|
||||
);
|
||||
eval(b"<rt></rt>", b"<rt>");
|
||||
eval(
|
||||
eval_with_keep_html_head(b"<rt></rt>", b"<rt>");
|
||||
eval_with_keep_html_head(
|
||||
b"<rt></rt><rp>1</rp><div></div>",
|
||||
b"<rt><rp>1</rp><div></div>",
|
||||
);
|
||||
eval(b"<div><rt></rt></div>", b"<div><rt></div>");
|
||||
eval(
|
||||
eval_with_keep_html_head(b"<div><rt></rt></div>", b"<div><rt></div>");
|
||||
eval_with_keep_html_head(
|
||||
br#"
|
||||
<html>
|
||||
<head>
|
||||
|
@ -198,7 +204,7 @@ fn test_removal_of_optional_tags() {
|
|||
b"<html><head><body>",
|
||||
);
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<RT></rt>", b"<rt>");
|
||||
eval_with_keep_html_head(b"<RT></rt>", b"<rt>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Reference in New Issue