Use minify-js for onepass

This commit is contained in:
Wilson Lin 2022-06-22 01:36:27 +10:00
parent b1c3198155
commit aa6c0e31ae
8 changed files with 33 additions and 200 deletions

View File

@ -6,6 +6,6 @@ authors = ["Wilson Lin <code@wilsonl.in>"]
edition = "2018"
[dependencies]
minify-html-onepass = { path = "../../../rust/onepass", features = ["js-esbuild"] }
minify-html-onepass = { path = "../../../rust/onepass" }
serde = { version = "1.0.104", features = ["derive"] }
serde_json = "1.0.44"

View File

@ -1,6 +1,6 @@
[package]
name = "minify-html-onepass"
description = "Alternate version of minify-html"
description = "Even faster version of minify-html"
license = "MIT"
homepage = "https://github.com/wilsonzlin/minify-html"
readme = "README.md"
@ -15,13 +15,9 @@ include = ["/src/**/*", "/Cargo.toml", "/LICENSE", "/README.md"]
[badges]
maintenance = { status = "actively-developed" }
[features]
default = []
js-esbuild = ["crossbeam", "esbuild-rs"]
[dependencies]
aho-corasick = "0.7"
crossbeam = { version = "0.7", optional = true }
esbuild-rs = { version = "0.13.8", optional = true }
css-minify = "0.2.2"
lazy_static = "1.4"
memchr = "2"
minify-js = "0.1.0"

View File

@ -2,17 +2,14 @@
/// minification approach.
pub struct Cfg {
/// If enabled, JavaScript in `<script>` tags is minified using
/// [esbuild-rs](https://github.com/wilsonzlin/esbuild-rs). The `js-esbuild` feature must be
/// enabled; otherwise, this value has no effect.
/// [minify-js](https://github.com/wilsonzlin/minify-js).
///
/// Only `<script>` tags with a valid or no
/// [MIME type](https://mimesniff.spec.whatwg.org/#javascript-mime-type) are considered to
/// contain JavaScript, as per the specification.
pub minify_js: bool,
/// If enabled, CSS in `<style>` tags is minified using
/// [esbuild-rs](https://github.com/wilsonzlin/esbuild-rs). The `js-esbuild` feature must be
/// enabled; otherwise, this value has no effect.
/// If enabled, CSS in `<style>` tags is minified using the `css-minify` crate.
pub minify_css: bool,
}

View File

@ -5,12 +5,6 @@ use std::ops::{Index, IndexMut};
use aho_corasick::AhoCorasick;
use memchr::memchr;
#[cfg(feature = "js-esbuild")]
use {
crossbeam::sync::WaitGroup,
std::sync::{Arc, Mutex},
};
use crate::common::gen::codepoints::Lookup;
use crate::common::spec::tag::EMPTY_SLICE;
use crate::err::{debug_repr, Error, ErrorType, ProcessingResult};
@ -51,12 +45,6 @@ pub enum MatchAction {
MatchOnly,
}
/// One completed esbuild transform, recorded so that `Processor::finish` can later write it
/// over the source section it was produced from.
#[cfg(feature = "js-esbuild")]
pub struct EsbuildSection {
/// Range in the processor's buffer holding the original (unminified) section.
pub src: ProcessorRange,
/// Minified code with any closing-tag sequence escaped (`</` rewritten to `<\/`), so it
/// cannot terminate the enclosing element early.
pub escaped: Vec<u8>,
}
// Processing state of a file. Single use only; create one per processing.
pub struct Processor<'d> {
code: &'d mut [u8],
@ -64,10 +52,6 @@ pub struct Processor<'d> {
read_next: usize,
// Index of the next unwritten space.
write_next: usize,
#[cfg(feature = "js-esbuild")]
esbuild_wg: WaitGroup,
#[cfg(feature = "js-esbuild")]
esbuild_results: Arc<Mutex<Vec<EsbuildSection>>>,
}
impl<'d> Index<ProcessorRange> for Processor<'d> {
@ -96,10 +80,6 @@ impl<'d> Processor<'d> {
write_next: 0,
read_next: 0,
code,
#[cfg(feature = "js-esbuild")]
esbuild_wg: WaitGroup::new(),
#[cfg(feature = "js-esbuild")]
esbuild_results: Arc::new(Mutex::new(Vec::new())),
}
}
@ -380,61 +360,12 @@ impl<'d> Processor<'d> {
self._shift(count);
}
/// Returns a clone of the processor's `WaitGroup` together with another handle to the shared
/// results list, for use by one asynchronous esbuild transform.
///
/// The callback is expected to push its `EsbuildSection` into the list and then drop both
/// values; `finish` waits on the wait group and requires sole ownership of the results.
#[cfg(feature = "js-esbuild")]
#[inline(always)]
pub fn new_esbuild_section(&self) -> (WaitGroup, Arc<Mutex<Vec<EsbuildSection>>>) {
(self.esbuild_wg.clone(), self.esbuild_results.clone())
}
// Since we consume the Processor, we must provide a full Error with positions.
/// Consumes the processor and returns `write_next`, the index of the next unwritten space.
/// NOTE(review): presumably this is the length of the minified output — confirm with callers.
///
/// In debug builds, asserts that `at_end()` holds. This variant never returns `Err`.
#[cfg(not(feature = "js-esbuild"))]
#[inline(always)]
pub fn finish(self) -> Result<usize, Error> {
debug_assert!(self.at_end());
Ok(self.write_next)
}
// Since we consume the Processor, we must provide a full Error with positions.
/// Consumes the processor, waits for every outstanding esbuild transform, writes each result
/// over the section it came from (compacting the buffer leftwards), and returns the final
/// write position.
///
/// # Panics
///
/// Panics if another handle to the results list is still alive after the wait group
/// completes, or if the results mutex was poisoned.
#[cfg(feature = "js-esbuild")]
#[inline(always)]
pub fn finish(self) -> Result<usize, Error> {
debug_assert!(self.at_end());
// Block until every transform callback has dropped its WaitGroup clone.
self.esbuild_wg.wait();
let mut results = Arc::try_unwrap(self.esbuild_results)
.unwrap_or_else(|_| panic!("failed to acquire esbuild results"))
.into_inner()
.unwrap();
// Results are pushed from callbacks, so no particular order is assumed; process them in
// ascending source position.
results.sort_unstable_by_key(|r| r.src.start);
// As we write minified JS/CSS code for sections from left to right, we will be shifting code
// towards the left as previous source JS/CSS code sections shrink. We need to keep track of
// the write pointer after previous compaction.
// If there are no script sections, then we get self.write_next which will be returned.
let mut write_next = results.get(0).map_or(self.write_next, |r| r.src.start);
for (
i,
EsbuildSection {
escaped: min_code,
src,
},
) in results.iter().enumerate()
{
// Resulting minified JS/CSS to write.
let min_len = if min_code.len() < src.len() {
self.code[write_next..write_next + min_code.len()].copy_from_slice(min_code);
min_code.len()
} else {
// If minified result is actually longer than source, then write source instead.
// NOTE: We still need to write source as previous iterations may have shifted code down.
self.code.copy_within(src.start..src.end, write_next);
src.len()
};
let write_end = write_next + min_len;
// Code between this section and the next one (or the end of written output, for the last
// section) is untouched; move it down so it directly follows what was just written.
let next_start = results.get(i + 1).map_or(self.write_next, |r| r.src.start);
self.code.copy_within(src.end..next_start, write_end);
write_next = write_end + (next_start - src.end);
}
Ok(write_next)
}
}
impl Debug for Processor<'_> {

View File

@ -25,14 +25,12 @@ pub fn eval_with_cfg(src: &'static [u8], expected: &'static [u8], cfg: &Cfg) {
};
}
#[cfg(feature = "js-esbuild")]
/// Test helper: calls `eval_with_cfg` on `src` and `expected` with a fresh `Cfg` whose
/// `minify_js` option is enabled.
pub fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
let mut cfg = Cfg::new();
cfg.minify_js = true;
eval_with_cfg(src, expected, &cfg);
}
#[cfg(feature = "js-esbuild")]
pub fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
let mut cfg = Cfg::new();
cfg.minify_css = true;

View File

@ -1,31 +1,13 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use lazy_static::lazy_static;
#[cfg(feature = "js-esbuild")]
use {
crate::proc::checkpoint::WriteCheckpoint,
crate::proc::EsbuildSection,
esbuild_rs::{TransformOptions, TransformOptionsBuilder},
std::sync::Arc,
};
use crate::cfg::Cfg;
use crate::err::ProcessingResult;
use crate::proc::checkpoint::WriteCheckpoint;
use crate::proc::MatchAction::*;
use crate::proc::MatchMode::*;
use crate::proc::Processor;
#[cfg(feature = "js-esbuild")]
lazy_static! {
// Shared esbuild transform options for `<script>` content, built once on first access:
// identifier, syntax and whitespace minification are all enabled.
static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
let mut builder = TransformOptionsBuilder::new();
builder.minify_identifiers = true;
builder.minify_syntax = true;
builder.minify_whitespace = true;
builder.build()
};
}
lazy_static! {
static ref SCRIPT_END: AhoCorasick = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
@ -34,54 +16,23 @@ lazy_static! {
#[inline(always)]
pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingResult<()> {
#[cfg(feature = "js-esbuild")]
let start = WriteCheckpoint::new(proc);
proc.require_not_at_end()?;
proc.m(WhileNotSeq(&SCRIPT_END), Keep);
let src = proc.m(WhileNotSeq(&SCRIPT_END), Discard);
// `process_tag` will require closing tag.
// TODO This is copied from style.rs.
#[cfg(feature = "js-esbuild")]
if js && cfg.minify_js {
let (wg, results) = proc.new_esbuild_section();
let src = start.written_range(proc);
unsafe {
esbuild_rs::transform_direct_unmanaged(
&proc[src],
&TRANSFORM_OPTIONS.clone(),
move |result| {
let mut guard = results.lock().unwrap();
// TODO Handle other forms:
// 1 < /script/.exec(a).length
// ` ${` ${a</script/} `} `
// // </script>
// /* </script>
// Considerations:
// - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code.
// - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex.
// - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
// - `/</script` or `/</ script` are not valid JS so don't need to be handled.
let mut escaped = Vec::<u8>::new();
// SCRIPT_END must be case insensitive.
SCRIPT_END.replace_all_with_bytes(
result.code.as_str().trim().as_bytes(),
&mut escaped,
|_, orig, dst| {
dst.extend(b"<\\/");
// Keep original case.
dst.extend(&orig[2..]);
true
},
);
guard.push(EsbuildSection { src, escaped });
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.
drop(guard);
drop(results);
drop(wg);
},
);
// TODO Write to `out` directly, but only if we can guarantee that the length will never exceed the input.
let mut output = Vec::new();
let result = minify_js::minify(proc[src].to_vec(), &mut output);
// TODO Collect error as warning.
if !result.is_err() && output.len() < src.len() {
proc.write_slice(output.as_slice());
} else {
proc.write_range(src);
};
} else {
proc.write_range(src);
};
Ok(())

View File

@ -1,32 +1,16 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use css_minify::optimizations::{Level, Minifier};
use lazy_static::lazy_static;
#[cfg(feature = "js-esbuild")]
use {
crate::proc::checkpoint::WriteCheckpoint,
crate::proc::EsbuildSection,
esbuild_rs::{Loader, TransformOptions, TransformOptionsBuilder},
std::sync::Arc,
};
use std::str::from_utf8_unchecked;
use crate::err::ProcessingResult;
use crate::proc::checkpoint::WriteCheckpoint;
use crate::proc::MatchAction::*;
use crate::proc::MatchMode::*;
use crate::proc::Processor;
use crate::Cfg;
#[cfg(feature = "js-esbuild")]
lazy_static! {
// Shared esbuild transform options for `<style>` content, built once on first access: the
// CSS loader is selected and identifier, syntax and whitespace minification are enabled.
static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
let mut builder = TransformOptionsBuilder::new();
builder.loader = Loader::CSS;
builder.minify_identifiers = true;
builder.minify_syntax = true;
builder.minify_whitespace = true;
builder.build()
};
}
lazy_static! {
static ref STYLE_END: AhoCorasick = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
@ -35,45 +19,23 @@ lazy_static! {
#[inline(always)]
pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
#[cfg(feature = "js-esbuild")]
let start = WriteCheckpoint::new(proc);
proc.require_not_at_end()?;
proc.m(WhileNotSeq(&STYLE_END), Keep);
let src = proc.m(WhileNotSeq(&STYLE_END), Discard);
// `process_tag` will require closing tag.
// TODO This is copied from script.rs.
#[cfg(feature = "js-esbuild")]
if cfg.minify_css {
let (wg, results) = proc.new_esbuild_section();
let src = start.written_range(proc);
unsafe {
esbuild_rs::transform_direct_unmanaged(
&proc[src],
&TRANSFORM_OPTIONS.clone(),
move |result| {
let mut guard = results.lock().unwrap();
// TODO Are there other places that can have unintentional closing tags?
let mut escaped = Vec::<u8>::new();
// STYLE_END must be case insensitive.
STYLE_END.replace_all_with_bytes(
result.code.as_str().trim().as_bytes(),
&mut escaped,
|_, orig, dst| {
dst.extend(b"<\\/");
// Keep original case.
dst.extend(&orig[2..]);
true
},
);
guard.push(EsbuildSection { src, escaped });
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.
drop(guard);
drop(results);
drop(wg);
},
);
let result = Minifier::default()
.minify(unsafe { from_utf8_unchecked(&proc[src]) }, Level::Three)
.ok();
// TODO Collect error as warning.
if result.as_ref().filter(|r| r.len() < src.len()).is_some() {
proc.write_slice(result.unwrap().as_bytes());
} else {
proc.write_range(src);
};
} else {
proc.write_range(src);
};
Ok(())

View File

@ -93,9 +93,7 @@ if (
cmd("git", "pull");
cmd("bash", "./prebuild.sh");
cmd("cargo", "test", { workingDir: RUST_MAIN_DIR });
cmd("cargo", "test", "--features", "js-esbuild", {
workingDir: RUST_ONEPASS_DIR,
});
cmd("cargo", "test", { workingDir: RUST_ONEPASS_DIR });
for (const f of [
`${RUST_MAIN_DIR}/Cargo.toml`,