Use minify-js for onepass
This commit is contained in:
parent
b1c3198155
commit
aa6c0e31ae
|
@ -6,6 +6,6 @@ authors = ["Wilson Lin <code@wilsonl.in>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
minify-html-onepass = { path = "../../../rust/onepass", features = ["js-esbuild"] }
|
minify-html-onepass = { path = "../../../rust/onepass" }
|
||||||
serde = { version = "1.0.104", features = ["derive"] }
|
serde = { version = "1.0.104", features = ["derive"] }
|
||||||
serde_json = "1.0.44"
|
serde_json = "1.0.44"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "minify-html-onepass"
|
name = "minify-html-onepass"
|
||||||
description = "Alternate version of minify-html"
|
description = "Even faster version of minify-html"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://github.com/wilsonzlin/minify-html"
|
homepage = "https://github.com/wilsonzlin/minify-html"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
@ -15,13 +15,9 @@ include = ["/src/**/*", "/Cargo.toml", "/LICENSE", "/README.md"]
|
||||||
[badges]
|
[badges]
|
||||||
maintenance = { status = "actively-developed" }
|
maintenance = { status = "actively-developed" }
|
||||||
|
|
||||||
[features]
|
|
||||||
default = []
|
|
||||||
js-esbuild = ["crossbeam", "esbuild-rs"]
|
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
aho-corasick = "0.7"
|
aho-corasick = "0.7"
|
||||||
crossbeam = { version = "0.7", optional = true }
|
css-minify = "0.2.2"
|
||||||
esbuild-rs = { version = "0.13.8", optional = true }
|
|
||||||
lazy_static = "1.4"
|
lazy_static = "1.4"
|
||||||
memchr = "2"
|
memchr = "2"
|
||||||
|
minify-js = "0.1.0"
|
||||||
|
|
|
@ -2,17 +2,14 @@
|
||||||
/// minification approach.
|
/// minification approach.
|
||||||
pub struct Cfg {
|
pub struct Cfg {
|
||||||
/// If enabled, JavaScript in `<script>` tags are minified using
|
/// If enabled, JavaScript in `<script>` tags are minified using
|
||||||
/// [esbuild-rs](https://github.com/wilsonzlin/esbuild-rs). The `js-esbuild` feature must be
|
/// [minify-js](https://github.com/wilsonzlin/minify-js).
|
||||||
/// enabled; otherwise, this value has no effect.
|
|
||||||
///
|
///
|
||||||
/// Only `<script>` tags with a valid or no
|
/// Only `<script>` tags with a valid or no
|
||||||
/// [MIME type](https://mimesniff.spec.whatwg.org/#javascript-mime-type) is considered to
|
/// [MIME type](https://mimesniff.spec.whatwg.org/#javascript-mime-type) is considered to
|
||||||
/// contain JavaScript, as per the specification.
|
/// contain JavaScript, as per the specification.
|
||||||
pub minify_js: bool,
|
pub minify_js: bool,
|
||||||
|
|
||||||
/// If enabled, CSS in `<style>` tags are minified using
|
/// If enabled, CSS in `<style>` tags are minified.
|
||||||
/// [esbuild-rs](https://github.com/wilsonzlin/esbuild-rs). The `js-esbuild` feature must be
|
|
||||||
/// enabled; otherwise, this value has no effect.
|
|
||||||
pub minify_css: bool,
|
pub minify_css: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,12 +5,6 @@ use std::ops::{Index, IndexMut};
|
||||||
use aho_corasick::AhoCorasick;
|
use aho_corasick::AhoCorasick;
|
||||||
use memchr::memchr;
|
use memchr::memchr;
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
use {
|
|
||||||
crossbeam::sync::WaitGroup,
|
|
||||||
std::sync::{Arc, Mutex},
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::common::gen::codepoints::Lookup;
|
use crate::common::gen::codepoints::Lookup;
|
||||||
use crate::common::spec::tag::EMPTY_SLICE;
|
use crate::common::spec::tag::EMPTY_SLICE;
|
||||||
use crate::err::{debug_repr, Error, ErrorType, ProcessingResult};
|
use crate::err::{debug_repr, Error, ErrorType, ProcessingResult};
|
||||||
|
@ -51,12 +45,6 @@ pub enum MatchAction {
|
||||||
MatchOnly,
|
MatchOnly,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
pub struct EsbuildSection {
|
|
||||||
pub src: ProcessorRange,
|
|
||||||
pub escaped: Vec<u8>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Processing state of a file. Single use only; create one per processing.
|
// Processing state of a file. Single use only; create one per processing.
|
||||||
pub struct Processor<'d> {
|
pub struct Processor<'d> {
|
||||||
code: &'d mut [u8],
|
code: &'d mut [u8],
|
||||||
|
@ -64,10 +52,6 @@ pub struct Processor<'d> {
|
||||||
read_next: usize,
|
read_next: usize,
|
||||||
// Index of the next unwritten space.
|
// Index of the next unwritten space.
|
||||||
write_next: usize,
|
write_next: usize,
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
esbuild_wg: WaitGroup,
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
esbuild_results: Arc<Mutex<Vec<EsbuildSection>>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'d> Index<ProcessorRange> for Processor<'d> {
|
impl<'d> Index<ProcessorRange> for Processor<'d> {
|
||||||
|
@ -96,10 +80,6 @@ impl<'d> Processor<'d> {
|
||||||
write_next: 0,
|
write_next: 0,
|
||||||
read_next: 0,
|
read_next: 0,
|
||||||
code,
|
code,
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
esbuild_wg: WaitGroup::new(),
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
esbuild_results: Arc::new(Mutex::new(Vec::new())),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -380,61 +360,12 @@ impl<'d> Processor<'d> {
|
||||||
self._shift(count);
|
self._shift(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
#[inline(always)]
|
|
||||||
pub fn new_esbuild_section(&self) -> (WaitGroup, Arc<Mutex<Vec<EsbuildSection>>>) {
|
|
||||||
(self.esbuild_wg.clone(), self.esbuild_results.clone())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Since we consume the Processor, we must provide a full Error with positions.
|
// Since we consume the Processor, we must provide a full Error with positions.
|
||||||
#[cfg(not(feature = "js-esbuild"))]
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn finish(self) -> Result<usize, Error> {
|
pub fn finish(self) -> Result<usize, Error> {
|
||||||
debug_assert!(self.at_end());
|
debug_assert!(self.at_end());
|
||||||
Ok(self.write_next)
|
Ok(self.write_next)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since we consume the Processor, we must provide a full Error with positions.
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
#[inline(always)]
|
|
||||||
pub fn finish(self) -> Result<usize, Error> {
|
|
||||||
debug_assert!(self.at_end());
|
|
||||||
self.esbuild_wg.wait();
|
|
||||||
let mut results = Arc::try_unwrap(self.esbuild_results)
|
|
||||||
.unwrap_or_else(|_| panic!("failed to acquire esbuild results"))
|
|
||||||
.into_inner()
|
|
||||||
.unwrap();
|
|
||||||
results.sort_unstable_by_key(|r| r.src.start);
|
|
||||||
// As we write minified JS/CSS code for sections from left to right, we will be shifting code
|
|
||||||
// towards the left as previous source JS/CSS code sections shrink. We need to keep track of
|
|
||||||
// the write pointer after previous compaction.
|
|
||||||
// If there are no script sections, then we get self.write_next which will be returned.
|
|
||||||
let mut write_next = results.get(0).map_or(self.write_next, |r| r.src.start);
|
|
||||||
for (
|
|
||||||
i,
|
|
||||||
EsbuildSection {
|
|
||||||
escaped: min_code,
|
|
||||||
src,
|
|
||||||
},
|
|
||||||
) in results.iter().enumerate()
|
|
||||||
{
|
|
||||||
// Resulting minified JS/CSS to write.
|
|
||||||
let min_len = if min_code.len() < src.len() {
|
|
||||||
self.code[write_next..write_next + min_code.len()].copy_from_slice(min_code);
|
|
||||||
min_code.len()
|
|
||||||
} else {
|
|
||||||
// If minified result is actually longer than source, then write source instead.
|
|
||||||
// NOTE: We still need to write source as previous iterations may have shifted code down.
|
|
||||||
self.code.copy_within(src.start..src.end, write_next);
|
|
||||||
src.len()
|
|
||||||
};
|
|
||||||
let write_end = write_next + min_len;
|
|
||||||
let next_start = results.get(i + 1).map_or(self.write_next, |r| r.src.start);
|
|
||||||
self.code.copy_within(src.end..next_start, write_end);
|
|
||||||
write_next = write_end + (next_start - src.end);
|
|
||||||
}
|
|
||||||
Ok(write_next)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Debug for Processor<'_> {
|
impl Debug for Processor<'_> {
|
||||||
|
|
|
@ -25,14 +25,12 @@ pub fn eval_with_cfg(src: &'static [u8], expected: &'static [u8], cfg: &Cfg) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
pub fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
|
pub fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
|
||||||
let mut cfg = Cfg::new();
|
let mut cfg = Cfg::new();
|
||||||
cfg.minify_js = true;
|
cfg.minify_js = true;
|
||||||
eval_with_cfg(src, expected, &cfg);
|
eval_with_cfg(src, expected, &cfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
pub fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
|
pub fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
|
||||||
let mut cfg = Cfg::new();
|
let mut cfg = Cfg::new();
|
||||||
cfg.minify_css = true;
|
cfg.minify_css = true;
|
||||||
|
|
|
@ -1,31 +1,13 @@
|
||||||
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
|
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
use {
|
|
||||||
crate::proc::checkpoint::WriteCheckpoint,
|
|
||||||
crate::proc::EsbuildSection,
|
|
||||||
esbuild_rs::{TransformOptions, TransformOptionsBuilder},
|
|
||||||
std::sync::Arc,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::cfg::Cfg;
|
use crate::cfg::Cfg;
|
||||||
use crate::err::ProcessingResult;
|
use crate::err::ProcessingResult;
|
||||||
|
use crate::proc::checkpoint::WriteCheckpoint;
|
||||||
use crate::proc::MatchAction::*;
|
use crate::proc::MatchAction::*;
|
||||||
use crate::proc::MatchMode::*;
|
use crate::proc::MatchMode::*;
|
||||||
use crate::proc::Processor;
|
use crate::proc::Processor;
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
lazy_static! {
|
|
||||||
static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
|
|
||||||
let mut builder = TransformOptionsBuilder::new();
|
|
||||||
builder.minify_identifiers = true;
|
|
||||||
builder.minify_syntax = true;
|
|
||||||
builder.minify_whitespace = true;
|
|
||||||
builder.build()
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref SCRIPT_END: AhoCorasick = AhoCorasickBuilder::new()
|
static ref SCRIPT_END: AhoCorasick = AhoCorasickBuilder::new()
|
||||||
.ascii_case_insensitive(true)
|
.ascii_case_insensitive(true)
|
||||||
|
@ -34,54 +16,23 @@ lazy_static! {
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingResult<()> {
|
pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingResult<()> {
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
let start = WriteCheckpoint::new(proc);
|
let start = WriteCheckpoint::new(proc);
|
||||||
proc.require_not_at_end()?;
|
proc.require_not_at_end()?;
|
||||||
proc.m(WhileNotSeq(&SCRIPT_END), Keep);
|
let src = proc.m(WhileNotSeq(&SCRIPT_END), Discard);
|
||||||
// `process_tag` will require closing tag.
|
// `process_tag` will require closing tag.
|
||||||
|
|
||||||
// TODO This is copied from style.rs.
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
if js && cfg.minify_js {
|
if js && cfg.minify_js {
|
||||||
let (wg, results) = proc.new_esbuild_section();
|
// TODO Write to `out` directly, but only if we can guarantee that the length will never exceed the input.
|
||||||
let src = start.written_range(proc);
|
let mut output = Vec::new();
|
||||||
unsafe {
|
let result = minify_js::minify(proc[src].to_vec(), &mut output);
|
||||||
esbuild_rs::transform_direct_unmanaged(
|
// TODO Collect error as warning.
|
||||||
&proc[src],
|
if !result.is_err() && output.len() < src.len() {
|
||||||
&TRANSFORM_OPTIONS.clone(),
|
proc.write_slice(output.as_slice());
|
||||||
move |result| {
|
} else {
|
||||||
let mut guard = results.lock().unwrap();
|
proc.write_range(src);
|
||||||
// TODO Handle other forms:
|
|
||||||
// 1 < /script/.exec(a).length
|
|
||||||
// ` ${` ${a</script/} `} `
|
|
||||||
// // </script>
|
|
||||||
// /* </script>
|
|
||||||
// Considerations:
|
|
||||||
// - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code.
|
|
||||||
// - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex.
|
|
||||||
// - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
|
|
||||||
// - `/</script` or `/</ script` are not valid JS so don't need to be handled.
|
|
||||||
let mut escaped = Vec::<u8>::new();
|
|
||||||
// SCRIPT_END must be case insensitive.
|
|
||||||
SCRIPT_END.replace_all_with_bytes(
|
|
||||||
result.code.as_str().trim().as_bytes(),
|
|
||||||
&mut escaped,
|
|
||||||
|_, orig, dst| {
|
|
||||||
dst.extend(b"<\\/");
|
|
||||||
// Keep original case.
|
|
||||||
dst.extend(&orig[2..]);
|
|
||||||
true
|
|
||||||
},
|
|
||||||
);
|
|
||||||
guard.push(EsbuildSection { src, escaped });
|
|
||||||
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
|
|
||||||
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.
|
|
||||||
drop(guard);
|
|
||||||
drop(results);
|
|
||||||
drop(wg);
|
|
||||||
},
|
|
||||||
);
|
|
||||||
};
|
};
|
||||||
|
} else {
|
||||||
|
proc.write_range(src);
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -1,32 +1,16 @@
|
||||||
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
|
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
|
||||||
|
use css_minify::optimizations::{Level, Minifier};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
use std::str::from_utf8_unchecked;
|
||||||
use {
|
|
||||||
crate::proc::checkpoint::WriteCheckpoint,
|
|
||||||
crate::proc::EsbuildSection,
|
|
||||||
esbuild_rs::{Loader, TransformOptions, TransformOptionsBuilder},
|
|
||||||
std::sync::Arc,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::err::ProcessingResult;
|
use crate::err::ProcessingResult;
|
||||||
|
use crate::proc::checkpoint::WriteCheckpoint;
|
||||||
use crate::proc::MatchAction::*;
|
use crate::proc::MatchAction::*;
|
||||||
use crate::proc::MatchMode::*;
|
use crate::proc::MatchMode::*;
|
||||||
use crate::proc::Processor;
|
use crate::proc::Processor;
|
||||||
use crate::Cfg;
|
use crate::Cfg;
|
||||||
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
lazy_static! {
|
|
||||||
static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
|
|
||||||
let mut builder = TransformOptionsBuilder::new();
|
|
||||||
builder.loader = Loader::CSS;
|
|
||||||
builder.minify_identifiers = true;
|
|
||||||
builder.minify_syntax = true;
|
|
||||||
builder.minify_whitespace = true;
|
|
||||||
builder.build()
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref STYLE_END: AhoCorasick = AhoCorasickBuilder::new()
|
static ref STYLE_END: AhoCorasick = AhoCorasickBuilder::new()
|
||||||
.ascii_case_insensitive(true)
|
.ascii_case_insensitive(true)
|
||||||
|
@ -35,45 +19,23 @@ lazy_static! {
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
|
pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
let start = WriteCheckpoint::new(proc);
|
let start = WriteCheckpoint::new(proc);
|
||||||
proc.require_not_at_end()?;
|
proc.require_not_at_end()?;
|
||||||
proc.m(WhileNotSeq(&STYLE_END), Keep);
|
let src = proc.m(WhileNotSeq(&STYLE_END), Discard);
|
||||||
// `process_tag` will require closing tag.
|
// `process_tag` will require closing tag.
|
||||||
|
|
||||||
// TODO This is copied from script.rs.
|
|
||||||
#[cfg(feature = "js-esbuild")]
|
|
||||||
if cfg.minify_css {
|
if cfg.minify_css {
|
||||||
let (wg, results) = proc.new_esbuild_section();
|
let result = Minifier::default()
|
||||||
let src = start.written_range(proc);
|
.minify(unsafe { from_utf8_unchecked(&proc[src]) }, Level::Three)
|
||||||
unsafe {
|
.ok();
|
||||||
esbuild_rs::transform_direct_unmanaged(
|
// TODO Collect error as warning.
|
||||||
&proc[src],
|
if result.as_ref().filter(|r| r.len() < src.len()).is_some() {
|
||||||
&TRANSFORM_OPTIONS.clone(),
|
proc.write_slice(result.unwrap().as_bytes());
|
||||||
move |result| {
|
} else {
|
||||||
let mut guard = results.lock().unwrap();
|
proc.write_range(src);
|
||||||
// TODO Are there other places that can have unintentional closing tags?
|
|
||||||
let mut escaped = Vec::<u8>::new();
|
|
||||||
// STYLE_END must be case insensitive.
|
|
||||||
STYLE_END.replace_all_with_bytes(
|
|
||||||
result.code.as_str().trim().as_bytes(),
|
|
||||||
&mut escaped,
|
|
||||||
|_, orig, dst| {
|
|
||||||
dst.extend(b"<\\/");
|
|
||||||
// Keep original case.
|
|
||||||
dst.extend(&orig[2..]);
|
|
||||||
true
|
|
||||||
},
|
|
||||||
);
|
|
||||||
guard.push(EsbuildSection { src, escaped });
|
|
||||||
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
|
|
||||||
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.
|
|
||||||
drop(guard);
|
|
||||||
drop(results);
|
|
||||||
drop(wg);
|
|
||||||
},
|
|
||||||
);
|
|
||||||
};
|
};
|
||||||
|
} else {
|
||||||
|
proc.write_range(src);
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
4
version
4
version
|
@ -93,9 +93,7 @@ if (
|
||||||
cmd("git", "pull");
|
cmd("git", "pull");
|
||||||
cmd("bash", "./prebuild.sh");
|
cmd("bash", "./prebuild.sh");
|
||||||
cmd("cargo", "test", { workingDir: RUST_MAIN_DIR });
|
cmd("cargo", "test", { workingDir: RUST_MAIN_DIR });
|
||||||
cmd("cargo", "test", "--features", "js-esbuild", {
|
cmd("cargo", "test", { workingDir: RUST_ONEPASS_DIR });
|
||||||
workingDir: RUST_ONEPASS_DIR,
|
|
||||||
});
|
|
||||||
|
|
||||||
for (const f of [
|
for (const f of [
|
||||||
`${RUST_MAIN_DIR}/Cargo.toml`,
|
`${RUST_MAIN_DIR}/Cargo.toml`,
|
||||||
|
|
Loading…
Reference in New Issue