From 5cef4219e65d9b7e7858f54c7afbbe7e31e50daa Mon Sep 17 00:00:00 2001 From: Wilson Lin Date: Mon, 9 Aug 2021 22:20:33 +1000 Subject: [PATCH] Fix [style] and script[type] minification; optimise attr ordering; refactor bench runners --- bench/runners/.gitignore | 2 + bench/runners/@minify-html%2Fjs/index.js | 27 +-------- bench/runners/common.js | 56 +++++++++++++++++ bench/runners/html-minifier/index.js | 40 +----------- bench/runners/html-minifier/package.json | 1 - bench/runners/minimize/index.js | 40 +----------- bench/runners/minimize/package.json | 1 - bench/runners/package.json | 6 ++ debug/diff/run | 2 +- format | 2 +- rust/main/Cargo.toml | 2 +- rust/main/src/minify/attr.rs | 2 +- rust/main/src/minify/element.rs | 77 +++++++++++++----------- rust/onepass/Cargo.toml | 2 +- 14 files changed, 117 insertions(+), 143 deletions(-) create mode 100644 bench/runners/.gitignore create mode 100644 bench/runners/common.js create mode 100644 bench/runners/package.json diff --git a/bench/runners/.gitignore b/bench/runners/.gitignore new file mode 100644 index 0000000..7578d1f --- /dev/null +++ b/bench/runners/.gitignore @@ -0,0 +1,2 @@ +/package-lock.json +node_modules/ diff --git a/bench/runners/@minify-html%2Fjs/index.js b/bench/runners/@minify-html%2Fjs/index.js index 746ff87..66c3608 100644 --- a/bench/runners/@minify-html%2Fjs/index.js +++ b/bench/runners/@minify-html%2Fjs/index.js @@ -1,32 +1,9 @@ -const fs = require("fs"); const minifyHtml = require("@minify-html/js"); -const path = require("path"); - -const iterations = parseInt(process.env.MHB_ITERATIONS, 10); -const inputDir = process.env.MHB_INPUT_DIR; -const htmlOnly = process.env.MHB_HTML_ONLY === "1"; -const outputDir = process.env.MHB_OUTPUT_DIR; +const { htmlOnly, run } = require("../common"); const minifyHtmlCfg = minifyHtml.createConfiguration({ minify_css: !htmlOnly, minify_js: !htmlOnly, }); -const results = fs.readdirSync(inputDir).map((name) => { - const src = fs.readFileSync(path.join(inputDir, name)); - - const out = minifyHtml.minify(src, minifyHtmlCfg); - const len = out.byteLength; - if (outputDir) { - fs.writeFileSync(path.join(outputDir, name), out); - } - - const start = process.hrtime.bigint(); - for (let i = 0; i < iterations; i++) { - minifyHtml.minify(src, minifyHtmlCfg); - } - const elapsed = process.hrtime.bigint() - start; - - return [name, len, iterations, Number(elapsed) / 1_000_000_000]; -}); -console.log(JSON.stringify(results)); +run((src) => minifyHtml.minify(src, minifyHtmlCfg)); diff --git a/bench/runners/common.js b/bench/runners/common.js new file mode 100644 index 0000000..3b78466 --- /dev/null +++ b/bench/runners/common.js @@ -0,0 +1,56 @@ +const esbuild = require("esbuild"); +const fs = require("fs"); +const path = require("path"); + +const iterations = parseInt(process.env.MHB_ITERATIONS, 10); +const inputDir = process.env.MHB_INPUT_DIR; +const htmlOnly = process.env.MHB_HTML_ONLY === "1"; +const outputDir = process.env.MHB_OUTPUT_DIR; + +module.exports = { + htmlOnly, + + esbuildCss: (code, type) => { + if (type === "inline") { + code = `x{${code}}`; + } + code = esbuild.transformSync(code, { + loader: "css", + minify: true, + }).code; + if (type === "inline") { + code = code.slice(2, -1); + } + return code; + }, + + esbuildJs: (code) => + esbuild.transformSync(code, { + loader: "js", + minify: true, + }).code, + + run: (minifierFn) => { + console.log( + JSON.stringify( + fs.readdirSync(inputDir).map((name) => { + const src = fs.readFileSync(path.join(inputDir, name), "utf8"); + + const out = minifierFn(src); + const len = Buffer.from(out).length; + if (outputDir) { + fs.writeFileSync(path.join(outputDir, name), out); + } + + const start = process.hrtime.bigint(); + for (let i = 0; i < iterations; i++) { + minifierFn(src); + } + const elapsed = process.hrtime.bigint() - start; + + return [name, len, iterations, Number(elapsed) / 1_000_000_000]; + }) + ) + ); + }, +}; diff --git a/bench/runners/html-minifier/index.js b/bench/runners/html-minifier/index.js index cc3e9c4..d7c2448 100644 --- a/bench/runners/html-minifier/index.js +++ b/bench/runners/html-minifier/index.js @@ -1,24 +1,5 @@ -const esbuild = require("esbuild"); -const fs = require("fs"); const htmlMinifier = require("html-minifier"); -const path = require("path"); - -const iterations = parseInt(process.env.MHB_ITERATIONS, 10); -const inputDir = process.env.MHB_INPUT_DIR; -const htmlOnly = process.env.MHB_HTML_ONLY === "1"; -const outputDir = process.env.MHB_OUTPUT_DIR; - -const esbuildCss = (code) => - esbuild.transformSync(code, { - loader: "css", - minify: true, - }).code; - -const esbuildJs = (code) => - esbuild.transformSync(code, { - loader: "js", - minify: true, - }).code; +const { htmlOnly, esbuildCss, esbuildJs, run } = require("../common"); const htmlMinifierCfg = { collapseBooleanAttributes: true, @@ -46,21 +27,4 @@ const htmlMinifierCfg = { useShortDoctype: true, }; -const results = fs.readdirSync(inputDir).map((name) => { - const src = fs.readFileSync(path.join(inputDir, name), "utf8"); - - const out = htmlMinifier.minify(src, htmlMinifierCfg); - const len = Buffer.from(out, "utf8").length; - if (outputDir) { - fs.writeFileSync(path.join(outputDir, name), out); - } - - const start = process.hrtime.bigint(); - for (let i = 0; i < iterations; i++) { - htmlMinifier.minify(src, htmlMinifierCfg); - } - const elapsed = process.hrtime.bigint() - start; - - return [name, len, iterations, Number(elapsed) / 1_000_000_000]; -}); -console.log(JSON.stringify(results)); +run((src) => htmlMinifier.minify(src, htmlMinifierCfg)); diff --git a/bench/runners/html-minifier/package.json b/bench/runners/html-minifier/package.json index 5ca503d..1e4388f 100644 --- a/bench/runners/html-minifier/package.json +++ b/bench/runners/html-minifier/package.json @@ -1,7 +1,6 @@ { "private": true, "dependencies": { - "esbuild": "0.12.19", "html-minifier": "4.0.0" } } diff --git a/bench/runners/minimize/index.js b/bench/runners/minimize/index.js index 4aa5628..0e524ee 100644 --- a/bench/runners/minimize/index.js +++ b/bench/runners/minimize/index.js @@ -1,12 +1,5 @@ -const esbuild = require("esbuild"); -const fs = require("fs"); const minimize = require("minimize"); -const path = require("path"); - -const iterations = parseInt(process.env.MHB_ITERATIONS, 10); -const inputDir = process.env.MHB_INPUT_DIR; -const htmlOnly = process.env.MHB_HTML_ONLY === "1"; -const outputDir = process.env.MHB_OUTPUT_DIR; +const { htmlOnly, esbuildCss, esbuildJs, run } = require("../common"); const jsMime = new Set([ undefined, @@ -28,18 +21,6 @@ const jsMime = new Set([ "text/x-javascript", ]); -const esbuildCss = (code) => - esbuild.transformSync(code, { - loader: "css", - minify: true, - }).code; - -const esbuildJs = (code) => - esbuild.transformSync(code, { - loader: "js", - minify: true, - }).code; - const jsCssPlugin = { id: "esbuild", element: (node, next) => { @@ -61,21 +42,4 @@ const plugins = htmlOnly ? [] : [jsCssPlugin]; const minifier = new minimize({ plugins }); -const results = fs.readdirSync(inputDir).map((name) => { - const src = fs.readFileSync(path.join(inputDir, name), "utf8"); - - const out = minifier.parse(src); - const len = Buffer.from(out, "utf8").length; - if (outputDir) { - fs.writeFileSync(path.join(outputDir, name), out); - } - - const start = process.hrtime.bigint(); - for (let i = 0; i < iterations; i++) { - minifier.parse(src); - } - const elapsed = process.hrtime.bigint() - start; - - return [name, len, iterations, Number(elapsed) / 1_000_000_000]; -}); -console.log(JSON.stringify(results)); +run((src) => minifier.parse(src)); diff --git a/bench/runners/minimize/package.json b/bench/runners/minimize/package.json index 1026f79..4cad300 100644 --- a/bench/runners/minimize/package.json +++ b/bench/runners/minimize/package.json @@ -1,7 +1,6 @@ { "private": true, "dependencies": { - "esbuild": "0.12.19", "minimize": "2.2.0" } } diff --git a/bench/runners/package.json b/bench/runners/package.json new file mode 100644 index 0000000..df6f40c --- /dev/null +++ b/bench/runners/package.json @@ -0,0 +1,6 @@ +{ + "private": true, + "dependencies": { + "esbuild": "0.12.19" + } +} diff --git a/debug/diff/run b/debug/diff/run index cda35ba..e6da8f4 100755 --- a/debug/diff/run +++ b/debug/diff/run @@ -14,7 +14,7 @@ mkdir -p "$output_dir" pushd "../../bench/runners" >/dev/null for r in *; do - if [ ! -d "$r" ]; then + if [[ ! -d "$r" ]] || [[ "$r" == "node_modules" ]]; then continue fi mkdir -p "$output_dir/$r" diff --git a/format b/format index 16df857..48e2eee 100755 --- a/format +++ b/format @@ -4,7 +4,7 @@ set -Eeuxo pipefail pushd "$(dirname "$0")" >/dev/null -npx prettier@2.3.2 -w 'version' 'bench/*.{js,json}' 'bench/*/*.{js,json}' 'gen/*.{ts,json}' +npx prettier@2.3.2 -w 'version' 'bench/*.{js,json}' 'bench/runners/*.{js,json}' 'bench/runners/*/*.{js,json}' 'gen/*.{ts,json}' for dir in \ bench/runners/minify-html \ diff --git a/rust/main/Cargo.toml b/rust/main/Cargo.toml index 2265442..cdd1e20 100644 --- a/rust/main/Cargo.toml +++ b/rust/main/Cargo.toml @@ -22,6 +22,6 @@ js-esbuild = ["crossbeam", "esbuild-rs"] [dependencies] aho-corasick = "0.7" crossbeam = { version = "0.7", optional = true } -esbuild-rs = { version = "0.12.18", optional = true } +esbuild-rs = { version = "0.12.19", optional = true } lazy_static = "1.4" memchr = "2" diff --git a/rust/main/src/minify/attr.rs b/rust/main/src/minify/attr.rs index 63871fb..14d4df2 100644 --- a/rust/main/src/minify/attr.rs +++ b/rust/main/src/minify/attr.rs @@ -326,7 +326,7 @@ pub fn minify_attr( if (value_raw.is_empty() && redundant_if_empty) || default_value.filter(|dv| dv == &value_raw).is_some() // TODO Cfg. - || (tag == b"script" && JAVASCRIPT_MIME_TYPES.contains(value_raw.as_slice())) + || (tag == b"script" && name == b"type" && JAVASCRIPT_MIME_TYPES.contains(value_raw.as_slice())) { return AttrMinified::Redundant; }; diff --git a/rust/main/src/minify/element.rs b/rust/main/src/minify/element.rs index 6c7086b..e7cd9d5 100644 --- a/rust/main/src/minify/element.rs +++ b/rust/main/src/minify/element.rs @@ -7,13 +7,6 @@ use crate::common::spec::tag::omission::{can_omit_as_before, can_omit_as_last_no use crate::minify::attr::{minify_attr, AttrMinified}; use crate::minify::content::minify_content; -#[derive(Copy, Clone, Eq, PartialEq)] -enum LastAttr { - NoValue, - Quoted, - Unquoted, -} - pub fn minify_element( cfg: &Cfg, out: &mut Vec, @@ -30,49 +23,63 @@ pub fn minify_element( closing_tag: ElementClosingTag, children: Vec, ) { + // Output quoted attributes, followed by unquoted, to optimise space omission between attributes. + let mut quoted = Vec::new(); + let mut unquoted = Vec::new(); + + for (name, value) in attributes { + match minify_attr(cfg, ns, tag_name, &name, value) { + AttrMinified::Redundant => {} + a @ AttrMinified::NoValue => unquoted.push((name, a)), + AttrMinified::Value(v) => { + debug_assert!(v.len() > 0); + if v.quoted() { + quoted.push((name, v)); + } else { + unquoted.push((name, AttrMinified::Value(v))); + } + } + }; + } + + // Attributes list could become empty after minification, so check opening tag omission eligibility after attributes minification. let can_omit_opening_tag = (tag_name == b"html" || tag_name == b"head") - && attributes.is_empty() + && quoted.len() + unquoted.len() == 0 && !cfg.keep_html_and_head_opening_tags; let can_omit_closing_tag = !cfg.keep_closing_tags && (can_omit_as_before(tag_name, next_sibling_as_element_tag_name) || (is_last_child_text_or_element_node && can_omit_as_last_node(parent, tag_name))); - // TODO Attributes list could become empty after minification, making opening tag eligible for omission again. if !can_omit_opening_tag { out.push(b'<'); out.extend_from_slice(tag_name); - let mut last_attr = LastAttr::NoValue; - // TODO Further optimisation: order attrs based on optimal spacing strategy, given that spaces can be omitted after quoted attrs, and maybe after the tag name? - let mut attrs_sorted = attributes.into_iter().collect::>(); - attrs_sorted.sort_unstable_by(|a, b| a.0.cmp(&b.0)); - for (name, value) in attrs_sorted { - let min = minify_attr(cfg, ns, tag_name, &name, value); - if let AttrMinified::Redundant = min { - continue; - }; - if cfg.keep_spaces_between_attributes || last_attr != LastAttr::Quoted { + + for (i, (name, value)) in quoted.iter().enumerate() { + if i == 0 || cfg.keep_spaces_between_attributes { out.push(b' '); }; out.extend_from_slice(&name); - match min { - AttrMinified::NoValue => { - last_attr = LastAttr::NoValue; - } - AttrMinified::Value(v) => { - debug_assert!(v.len() > 0); - out.push(b'='); - v.out(out); - last_attr = if v.quoted() { - LastAttr::Quoted - } else { - LastAttr::Unquoted - }; - } - _ => unreachable!(), + out.push(b'='); + debug_assert!(value.quoted()); + value.out(out); + } + for (i, (name, value)) in unquoted.iter().enumerate() { + // Write a space between unquoted attributes, + // and after the tag name if it wasn't written already during `quoted` processing. + if i > 0 || (i == 0 && quoted.len() == 0) { + out.push(b' '); + }; + out.extend_from_slice(&name); + if let AttrMinified::Value(v) = value { + out.push(b'='); + v.out(out); }; } + if closing_tag == ElementClosingTag::SelfClosing { - if last_attr == LastAttr::Unquoted { + // Write a space after the tag name if there are no attributes, + // or the last attribute is unquoted. + if unquoted.len() > 0 || unquoted.len() + quoted.len() == 0 { out.push(b' '); }; out.push(b'/'); diff --git a/rust/onepass/Cargo.toml b/rust/onepass/Cargo.toml index eaabb9f..7fbbac9 100644 --- a/rust/onepass/Cargo.toml +++ b/rust/onepass/Cargo.toml @@ -22,6 +22,6 @@ js-esbuild = ["crossbeam", "esbuild-rs"] [dependencies] aho-corasick = "0.7" crossbeam = { version = "0.7", optional = true } -esbuild-rs = { version = "0.12.18", optional = true } +esbuild-rs = { version = "0.12.19", optional = true } lazy_static = "1.4" memchr = "2"