From ca30897dae1baed857e7f50a52c9736e93e1828a Mon Sep 17 00:00:00 2001
From: Wilson Lin
Date: Tue, 10 Aug 2021 17:35:14 +1000
Subject: [PATCH] Trim script and style elements

---
Notes on this revision of the patch:
  * canonicalise keeps `set -u`; the optional CHARLINES variable is read as
    "${CHARLINES:-}" instead of relaxing the whole script.
  * charlines keeps the bytes of a multi-byte UTF-8 character on one line,
    buffers its output, and reports I/O errors instead of panicking.
  * Post-image blob ids on the `index` lines are those of the original commit.

 debug/diff/canonicalise          |  8 ++++++--
 debug/diff/charlines/.gitignore  |  2 ++
 debug/diff/charlines/Cargo.toml  |  5 +++++
 debug/diff/charlines/README.md   |  3 +++
 debug/diff/charlines/src/main.rs | 26 ++++++++++++++++++++++++++
 format                           |  1 +
 rust/common/whitespace.rs        | 18 ++++++++++++++++++
 rust/main/src/minify/css.rs      |  5 +++--
 rust/main/src/minify/js.rs       |  7 ++++---
 9 files changed, 68 insertions(+), 7 deletions(-)
 create mode 100644 debug/diff/charlines/.gitignore
 create mode 100644 debug/diff/charlines/Cargo.toml
 create mode 100644 debug/diff/charlines/README.md
 create mode 100644 debug/diff/charlines/src/main.rs

diff --git a/debug/diff/canonicalise b/debug/diff/canonicalise
index 921647e..3963a20 100755
--- a/debug/diff/canonicalise
+++ b/debug/diff/canonicalise
@@ -1,14 +1,18 @@
 #!/usr/bin/env bash
 
 set -Eeuo pipefail
 
 pushd "$(dirname "$0")" >/dev/null
 
 cargo build --manifest-path c14n/Cargo.toml --release
+cargo build --manifest-path charlines/Cargo.toml --release
 
 for f in outputs/*/*; do
-  src=$(cat "$f")
-  c14n/target/release/c14n <<< "$src" > "$f"
+  out=$(c14n/target/release/c14n < "$f")
+  if [[ "${CHARLINES:-}" == "1" ]]; then
+    out=$(charlines/target/release/charlines <<< "$out")
+  fi
+  cat <<< "$out" > "$f"
 done
 
 popd >/dev/null
diff --git a/debug/diff/charlines/.gitignore b/debug/diff/charlines/.gitignore
new file mode 100644
index 0000000..042776a
--- /dev/null
+++ b/debug/diff/charlines/.gitignore
@@ -0,0 +1,2 @@
+/Cargo.lock
+/target/
diff --git a/debug/diff/charlines/Cargo.toml b/debug/diff/charlines/Cargo.toml
new file mode 100644
index 0000000..134fea2
--- /dev/null
+++ b/debug/diff/charlines/Cargo.toml
@@ -0,0 +1,5 @@
+[package]
+publish = false
+name = "charlines"
+version = "0.0.1"
+edition = "2018"
diff --git a/debug/diff/charlines/README.md b/debug/diff/charlines/README.md
new file mode 100644
index 0000000..9253b43
--- /dev/null
+++ b/debug/diff/charlines/README.md
@@ -0,0 +1,3 @@
+# charlines
+
+Output each character from stdin onto its own stdout line. Useful for subsequence diffing when text does not naturally have a lot of line breaks (e.g. minified HTML).
diff --git a/debug/diff/charlines/src/main.rs b/debug/diff/charlines/src/main.rs
new file mode 100644
index 0000000..97fa2cf
--- /dev/null
+++ b/debug/diff/charlines/src/main.rs
@@ -0,0 +1,26 @@
+use std::io::{self, stdin, stdout, BufWriter, Read, Write};
+
+/// Copies stdin to stdout with every character on its own line.
+///
+/// A newline is written before each byte that is not a UTF-8 continuation byte
+/// (`0b10xx_xxxx`), so the bytes of one multi-byte character share a line.
+/// Pure ASCII input therefore produces exactly one byte per line.
+fn main() -> io::Result<()> {
+    let mut src = Vec::new();
+    stdin().read_to_end(&mut src)?;
+
+    // `Stdout` is line-buffered, so writing through it directly would flush
+    // (one syscall) per character. Lock once and batch the writes instead.
+    let stdout = stdout();
+    let mut out = BufWriter::new(stdout.lock());
+    for (i, &byte) in src.iter().enumerate() {
+        if i > 0 && (byte & 0xC0) != 0x80 {
+            out.write_all(b"\n")?;
+        }
+        out.write_all(&[byte])?;
+    }
+    if !src.is_empty() {
+        out.write_all(b"\n")?;
+    }
+    out.flush()
+}
diff --git a/format b/format
index d4afeb4..eb60258 100755
--- a/format
+++ b/format
@@ -11,6 +11,7 @@ for dir in \
   bench/runners/minify-html-onepass \
   cli \
   debug/diff/c14n \
+  debug/diff/charlines \
   fuzz \
   fuzz/process \
   java \
diff --git a/rust/common/whitespace.rs b/rust/common/whitespace.rs
index 15ea6ce..c28e998 100644
--- a/rust/common/whitespace.rs
+++ b/rust/common/whitespace.rs
@@ -1,5 +1,23 @@
 use crate::common::gen::codepoints::WHITESPACE;
 
+/// Returns the subslice of `val` with leading and trailing whitespace removed.
+///
+/// Whitespace is whatever the `WHITESPACE` lookup table flags. Input that is
+/// empty or consists only of whitespace yields an empty slice.
+pub fn trimmed(val: &[u8]) -> &[u8] {
+    let mut start = 0;
+    while val.get(start).filter(|&&c| WHITESPACE[c]).is_some() {
+        start += 1;
+    }
+
+    let mut end = val.len();
+    while end > start && val.get(end - 1).filter(|&&c| WHITESPACE[c]).is_some() {
+        end -= 1;
+    }
+
+    &val[start..end]
+}
+
 pub fn left_trim(val: &mut Vec<u8>) {
     let mut len = 0;
     while val.get(len).filter(|&&c| WHITESPACE[c]).is_some() {
diff --git a/rust/main/src/minify/css.rs b/rust/main/src/minify/css.rs
index 255f555..26a7543 100644
--- a/rust/main/src/minify/css.rs
+++ b/rust/main/src/minify/css.rs
@@ -9,6 +9,7 @@ use {
 };
 
 use crate::cfg::Cfg;
+use crate::common::whitespace::trimmed;
 
 #[cfg(feature = "js-esbuild")]
 lazy_static! {
@@ -27,13 +28,13 @@ lazy_static! {
 
 #[cfg(not(feature = "js-esbuild"))]
 pub fn minify_css(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
-    out.extend_from_slice(&code);
+    out.extend_from_slice(trimmed(code));
 }
 
 #[cfg(feature = "js-esbuild")]
 pub fn minify_css(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
     if !cfg.minify_css {
-        out.extend_from_slice(&code);
+        out.extend_from_slice(trimmed(code));
     } else {
         minify_using_esbuild(out, code, &MINIFY_CSS_TRANSFORM_OPTIONS.clone());
     }
diff --git a/rust/main/src/minify/js.rs b/rust/main/src/minify/js.rs
index 3b86c15..7a11f9a 100644
--- a/rust/main/src/minify/js.rs
+++ b/rust/main/src/minify/js.rs
@@ -6,7 +6,8 @@ use {
     std::sync::Arc,
 };
 
-use crate::Cfg;
+use crate::cfg::Cfg;
+use crate::common::whitespace::trimmed;
 
 #[cfg(feature = "js-esbuild")]
 lazy_static! {
@@ -24,13 +25,13 @@ lazy_static! {
 
 #[cfg(not(feature = "js-esbuild"))]
 pub fn minify_js(_cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
-    out.extend_from_slice(&code);
+    out.extend_from_slice(trimmed(code));
 }
 
 #[cfg(feature = "js-esbuild")]
 pub fn minify_js(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
     if !cfg.minify_js {
-        out.extend_from_slice(&code);
+        out.extend_from_slice(trimmed(code));
     } else {
         minify_using_esbuild(out, code, &TRANSFORM_OPTIONS.clone());
     }