diff --git a/README.md b/README.md index 083ffa1..b6b7f0d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # minify-html -An HTML minifier meticulously optimised for both speed and effectiveness, available for Rust, Node.js, Python, Java, and Ruby. +An HTML minifier meticulously optimised for both speed and effectiveness written in Rust. +Comes with native bindings to Node.js, Python, Java, and Ruby. - Advanced minification strategy beats other minifiers with only one pass. - Uses zero memory allocations, SIMD searching, direct tries, and lookup tables. @@ -45,7 +46,7 @@ minify-html --src /path/to/src.html --out /path/to/output.min.html minify-html = { version = "0.3.8", features = ["js-esbuild"] } ``` -Building with the `js-esbuild` feature requires the Go compiler to be installed as well, to build the [JS minifier](https://github.com/evanw/esbuild). +Building with the `js-esbuild` feature requires the Go compiler to be installed as well, to build the [JS minifier](https://github.com/wilsonzlin/esbuild-rs). If the `js-esbuild` feature is not enabled, `cfg.minify_js` will have no effect. @@ -415,9 +416,7 @@ Numeric entities that do not refer to a valid [Unicode Scalar Value](https://www If an entity is unintentionally formed after decoding, the leading ampersand is encoded, e.g. `&&#97;&#109;&#112;;` becomes `&ampamp;`. This is done as `&amp` is equal to or shorter than all other entity representations of characters part of an entity (`[&#a-zA-Z0-9;]`), and there is no other conflicting entity name that starts with `amp`. -It's possible to get an unintentional entity after removing comments, e.g. `&am<!-- -->p`. - -Left chevrons after any decoding in text are encoded to `&LT` if possible or `&LT;` otherwise. +Note that it's possible to get an unintentional entity after removing comments, e.g. `&am<!-- -->p`; minify-html will **not** encode the leading ampersand. 
### Comments diff --git a/src/unit/content.rs b/src/unit/content.rs index 0e47790..8bdc0f3 100644 --- a/src/unit/content.rs +++ b/src/unit/content.rs @@ -157,8 +157,8 @@ pub fn process_content(proc: &mut Processor, cfg: &Cfg, ns: Namespace, parent: O if proc.last_is(b'<') && ( TAG_NAME_CHAR[c] || c == b'?' || c == b'!' || c == b'/' ) { - // If this is a tag name char and we just wrote `<` (decoded or original), - // we need to encode the `<`. + // We need to encode the `<` that we just wrote as otherwise this char will + // cause it to be interpreted as something else (e.g. opening tag). // NOTE: This conditional should mean that we never have to worry about a // semicolon after encoded `<` becoming `&LT;` and part of the entity, as the // only time `&LT` appears is when we write it here; every other time we always diff --git a/src/unit/tag.rs b/src/unit/tag.rs index 8be2771..d71187b 100644 --- a/src/unit/tag.rs +++ b/src/unit/tag.rs @@ -94,7 +94,14 @@ impl MaybeClosingTag { } // TODO Comment param `prev_sibling_closing_tag`. -pub fn process_tag(proc: &mut Processor, cfg: &Cfg, ns: Namespace, parent: Option, mut prev_sibling_closing_tag: MaybeClosingTag, source_tag_name: ProcessorRange) -> ProcessingResult { +pub fn process_tag( + proc: &mut Processor, + cfg: &Cfg, + ns: Namespace, + parent: Option, + mut prev_sibling_closing_tag: MaybeClosingTag, + source_tag_name: ProcessorRange, +) -> ProcessingResult { if prev_sibling_closing_tag.exists_and(|prev_tag| !can_omit_as_before(proc, Some(prev_tag), source_tag_name)) { prev_sibling_closing_tag.write(proc); };