From c5bfac177c691fb887271154751efce773df3b67 Mon Sep 17 00:00:00 2001 From: Wilson Lin Date: Wed, 22 Jun 2022 20:13:36 +1000 Subject: [PATCH] Drop mismatched closing tags instead of reinterpreting as opening tags --- CHANGELOG.md | 4 ++++ notes/Parsing.md | 2 +- rust/main/src/parse/content.rs | 4 ++-- rust/main/src/parse/element.rs | 3 +-- rust/main/src/tests/mod.rs | 10 +++++----- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fd8cd26..55ef394 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # minify-html changelog +## 0.10.0 + +- Drop unmatched closing tags instead of reinterpreting them as opening tags. This avoids the possibility of unintentionally creating a large deep tree due to malformed inputs where there are repeated unmatched closing tags (e.g. broken HTML template). + ## 0.9.2 - Fix Node.js dependency version. diff --git a/notes/Parsing.md b/notes/Parsing.md index 2253cd6..22edbad 100644 --- a/notes/Parsing.md +++ b/notes/Parsing.md @@ -25,7 +25,7 @@ If the input ends while in the middle of a tag or attribute value, that tag/attr |Attribute-like syntax in closing tags are parsed like attributes but ignored.|`
5`|`
`| |If the character following `</` is not a valid tag name character, all code until the next `>` is dropped. It is not considered a closing tag, even as an invalid one.|`
">5`|`
">5`| |If a closing tag represents a void element, the closing tag is dropped.|`

ax
i
`|`

axi
`| -|If a closing tag does not match the opening tag, and the closing tag cannot be omitted as per the spec, the closing tag is reinterpreted as an opening tag. NOTE: Most browsers have far more complex logic.|`
5`|`
5`| +|If a closing tag does not match the opening tag, and the closing tag cannot be omitted as per the spec, the closing tag is ignored. NOTE: Most browsers have far more complex logic.|`
5`|`
5`| |If an opening tag ends with `/>` instead of `>`, and it's an HTML tag, the `/` is ignored. If it's an SVG tag, it's self-closing.|`
5
`|`
5
`| |A slash as the last character of an unquoted attribute value immediately preceding a `>` is not interpreted as part of the self-closing syntax `/>`, even for self-closable SVG elements.|``|``| |Any opening `html`, `head`, or `body` tags after the first are ignored.|`
`|`
`| diff --git a/rust/main/src/parse/content.rs b/rust/main/src/parse/content.rs index 73de2b7..364c2f9 100644 --- a/rust/main/src/parse/content.rs +++ b/rust/main/src/parse/content.rs @@ -172,8 +172,8 @@ pub fn parse_content( // Closing tag for void element, drop. typ = IgnoredTag; } else if parent.is_empty() || parent != name.as_slice() { - // Closing tag mismatch, reinterpret as opening tag. - typ = OpeningTag; + // Closing tag mismatch, drop. + typ = IgnoredTag; }; }; typ = maybe_ignore_html_head_body(code, typ, parent, &name); diff --git a/rust/main/src/parse/element.rs b/rust/main/src/parse/element.rs index 6534d90..1bea497 100644 --- a/rust/main/src/parse/element.rs +++ b/rust/main/src/parse/element.rs @@ -124,8 +124,7 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag { } } -// `<` or ` NodeData { let ParsedTag { name: elem_name, diff --git a/rust/main/src/tests/mod.rs b/rust/main/src/tests/mod.rs index 0eaa2c0..5d90960 100644 --- a/rust/main/src/tests/mod.rs +++ b/rust/main/src/tests/mod.rs @@ -80,14 +80,14 @@ fn test_removal_of_html_and_head_opening_tags() { #[test] fn test_unmatched_closing_tag() { - eval(b"Hello

Goodbye", b"Hello

Goodbye"); + eval(b"Hello

Goodbye", b"HelloGoodbye"); eval(b"Hello

Goodbye", b"Hello
Goodbye"); - eval(b"
Hello

Goodbye", b"
Hello

Goodbye"); - eval(b"

  • a

    ", b"
    • a

      "); - eval(b"

      • a

        ", b"
        • a

          "); + eval(b"

          Hello

          Goodbye", b"
          HelloGoodbye"); + eval(b"
          • a

            ", b"
            • a"); + eval(b"
              • a

                ", b"
                • a"); eval( b"
                  • a

                    ", - b"
                    • a

                      ", + b"

                      • a", ); }