Drop mismatched closing tags instead of reinterpreting as opening tags

This commit is contained in:
Wilson Lin 2022-06-22 20:13:36 +10:00
parent 2947c0af1d
commit c5bfac177c
5 changed files with 13 additions and 10 deletions

View File

@ -1,5 +1,9 @@
# minify-html changelog
## 0.10.0
- Drop unmatched closing tags instead of reinterpreting them as opening tags. This avoids unintentionally creating a large, deep tree from malformed input that contains repeated unmatched closing tags (e.g. a broken HTML template).
## 0.9.2
- Fix Node.js dependency version.

View File

@ -25,7 +25,7 @@ If the input ends while in the middle of a tag or attribute value, that tag/attr
|Attribute-like syntax in closing tags is parsed like attributes but ignored.|`<div></div x=">">5`|`<div></div>`|
|If the character following `</` is not a valid tag name character, all code until the next `>` is dropped. It is not considered a closing tag, even as an invalid one.|`<div></ div x=">">5`|`<div>">5`|
|If a closing tag represents a void element, the closing tag is dropped.|`<div><br>ax</br><img></img>i</div>`|`<div><br>ax<img>i</div>`|
|If a closing tag does not match the opening tag, and the closing tag cannot be omitted as per the spec, the closing tag is reinterpreted as an opening tag. NOTE: Most browsers have far more complex logic.|`<div><span></div></span>5`|`<div><span><div><span>5`|
|If a closing tag does not match the opening tag, and the closing tag cannot be omitted as per the spec, the closing tag is ignored. NOTE: Most browsers have far more complex logic.|`<div><span></div></span>5`|`<div><span></span>5`|
|If an opening tag ends with `/>` instead of `>`, and it's an HTML tag, the `/` is ignored. If it's an SVG tag, it's self-closing.|`<div/>5<div></div>`|`<div>5<div></div>`|
|A slash as the last character of an unquoted attribute value immediately preceding a `>` is not interpreted as part of the self-closing syntax `/>`, even for self-closable SVG elements.|`<circle r=1/>`|`<circle r="1/">`|
|Any opening `html`, `head`, or `body` tags after the first are ignored.|`<html><head><meta><body><div><head><span><body>`|`<html><head><meta><body><div><span>`|

View File

@ -172,8 +172,8 @@ pub fn parse_content(
// Closing tag for void element, drop.
typ = IgnoredTag;
} else if parent.is_empty() || parent != name.as_slice() {
// Closing tag mismatch, reinterpret as opening tag.
typ = OpeningTag;
// Closing tag mismatch, drop.
typ = IgnoredTag;
};
};
typ = maybe_ignore_html_head_body(code, typ, parent, &name);

View File

@ -124,8 +124,7 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag {
}
}
// `<` or `</` must be next. If `</` is next, tag is reinterpreted as opening tag (i.e. `/` is ignored).
// `parent` should be an empty slice if it doesn't exist.
// `<` must be next. `parent` should be an empty slice if it doesn't exist.
pub fn parse_element(code: &mut Code, ns: Namespace, parent: &[u8]) -> NodeData {
let ParsedTag {
name: elem_name,

View File

@ -80,14 +80,14 @@ fn test_removal_of_html_and_head_opening_tags() {
/// Exercises inputs that contain closing tags with no matching open parent element.
///
/// NOTE(review): this listing is a rendered diff hunk whose +/- markers were lost, so each
/// pre-change expectation (closing tag reinterpreted as an opening tag) sits directly before
/// the post-change expectation for the same input (closing tag dropped).
/// Presumably `eval` checks that processing its first argument yields its second; its
/// definition is not visible here, so confirm before relying on that.
#[test]
fn test_unmatched_closing_tag() {
// Pre-change expectation: the stray `</p>` came out as `<p>`.
eval(b"Hello</p>Goodbye", b"Hello<p>Goodbye");
// Post-change expectation: the stray `</p>` is removed entirely.
eval(b"Hello</p>Goodbye", b"HelloGoodbye");
// Closing tag of a void element (`</br>`): expected output keeps only `<br>`.
eval(b"Hello<br></br>Goodbye", b"Hello<br>Goodbye");
// Pre-change expectations: `</p>` under a non-matching parent came out as `<p>`.
eval(b"<div>Hello</p>Goodbye", b"<div>Hello<p>Goodbye");
eval(b"<ul><li>a</p>", b"<ul><li>a<p>");
eval(b"<ul><li><rt>a</p>", b"<ul><li><rt>a<p>");
// Post-change expectations for the same three inputs: `</p>` is dropped.
eval(b"<div>Hello</p>Goodbye", b"<div>HelloGoodbye");
eval(b"<ul><li>a</p>", b"<ul><li>a");
eval(b"<ul><li><rt>a</p>", b"<ul><li><rt>a");
// NOTE(review): the call below shows three arguments only because the old and new
// expected outputs are both present in this diff rendering.
eval(
b"<html><head><body><ul><li><rt>a</p>",
// Pre-change expected output.
b"<html><head><body><ul><li><rt>a<p>",
// Post-change expected output.
b"<html><head><body><ul><li><rt>a",
);
}