diff --git a/fuzz/in/tags.html b/fuzz/in/tags.html index 4e9bbce..de87686 100644 --- a/fuzz/in/tags.html +++ b/fuzz/in/tags.html @@ -2,6 +2,7 @@ + <title></titl>
&lt;
diff --git a/notes/Parsing.md b/notes/Parsing.md index 2f54732..a3aeea2 100644 --- a/notes/Parsing.md +++ b/notes/Parsing.md @@ -15,7 +15,7 @@ If the input ends while in the middle of a tag or attribute value, that tag/attr |Rule|Example source|Example interpretation| |---|---|---| |A tag name is one or more alphanumeric, `:`, or `-` characters|``|``| -|`script`, `style`, and `textarea` tags do not close until the case-insensitive sequence ``|``| +|`script`, `style`, `textarea`, and `title` tags do not close until the case-insensitive sequence ``|``| |Attribute-like syntax in closing tags are parsed like attributes but ignored.|`
5`|`
`| |If the character following `` is dropped. It is not considered a closing tag, even as an invalid one.|`
">5`|`
">5`| |If a closing tag represents a void element, the closing tag is dropped.|`

ax
i
`|`

axi
`| diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs index fe40d72..3f714cc 100644 --- a/src/cfg/mod.rs +++ b/src/cfg/mod.rs @@ -17,6 +17,8 @@ pub struct Cfg { /// Do not omit closing tags when possible. pub keep_closing_tags: bool, + /// Do not omit `<html>` and `<head>` opening tags when they don't have attributes. + pub keep_html_and_head_opening_tags: bool, /// Keep spaces between attributes when possible to conform to HTML standards. pub keep_spaces_between_attributes: bool, /// Keep all comments. @@ -32,6 +34,7 @@ impl Cfg { Cfg { keep_closing_tags: false, keep_comments: false, + keep_html_and_head_opening_tags: false, keep_spaces_between_attributes: false, minify_css: false, minify_js: false, diff --git a/src/minify/element.rs b/src/minify/element.rs index a8cb491..3f88473 100644 --- a/src/minify/element.rs +++ b/src/minify/element.rs @@ -30,49 +30,55 @@ pub fn minify_element( closing_tag: ElementClosingTag, children: Vec, ) { + let can_omit_opening_tag = (tag_name == b"html" || tag_name == b"head") + && attributes.is_empty() + && !cfg.keep_html_and_head_opening_tags; let can_omit_closing_tag = !cfg.keep_closing_tags && (can_omit_as_before(tag_name, next_sibling_as_element_tag_name) || (is_last_child_text_or_element_node && can_omit_as_last_node(parent, tag_name))); - out.push(b'<'); - out.extend_from_slice(tag_name); - let mut last_attr = LastAttr::NoValue; - // TODO Further optimisation: order attrs based on optimal spacing strategy, given that spaces can be omitted after quoted attrs, and maybe after the tag name? 
- let mut attrs_sorted = attributes.into_iter().collect::>(); - attrs_sorted.sort_unstable_by(|a, b| a.0.cmp(&b.0)); - for (name, value) in attrs_sorted { - let min = minify_attr(ns, tag_name, &name, value); - if let AttrMinified::Redundant = min { - continue; - }; - if cfg.keep_spaces_between_attributes || last_attr != LastAttr::Quoted { - out.push(b' '); - }; - out.extend_from_slice(&name); - match min { - AttrMinified::NoValue => { - last_attr = LastAttr::NoValue; - } - AttrMinified::Value(v) => { - debug_assert!(v.len() > 0); - out.push(b'='); - v.out(out); - last_attr = if v.quoted() { - LastAttr::Quoted - } else { - LastAttr::Unquoted - }; - } - _ => unreachable!(), + // TODO Attributes list could become empty after minification, making opening tag eligible for omission again. + if !can_omit_opening_tag { + out.push(b'<'); + out.extend_from_slice(tag_name); + let mut last_attr = LastAttr::NoValue; + // TODO Further optimisation: order attrs based on optimal spacing strategy, given that spaces can be omitted after quoted attrs, and maybe after the tag name? 
+ let mut attrs_sorted = attributes.into_iter().collect::>(); + attrs_sorted.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + for (name, value) in attrs_sorted { + let min = minify_attr(ns, tag_name, &name, value); + if let AttrMinified::Redundant = min { + continue; + }; + if cfg.keep_spaces_between_attributes || last_attr != LastAttr::Quoted { + out.push(b' '); + }; + out.extend_from_slice(&name); + match min { + AttrMinified::NoValue => { + last_attr = LastAttr::NoValue; + } + AttrMinified::Value(v) => { + debug_assert!(v.len() > 0); + out.push(b'='); + v.out(out); + last_attr = if v.quoted() { + LastAttr::Quoted + } else { + LastAttr::Unquoted + }; + } + _ => unreachable!(), + }; + } + if closing_tag == ElementClosingTag::SelfClosing { + if last_attr == LastAttr::Unquoted { + out.push(b' '); + }; + out.push(b'/'); }; + out.push(b'>'); } - if closing_tag == ElementClosingTag::SelfClosing { - if last_attr == LastAttr::Unquoted { - out.push(b' '); - }; - out.push(b'/'); - }; - out.push(b'>'); if closing_tag == ElementClosingTag::SelfClosing || closing_tag == ElementClosingTag::Void { debug_assert!(children.is_empty()); diff --git a/src/parse/bang.rs b/src/parse/bang.rs index 13377d7..8256a2b 100644 --- a/src/parse/bang.rs +++ b/src/parse/bang.rs @@ -3,9 +3,9 @@ use crate::parse::Code; use memchr::memchr; pub fn parse_bang(code: &mut Code) -> NodeData { - debug_assert!(code.str().starts_with(b"', code.str()) { + let (len, matched) = match memchr(b'>', code.as_slice()) { Some(m) => (m, 1), None => (code.rem(), 0), }; diff --git a/src/parse/comment.rs b/src/parse/comment.rs index dc0667b..9c337a7 100644 --- a/src/parse/comment.rs +++ b/src/parse/comment.rs @@ -9,9 +9,9 @@ lazy_static! { } pub fn parse_comment(code: &mut Code) -> NodeData { - debug_assert!(code.str().starts_with(b"