diff --git a/gen/codepoints.ts b/gen/codepoints.ts index 75c5ccc..ec5ad9d 100644 --- a/gen/codepoints.ts +++ b/gen/codepoints.ts @@ -36,7 +36,8 @@ const SINGLE_QUOTE = [c('\'')]; const ATTR_QUOTE = [...DOUBLE_QUOTE, ...SINGLE_QUOTE]; // Valid unquoted attribute value characters. // See https://html.spec.whatwg.org/multipage/syntax.html#unquoted for spec. -const NOT_UNQUOTED_ATTR_VAL_CHAR = [...WHITESPACE, c('"'), c('\''), c('='), c('<'), c('>'), c('`')]; +// Browsers seem to simply consider any characters until whitespace or `>` part of an unquoted attribute value, despite the spec having more restrictions on allowed characters. +const NOT_UNQUOTED_ATTR_VAL_CHAR = [...WHITESPACE, c('>')]; // Tag names may only use ASCII alphanumerics. However, some people also use `:` and `-`. // See https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name for spec. diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 9ce120e..d3efa90 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -195,6 +195,8 @@ fn test_attr_single_quoted_value_minification() { #[test] fn test_attr_unquoted_value_minification() { + eval(b"", b""); + eval(b"", b""); eval(b"", b""); eval(b"", b""); eval(b"", br#""#); @@ -292,13 +294,6 @@ fn test_space_between_attrs_minification() { eval(b"
", b"
"); } -#[test] -fn test_attr_value_backtick() { - // The backtick is not interpreted as a quote; as such, the "b" attribute is interpreted as having an empty value, - // and the "`hello`" attribute is a boolean attribute (also empty value). - eval(b"", b""); -} - #[test] fn test_hexadecimal_entity_decoding() { eval(b".", b"."); diff --git a/src/unit/attr/value.rs b/src/unit/attr/value.rs index 07135c2..e87b102 100644 --- a/src/unit/attr/value.rs +++ b/src/unit/attr/value.rs @@ -91,7 +91,7 @@ struct Metrics { impl Metrics { fn unquoted_len(&self, raw_val: &[u8]) -> usize { - // TODO VERIFY (including control characters and Unicode noncharacters) Browsers seem to simply consider any characters until whitespace part of an unquoted attribute value, despite the spec (and minify-html) having more restrictions on allowed characters. + // TODO VERIFY (including control characters and Unicode noncharacters) Browsers seem to simply consider any characters until whitespace or `>` part of an unquoted attribute value, despite the spec having more restrictions on allowed characters. // Costs for encoding first and last characters if going with unquoted attribute value. // NOTE: Don't need to consider whitespace for either as all whitespace will be encoded and counts as part of `total_whitespace_encoded_length`. // Need to consider semicolon in any encoded entity in case first char is followed by semicolon or digit.