Relax parsing of unquoted attr values to accept own minified output
This commit is contained in:
parent
4b08332f6b
commit
080d4e0c96
|
@ -36,7 +36,8 @@ const SINGLE_QUOTE = [c('\'')];
|
|||
const ATTR_QUOTE = [...DOUBLE_QUOTE, ...SINGLE_QUOTE];
|
||||
// Valid unquoted attribute value characters.
|
||||
// See https://html.spec.whatwg.org/multipage/syntax.html#unquoted for spec.
|
||||
const NOT_UNQUOTED_ATTR_VAL_CHAR = [...WHITESPACE, c('"'), c('\''), c('='), c('<'), c('>'), c('`')];
|
||||
// Browsers appear to treat every character up to the next whitespace or `>` as part of an unquoted attribute value, even though the spec places more restrictions on the allowed characters.
|
||||
const NOT_UNQUOTED_ATTR_VAL_CHAR = [...WHITESPACE, c('>')];
|
||||
|
||||
// Tag names may only use ASCII alphanumerics. However, some people also use `:` and `-`.
|
||||
// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name for spec.
|
||||
|
|
|
@ -195,6 +195,8 @@ fn test_attr_single_quoted_value_minification() {
|
|||
|
||||
#[test]
|
||||
fn test_attr_unquoted_value_minification() {
|
||||
eval(b"<a b==></a>", b"<a b==></a>");
|
||||
eval(b"<a b=`'\"<<==/`/></a>", b"<a b=`'\"<<==/`/></a>");
|
||||
eval(b"<a b=\"hello\"></a>", b"<a b=hello></a>");
|
||||
eval(b"<a b='hello'></a>", b"<a b=hello></a>");
|
||||
eval(b"<a b=/>></a>", br#"<a b="/>"></a>"#);
|
||||
|
@ -292,13 +294,6 @@ fn test_space_between_attrs_minification() {
|
|||
eval(b"<div a=\"a\"b=\"b\"></div>", b"<div a=a b=b></div>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_attr_value_backtick() {
|
||||
// The backtick is not interpreted as a quote; as such, the "b" attribute is interpreted as having an empty value,
|
||||
// and the "`hello`" attribute is a boolean attribute (also empty value).
|
||||
eval(b"<a b=`hello`></a>", b"<a b `hello`></a>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hexadecimal_entity_decoding() {
|
||||
eval(b".", b".");
|
||||
|
|
|
@ -91,7 +91,7 @@ struct Metrics {
|
|||
|
||||
impl Metrics {
|
||||
fn unquoted_len(&self, raw_val: &[u8]) -> usize {
|
||||
// TODO VERIFY (including control characters and Unicode noncharacters) Browsers seem to simply consider any characters until whitespace part of an unquoted attribute value, despite the spec (and minify-html) having more restrictions on allowed characters.
|
||||
// TODO: Verify (including control characters and Unicode noncharacters): browsers seem to treat every character up to the next whitespace as part of an unquoted attribute value, even though the spec places more restrictions on the allowed characters.
|
||||
// Costs for encoding first and last characters if going with unquoted attribute value.
|
||||
// NOTE: Don't need to consider whitespace for either as all whitespace will be encoded and counts as part of `total_whitespace_encoded_length`.
|
||||
// Need to consider semicolon in any encoded entity in case first char is followed by semicolon or digit.
|
||||
|
|
Loading…
Reference in New Issue