Improve attr min perf

This commit is contained in:
Wilson Lin 2021-08-07 15:10:31 +10:00
parent 2885724931
commit a0601da597
2 changed files with 53 additions and 84 deletions

View File

@ -109,43 +109,19 @@ lazy_static! {
static ref UNQUOTED_QUOTED_REPLACER: Replacer = build_unquoted_replacer(); static ref UNQUOTED_QUOTED_REPLACER: Replacer = build_unquoted_replacer();
} }
#[derive(Copy, Clone, Eq, PartialEq)] pub struct AttrMinifiedValue {
pub enum AttrType { quoted: bool,
Redundant,
NoValue,
Quoted,
Unquoted,
}
pub struct AttrValMinified {
typ: AttrType,
prefix: &'static [u8], prefix: &'static [u8],
data: Vec<u8>, data: Vec<u8>,
start: usize, start: usize,
suffix: &'static [u8], suffix: &'static [u8],
} }
impl Eq for AttrValMinified {} impl AttrMinifiedValue {
pub fn quoted(&self) -> bool {
impl PartialEq<Self> for AttrValMinified { self.quoted
fn eq(&self, other: &Self) -> bool {
self.len() == other.len()
} }
}
impl PartialOrd<Self> for AttrValMinified {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.len().partial_cmp(&other.len())
}
}
impl Ord for AttrValMinified {
fn cmp(&self, other: &Self) -> Ordering {
self.len().cmp(&other.len())
}
}
impl AttrValMinified {
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.prefix.len() + (self.data.len() - self.start) + self.suffix.len() self.prefix.len() + (self.data.len() - self.start) + self.suffix.len()
} }
@ -162,15 +138,11 @@ impl AttrValMinified {
self.out(&mut out); self.out(&mut out);
String::from_utf8(out).unwrap() String::from_utf8(out).unwrap()
} }
pub fn typ(&self) -> AttrType {
self.typ
}
} }
pub fn encode_using_double_quotes(val: &[u8]) -> AttrValMinified { pub fn encode_using_double_quotes(val: &[u8]) -> AttrMinifiedValue {
AttrValMinified { AttrMinifiedValue {
typ: AttrType::Quoted, quoted: true,
prefix: b"\"", prefix: b"\"",
data: DOUBLE_QUOTED_REPLACER.replace_all(val), data: DOUBLE_QUOTED_REPLACER.replace_all(val),
start: 0, start: 0,
@ -178,9 +150,9 @@ pub fn encode_using_double_quotes(val: &[u8]) -> AttrValMinified {
} }
} }
pub fn encode_using_single_quotes(val: &[u8]) -> AttrValMinified { pub fn encode_using_single_quotes(val: &[u8]) -> AttrMinifiedValue {
AttrValMinified { AttrMinifiedValue {
typ: AttrType::Quoted, quoted: true,
prefix: b"'", prefix: b"'",
data: SINGLE_QUOTED_REPLACER.replace_all(val), data: SINGLE_QUOTED_REPLACER.replace_all(val),
start: 0, start: 0,
@ -188,22 +160,22 @@ pub fn encode_using_single_quotes(val: &[u8]) -> AttrValMinified {
} }
} }
pub fn encode_unquoted(val: &[u8]) -> AttrValMinified { pub fn encode_unquoted(val: &[u8]) -> AttrMinifiedValue {
let data = UNQUOTED_QUOTED_REPLACER.replace_all(val); let data = UNQUOTED_QUOTED_REPLACER.replace_all(val);
let prefix: &'static [u8] = match data.get(0) { let prefix: &'static [u8] = match data.get(0) {
Some(b'"') => match data.get(1) { Some(b'"') => match data.get(1) {
Some(&s) if DIGIT[s] || s == b';' => b"&#34;", Some(&c2) if DIGIT[c2] || c2 == b';' => b"&#34;",
_ => b"&#34", _ => b"&#34",
}, },
Some(b'\'') => match data.get(1) { Some(b'\'') => match data.get(1) {
Some(&s) if DIGIT[s] || s == b';' => b"&#39;", Some(&c2) if DIGIT[c2] || c2 == b';' => b"&#39;",
_ => b"&#39", _ => b"&#39",
}, },
_ => b"", _ => b"",
}; };
let start = if !prefix.is_empty() { 1 } else { 0 }; let start = if !prefix.is_empty() { 1 } else { 0 };
AttrValMinified { AttrMinifiedValue {
typ: AttrType::Unquoted, quoted: false,
prefix, prefix,
data, data,
start, start,
@ -211,12 +183,13 @@ pub fn encode_unquoted(val: &[u8]) -> AttrValMinified {
} }
} }
pub fn minify_attr_val( pub enum AttrMinified {
ns: Namespace, Redundant,
tag: &[u8], NoValue,
name: &[u8], Value(AttrMinifiedValue),
mut value_raw: Vec<u8>, }
) -> AttrValMinified {
pub fn minify_attr(ns: Namespace, tag: &[u8], name: &[u8], mut value_raw: Vec<u8>) -> AttrMinified {
let attr_cfg = ATTRS.get(ns, tag, name); let attr_cfg = ATTRS.get(ns, tag, name);
let should_collapse_and_trim = attr_cfg.filter(|attr| attr.collapse_and_trim).is_some(); let should_collapse_and_trim = attr_cfg.filter(|attr| attr.collapse_and_trim).is_some();
@ -237,33 +210,24 @@ pub fn minify_attr_val(
// TODO Cfg. // TODO Cfg.
|| (tag == b"script" && JAVASCRIPT_MIME_TYPES.contains(value_raw.as_slice())) || (tag == b"script" && JAVASCRIPT_MIME_TYPES.contains(value_raw.as_slice()))
{ {
return AttrValMinified { return AttrMinified::Redundant;
typ: AttrType::Redundant,
prefix: b"",
data: Vec::new(),
start: 0,
suffix: b"",
};
}; };
if is_boolean { if is_boolean || value_raw.is_empty() {
return AttrValMinified { return AttrMinified::NoValue;
typ: AttrType::NoValue,
prefix: b"",
data: Vec::new(),
start: 0,
suffix: b"",
};
}; };
let encoded = encode_entities(&value_raw, true); let encoded = encode_entities(&value_raw, true);
// When lengths are equal, prefer double quotes to all and single quotes to unquoted. // When lengths are equal, prefer double quotes to all and single quotes to unquoted.
min( let mut min = encode_using_double_quotes(&encoded);
min( let sq = encode_using_single_quotes(&encoded);
encode_using_double_quotes(&encoded), if sq.len() < min.len() {
encode_using_single_quotes(&encoded), min = sq;
), };
encode_unquoted(&encoded), let uq = encode_unquoted(&encoded);
) if uq.len() < min.len() {
min = uq;
};
AttrMinified::Value(min)
} }

View File

@ -2,7 +2,7 @@ use std::collections::HashMap;
use crate::ast::{ElementClosingTag, NodeData}; use crate::ast::{ElementClosingTag, NodeData};
use crate::cfg::Cfg; use crate::cfg::Cfg;
use crate::minify::attr::{minify_attr_val, AttrType}; use crate::minify::attr::{minify_attr, AttrMinified};
use crate::minify::content::minify_content; use crate::minify::content::minify_content;
use crate::spec::tag::ns::Namespace; use crate::spec::tag::ns::Namespace;
use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node}; use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
@ -29,28 +29,33 @@ pub fn minify_element(
out.push(b'<'); out.push(b'<');
out.extend_from_slice(tag_name); out.extend_from_slice(tag_name);
let mut last_attr = AttrType::NoValue; let mut last_attr_was_quoted = false;
let mut attrs_sorted = attributes.into_iter().collect::<Vec<_>>(); let mut attrs_sorted = attributes.into_iter().collect::<Vec<_>>();
attrs_sorted.sort_unstable_by(|a, b| a.0.cmp(&b.0)); attrs_sorted.sort_unstable_by(|a, b| a.0.cmp(&b.0));
for (name, value) in attrs_sorted { for (name, value) in attrs_sorted {
let min = minify_attr_val(ns, tag_name, &name, value); let min = minify_attr(ns, tag_name, &name, value);
if min.typ() == AttrType::Redundant { if let AttrMinified::Redundant = min {
continue; continue;
}; };
if cfg.keep_spaces_between_attributes || last_attr != AttrType::Quoted { if cfg.keep_spaces_between_attributes || !last_attr_was_quoted {
out.push(b' '); out.push(b' ');
}; };
out.extend_from_slice(&name); out.extend_from_slice(&name);
if min.len() == 0 { match min {
last_attr = AttrType::NoValue; AttrMinified::NoValue => {
} else { last_attr_was_quoted = false;
out.push(b'='); }
min.out(out); AttrMinified::Value(v) => {
last_attr = min.typ(); debug_assert!(v.len() > 0);
out.push(b'=');
v.out(out);
last_attr_was_quoted = v.quoted();
}
_ => unreachable!(),
}; };
} }
if closing_tag == ElementClosingTag::SelfClosing { if closing_tag == ElementClosingTag::SelfClosing {
if last_attr == AttrType::Unquoted { if !last_attr_was_quoted {
out.push(b' '); out.push(b' ');
}; };
out.push(b'/'); out.push(b'/');