From 5989c06e586272e71a88c0d09482642a8919d2ad Mon Sep 17 00:00:00 2001 From: Wilson Lin Date: Thu, 5 Jan 2023 11:48:03 +1100 Subject: [PATCH] Use FxHasher for internal hash-based data structures --- CHANGELOG.md | 4 +++ gen/attrs.ts | 20 ++++++------ rust/common/spec/script.rs | 6 ++-- rust/common/spec/tag/omission.rs | 46 ++++++++++++++-------------- rust/common/spec/tag/void.rs | 6 ++-- rust/common/spec/tag/whitespace.rs | 10 +++--- rust/main/Cargo.toml | 3 +- rust/main/src/ast/mod.rs | 4 +-- rust/main/src/minify/element.rs | 4 +-- rust/main/src/parse/element.rs | 8 ++--- rust/main/src/parse/tests/element.rs | 6 ++-- rust/onepass/Cargo.toml | 1 + rust/onepass/src/unit/attr/value.rs | 6 ++-- 13 files changed, 65 insertions(+), 59 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e78f4b..e33b6b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # minify-html changelog +## Pending + +- Use FxHasher for internal hash-based data structures. + ## 0.10.3 - [Python] Add Python 3.11 support. diff --git a/gen/attrs.ts b/gen/attrs.ts index dc3906d..7a7f180 100644 --- a/gen/attrs.ts +++ b/gen/attrs.ts @@ -21,19 +21,19 @@ const rsTagAttr = ({ ` AttributeMinification { boolean: ${boolean}, - case_insensitive: ${caseInsensitive}, - collapse: ${collapse}, + case_insensitive: ${caseInsensitive}, + collapse: ${collapse}, default_value: ${ defaultValue == undefined ? 
"None" : `Some(b"${defaultValue}")` }, - redundant_if_empty: ${redundantIfEmpty}, - trim: ${trim}, + redundant_if_empty: ${redundantIfEmpty}, + trim: ${trim}, } `; let code = ` use lazy_static::lazy_static; -use std::collections::HashMap; +use rustc_hash::FxHashMap; use crate::common::spec::tag::ns::Namespace; pub struct AttributeMinification { @@ -47,7 +47,7 @@ pub struct AttributeMinification { pub enum AttrMapEntry { AllNamespaceElements(AttributeMinification), - SpecificNamespaceElements(HashMap<&'static [u8], AttributeMinification>), + SpecificNamespaceElements(FxHashMap<&'static [u8], AttributeMinification>), } pub struct ByNamespace { @@ -65,10 +65,10 @@ impl ByNamespace { } } -pub struct AttrMap(HashMap<&'static [u8], ByNamespace>); +pub struct AttrMap(FxHashMap<&'static [u8], ByNamespace>); impl AttrMap { - pub const fn new(map: HashMap<&'static [u8], ByNamespace>) -> AttrMap { + pub const fn new(map: FxHashMap<&'static [u8], ByNamespace>) -> AttrMap { AttrMap(map) } @@ -85,7 +85,7 @@ impl AttrMap { code += ` lazy_static! { pub static ref ATTRS: AttrMap = { - let mut m = HashMap::<&'static [u8], ByNamespace>::new(); + let mut m = FxHashMap::<&'static [u8], ByNamespace>::default(); ${[...Object.entries(htmlData.attributes)] .map( ([attr_name, namespaces]) => ` m.insert(b\"${attr_name}\", ByNamespace { @@ -108,7 +108,7 @@ ${(["html", "svg"] as const) return `Some({ let ${ entries.length ? "mut" : "" - } m = HashMap::<&'static [u8], AttributeMinification>::new(); + } m = FxHashMap::<&'static [u8], AttributeMinification>::default(); ${entries .map( ([tagName, tagAttr]) => diff --git a/rust/common/spec/script.rs b/rust/common/spec/script.rs index c51c574..adcfacf 100644 --- a/rust/common/spec/script.rs +++ b/rust/common/spec/script.rs @@ -1,9 +1,9 @@ use lazy_static::lazy_static; -use std::collections::HashSet; +use rustc_hash::FxHashSet; lazy_static! 
{ - pub static ref JAVASCRIPT_MIME_TYPES: HashSet<&'static [u8]> = { - let mut s = HashSet::<&'static [u8]>::new(); + pub static ref JAVASCRIPT_MIME_TYPES: FxHashSet<&'static [u8]> = { + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"application/ecmascript"); s.insert(b"application/javascript"); s.insert(b"application/x-ecmascript"); diff --git a/rust/common/spec/tag/omission.rs b/rust/common/spec/tag/omission.rs index ec70c93..4ca0029 100644 --- a/rust/common/spec/tag/omission.rs +++ b/rust/common/spec/tag/omission.rs @@ -1,5 +1,5 @@ use lazy_static::lazy_static; -use std::collections::{HashMap, HashSet}; +use rustc_hash::{FxHashMap, FxHashSet}; // Rules sourced from https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission. // TODO Opening tags @@ -10,7 +10,7 @@ enum ClosingTagOmissionRuleIfLast { // Closing tag can never be omitted if it's the last node of its parent's children. Never, // Closing tag can be omitted if it's the last node of its parent's children and the parent tag name is not one of these. - ParentIsNot(HashSet<&'static [u8]>), + ParentIsNot(FxHashSet<&'static [u8]>), } // What this means in effect while parsing: @@ -21,14 +21,14 @@ enum ClosingTagOmissionRuleIfLast { // - If C is in followed_by, B is closed implicitly. struct ClosingTagOmissionRule { // Closing tag can be omitted if immediately followed by an element node with one of these tag names. - followed_by: HashSet<&'static [u8]>, + followed_by: FxHashSet<&'static [u8]>, // Closing tag can be omitted if it's the last node of its parent's children. is_last: ClosingTagOmissionRuleIfLast, } lazy_static! { static ref HTML_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { - followed_by: HashSet::new(), + followed_by: FxHashSet::default(), is_last: ClosingTagOmissionRuleIfLast::Always, }; } @@ -36,7 +36,7 @@ lazy_static! { lazy_static! 
{ static ref HEAD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"body"); s }, @@ -46,7 +46,7 @@ lazy_static! { lazy_static! { static ref BODY_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { - followed_by: HashSet::new(), + followed_by: FxHashSet::default(), is_last: ClosingTagOmissionRuleIfLast::Always, }; } @@ -54,7 +54,7 @@ lazy_static! { lazy_static! { static ref LI_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"li"); s }, @@ -65,7 +65,7 @@ lazy_static! { lazy_static! { static ref DT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"dt"); s.insert(b"dd"); s @@ -77,7 +77,7 @@ lazy_static! { lazy_static! { static ref DD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"dd"); s.insert(b"dt"); s @@ -88,7 +88,7 @@ lazy_static! { lazy_static! { static ref P_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = { - let mut followed_by = HashSet::<&'static [u8]>::new(); + let mut followed_by = FxHashSet::<&'static [u8]>::default(); followed_by.insert(b"address"); followed_by.insert(b"article"); followed_by.insert(b"aside"); @@ -120,7 +120,7 @@ lazy_static! 
{ followed_by.insert(b"table"); followed_by.insert(b"ul"); - let mut is_last_tags = HashSet::<&'static [u8]>::new(); + let mut is_last_tags = FxHashSet::<&'static [u8]>::default(); is_last_tags.insert(b"a"); is_last_tags.insert(b"audio"); is_last_tags.insert(b"del"); @@ -139,7 +139,7 @@ lazy_static! { lazy_static! { static ref RT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"rt"); s.insert(b"rp"); s @@ -151,7 +151,7 @@ lazy_static! { lazy_static! { static ref RP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"rt"); s.insert(b"rp"); s @@ -164,7 +164,7 @@ lazy_static! { static ref OPTGROUP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"optgroup"); s }, @@ -175,7 +175,7 @@ lazy_static! { lazy_static! { static ref OPTION_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"option"); s.insert(b"optgroup"); s @@ -187,7 +187,7 @@ lazy_static! { lazy_static! { static ref THEAD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"tbody"); s.insert(b"tfoot"); s @@ -199,7 +199,7 @@ lazy_static! { lazy_static! 
{ static ref TBODY_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"tbody"); s.insert(b"tfoot"); s @@ -210,7 +210,7 @@ lazy_static! { lazy_static! { static ref TFOOT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { - followed_by: HashSet::<&'static [u8]>::new(), + followed_by: FxHashSet::<&'static [u8]>::default(), is_last: ClosingTagOmissionRuleIfLast::Always, }; } @@ -218,7 +218,7 @@ lazy_static! { lazy_static! { static ref TR_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"tr"); s }, @@ -229,7 +229,7 @@ lazy_static! { lazy_static! { static ref TD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"td"); s.insert(b"th"); s @@ -241,7 +241,7 @@ lazy_static! { lazy_static! { static ref TH_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule { followed_by: { - let mut s = HashSet::<&'static [u8]>::new(); + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"td"); s.insert(b"th"); s @@ -251,8 +251,8 @@ lazy_static! { } lazy_static! 
{ - static ref CLOSING_TAG_OMISSION_RULES: HashMap<&'static [u8], &'static ClosingTagOmissionRule> = { - let mut m = HashMap::<&'static [u8], &'static ClosingTagOmissionRule>::new(); + static ref CLOSING_TAG_OMISSION_RULES: FxHashMap<&'static [u8], &'static ClosingTagOmissionRule> = { + let mut m = FxHashMap::<&'static [u8], &'static ClosingTagOmissionRule>::default(); m.insert(b"html", &HTML_CLOSING_TAG_OMISSION_RULE); m.insert(b"head", &HEAD_CLOSING_TAG_OMISSION_RULE); m.insert(b"body", &BODY_CLOSING_TAG_OMISSION_RULE); diff --git a/rust/common/spec/tag/void.rs b/rust/common/spec/tag/void.rs index b39d790..4c79899 100644 --- a/rust/common/spec/tag/void.rs +++ b/rust/common/spec/tag/void.rs @@ -1,9 +1,9 @@ use lazy_static::lazy_static; -use std::collections::HashSet; +use rustc_hash::FxHashSet; lazy_static! { - pub static ref VOID_TAGS: HashSet<&'static [u8]> = { - let mut s = HashSet::<&'static [u8]>::new(); + pub static ref VOID_TAGS: FxHashSet<&'static [u8]> = { + let mut s = FxHashSet::<&'static [u8]>::default(); s.insert(b"area"); s.insert(b"base"); s.insert(b"br"); diff --git a/rust/common/spec/tag/whitespace.rs b/rust/common/spec/tag/whitespace.rs index 7741c22..917caab 100644 --- a/rust/common/spec/tag/whitespace.rs +++ b/rust/common/spec/tag/whitespace.rs @@ -1,5 +1,5 @@ use crate::common::spec::tag::ns::Namespace; -use std::collections::HashMap; +use rustc_hash::FxHashMap; use lazy_static::lazy_static; @@ -60,8 +60,8 @@ static DEFAULT_SVG: &WhitespaceMinification = &WhitespaceMinification { }; lazy_static! { - static ref HTML_TAG_WHITESPACE_MINIFICATION: HashMap<&'static [u8], &'static WhitespaceMinification> = { - let mut m = HashMap::<&'static [u8], &'static WhitespaceMinification>::new(); + static ref HTML_TAG_WHITESPACE_MINIFICATION: FxHashMap<&'static [u8], &'static WhitespaceMinification> = { + let mut m = FxHashMap::<&'static [u8], &'static WhitespaceMinification>::default(); // Content tags. 
m.insert(b"address", CONTENT); m.insert(b"audio", CONTENT); @@ -174,8 +174,8 @@ lazy_static! { m }; - static ref SVG_TAG_WHITESPACE_MINIFICATION: HashMap<&'static [u8], &'static WhitespaceMinification> = { - let mut m = HashMap::<&'static [u8], &'static WhitespaceMinification>::new(); + static ref SVG_TAG_WHITESPACE_MINIFICATION: FxHashMap<&'static [u8], &'static WhitespaceMinification> = { + let mut m = FxHashMap::<&'static [u8], &'static WhitespaceMinification>::default(); // Content tags. m.insert(b"desc", CONTENT); diff --git a/rust/main/Cargo.toml b/rust/main/Cargo.toml index 2d9b246..2317f61 100644 --- a/rust/main/Cargo.toml +++ b/rust/main/Cargo.toml @@ -18,6 +18,7 @@ maintenance = { status = "actively-developed" } [dependencies] aho-corasick = "0.7" css-minify = "0.2.2" -minify-js = "0.2.6" lazy_static = "1.4" memchr = "2" +minify-js = "0.2.6" +rustc-hash = "1.1.0" diff --git a/rust/main/src/ast/mod.rs b/rust/main/src/ast/mod.rs index 225a62b..d1c0203 100644 --- a/rust/main/src/ast/mod.rs +++ b/rust/main/src/ast/mod.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use rustc_hash::FxHashMap; use std::fmt::{Debug, Formatter}; use std::str::from_utf8; @@ -67,7 +67,7 @@ pub enum NodeData { ended: bool, }, Element { - attributes: HashMap<Vec<u8>, AttrVal>, + attributes: FxHashMap<Vec<u8>, AttrVal>, children: Vec<NodeData>, // If the source doesn't have a closing tag, then we can't add one, as otherwise output could be longer than source. closing_tag: ElementClosingTag, diff --git a/rust/main/src/minify/element.rs b/rust/main/src/minify/element.rs index d2f8e61..c9483e1 100644 --- a/rust/main/src/minify/element.rs +++ b/rust/main/src/minify/element.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use rustc_hash::FxHashMap; use crate::ast::{AttrVal, ElementClosingTag, NodeData}; use crate::cfg::Cfg; @@ -19,7 +19,7 @@ pub fn minify_element( // If the last node of the parent is an element and it's this one. 
is_last_child_text_or_element_node: bool, tag_name: &[u8], - attributes: HashMap<Vec<u8>, AttrVal>, + attributes: FxHashMap<Vec<u8>, AttrVal>, closing_tag: ElementClosingTag, children: Vec<NodeData>, ) { diff --git a/rust/main/src/parse/element.rs b/rust/main/src/parse/element.rs index 738d5b5..0a81dad 100644 --- a/rust/main/src/parse/element.rs +++ b/rust/main/src/parse/element.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use rustc_hash::FxHashMap; use crate::ast::{AttrVal, ElementClosingTag, NodeData, ScriptOrStyleLang}; use crate::common::gen::codepoints::{ @@ -37,7 +37,7 @@ pub fn peek_tag_name(code: &mut Code) -> Vec<u8> { // Derive Eq for testing. #[derive(Eq, PartialEq)] pub struct ParsedTag { - pub attributes: HashMap<Vec<u8>, AttrVal>, + pub attributes: FxHashMap<Vec<u8>, AttrVal>, pub name: Vec<u8>, pub self_closing: bool, } @@ -58,10 +58,10 @@ impl Debug for ParsedTag { } // While not valid, attributes in closing tags still need to be parsed (and then discarded) as attributes e.g. `</div x=">">`, which is why this function is used for both opening and closing tags. -// TODO Use generics to create version that doesn't create a HashMap. +// TODO Use generics to create version that doesn't create an FxHashMap. pub fn parse_tag(code: &mut Code) -> ParsedTag { let elem_name = parse_tag_name(code); - let mut attributes = HashMap::new(); + let mut attributes = FxHashMap::default(); let self_closing; loop { // At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one). 
diff --git a/rust/main/src/parse/tests/element.rs b/rust/main/src/parse/tests/element.rs index 4c9ee11..77c343b 100644 --- a/rust/main/src/parse/tests/element.rs +++ b/rust/main/src/parse/tests/element.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use rustc_hash::FxHashMap; use crate::ast::{AttrVal, ElementClosingTag, NodeData}; use crate::common::spec::tag::ns::Namespace; @@ -27,7 +27,7 @@ fn test_parse_tag() { tag, ParsedTag { attributes: { - let mut map = HashMap::<Vec<u8>, AttrVal>::new(); + let mut map = FxHashMap::<Vec<u8>, AttrVal>::default(); map.insert(b"type".to_vec(), val(b"password")); map.insert(b"\"a\"".to_vec(), val(b" b ")); map.insert(b":cd".to_vec(), val(b"")); @@ -57,7 +57,7 @@ fn test_parse_element() { elem, NodeData::Element { attributes: { - let mut map = HashMap::<Vec<u8>, AttrVal>::new(); + let mut map = FxHashMap::<Vec<u8>, AttrVal>::default(); map.insert(b"b".to_vec(), val(br#"\"c\""#)); map }, diff --git a/rust/onepass/Cargo.toml b/rust/onepass/Cargo.toml index f11aeec..d2c3061 100644 --- a/rust/onepass/Cargo.toml +++ b/rust/onepass/Cargo.toml @@ -21,3 +21,4 @@ css-minify = "0.2.2" lazy_static = "1.4" memchr = "2" minify-js = "0.2.6" +rustc-hash = "1.1.0" diff --git a/rust/onepass/src/unit/attr/value.rs b/rust/onepass/src/unit/attr/value.rs index 59dd269..50caaa2 100644 --- a/rust/onepass/src/unit/attr/value.rs +++ b/rust/onepass/src/unit/attr/value.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use rustc_hash::FxHashMap; use lazy_static::lazy_static; @@ -15,8 +15,8 @@ use crate::proc::Processor; // See comment in `process_attr_value` for full description of why these intentionally do not have semicolons. lazy_static! { - static ref ENCODED: HashMap<u8, &'static [u8]> = { - let mut m = HashMap::<u8, &'static [u8]>::new(); + static ref ENCODED: FxHashMap<u8, &'static [u8]> = { + let mut m = FxHashMap::<u8, &'static [u8]>::default(); m.insert(b'\'', b"&#39"); m.insert(b'"', b"&#34"); m.insert(b'>', b"&gt");