Use FxHasher for internal hash-based data structures

This commit is contained in:
Wilson Lin 2023-01-05 11:48:03 +11:00
parent b6a0a9102e
commit 5989c06e58
13 changed files with 65 additions and 59 deletions

View File

@ -1,5 +1,9 @@
# minify-html changelog
## Pending
- Use FxHasher for internal hash-based data structures.
## 0.10.3
- [Python] Add Python 3.11 support.

View File

@ -21,19 +21,19 @@ const rsTagAttr = ({
`
AttributeMinification {
boolean: ${boolean},
case_insensitive: ${caseInsensitive},
collapse: ${collapse},
case_insensitive: ${caseInsensitive},
collapse: ${collapse},
default_value: ${
defaultValue == undefined ? "None" : `Some(b"${defaultValue}")`
},
redundant_if_empty: ${redundantIfEmpty},
trim: ${trim},
redundant_if_empty: ${redundantIfEmpty},
trim: ${trim},
}
`;
let code = `
use lazy_static::lazy_static;
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use crate::common::spec::tag::ns::Namespace;
pub struct AttributeMinification {
@ -47,7 +47,7 @@ pub struct AttributeMinification {
pub enum AttrMapEntry {
AllNamespaceElements(AttributeMinification),
SpecificNamespaceElements(HashMap<&'static [u8], AttributeMinification>),
SpecificNamespaceElements(FxHashMap<&'static [u8], AttributeMinification>),
}
pub struct ByNamespace {
@ -65,10 +65,10 @@ impl ByNamespace {
}
}
pub struct AttrMap(HashMap<&'static [u8], ByNamespace>);
pub struct AttrMap(FxHashMap<&'static [u8], ByNamespace>);
impl AttrMap {
pub const fn new(map: HashMap<&'static [u8], ByNamespace>) -> AttrMap {
pub const fn new(map: FxHashMap<&'static [u8], ByNamespace>) -> AttrMap {
AttrMap(map)
}
@ -85,7 +85,7 @@ impl AttrMap {
code += `
lazy_static! {
pub static ref ATTRS: AttrMap = {
let mut m = HashMap::<&'static [u8], ByNamespace>::new();
let mut m = FxHashMap::<&'static [u8], ByNamespace>::default();
${[...Object.entries(htmlData.attributes)]
.map(
([attr_name, namespaces]) => ` m.insert(b\"${attr_name}\", ByNamespace {
@ -108,7 +108,7 @@ ${(["html", "svg"] as const)
return `Some({
let ${
entries.length ? "mut" : ""
} m = HashMap::<&'static [u8], AttributeMinification>::new();
} m = FxHashMap::<&'static [u8], AttributeMinification>::default();
${entries
.map(
([tagName, tagAttr]) =>

View File

@ -1,9 +1,9 @@
use lazy_static::lazy_static;
use std::collections::HashSet;
use rustc_hash::FxHashSet;
lazy_static! {
pub static ref JAVASCRIPT_MIME_TYPES: HashSet<&'static [u8]> = {
let mut s = HashSet::<&'static [u8]>::new();
pub static ref JAVASCRIPT_MIME_TYPES: FxHashSet<&'static [u8]> = {
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"application/ecmascript");
s.insert(b"application/javascript");
s.insert(b"application/x-ecmascript");

View File

@ -1,5 +1,5 @@
use lazy_static::lazy_static;
use std::collections::{HashMap, HashSet};
use rustc_hash::{FxHashMap, FxHashSet};
// Rules sourced from https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission.
// TODO Opening tags
@ -10,7 +10,7 @@ enum ClosingTagOmissionRuleIfLast {
// Closing tag can never be omitted if it's the last node of its parent's children.
Never,
// Closing tag can be omitted if it's the last node of its parent's children and the parent tag name is not one of these.
ParentIsNot(HashSet<&'static [u8]>),
ParentIsNot(FxHashSet<&'static [u8]>),
}
// What this means in effect while parsing:
@ -21,14 +21,14 @@ enum ClosingTagOmissionRuleIfLast {
// - If C is in followed_by, B is closed implicitly.
struct ClosingTagOmissionRule {
// Closing tag can be omitted if immediately followed by an element node with one of these tag names.
followed_by: HashSet<&'static [u8]>,
followed_by: FxHashSet<&'static [u8]>,
// Closing tag can be omitted if it's the last node of its parent's children.
is_last: ClosingTagOmissionRuleIfLast,
}
lazy_static! {
static ref HTML_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: HashSet::new(),
followed_by: FxHashSet::default(),
is_last: ClosingTagOmissionRuleIfLast::Always,
};
}
@ -36,7 +36,7 @@ lazy_static! {
lazy_static! {
static ref HEAD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"body");
s
},
@ -46,7 +46,7 @@ lazy_static! {
lazy_static! {
static ref BODY_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: HashSet::new(),
followed_by: FxHashSet::default(),
is_last: ClosingTagOmissionRuleIfLast::Always,
};
}
@ -54,7 +54,7 @@ lazy_static! {
lazy_static! {
static ref LI_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"li");
s
},
@ -65,7 +65,7 @@ lazy_static! {
lazy_static! {
static ref DT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"dt");
s.insert(b"dd");
s
@ -77,7 +77,7 @@ lazy_static! {
lazy_static! {
static ref DD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"dd");
s.insert(b"dt");
s
@ -88,7 +88,7 @@ lazy_static! {
lazy_static! {
static ref P_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
let mut followed_by = HashSet::<&'static [u8]>::new();
let mut followed_by = FxHashSet::<&'static [u8]>::default();
followed_by.insert(b"address");
followed_by.insert(b"article");
followed_by.insert(b"aside");
@ -120,7 +120,7 @@ lazy_static! {
followed_by.insert(b"table");
followed_by.insert(b"ul");
let mut is_last_tags = HashSet::<&'static [u8]>::new();
let mut is_last_tags = FxHashSet::<&'static [u8]>::default();
is_last_tags.insert(b"a");
is_last_tags.insert(b"audio");
is_last_tags.insert(b"del");
@ -139,7 +139,7 @@ lazy_static! {
lazy_static! {
static ref RT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"rt");
s.insert(b"rp");
s
@ -151,7 +151,7 @@ lazy_static! {
lazy_static! {
static ref RP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"rt");
s.insert(b"rp");
s
@ -164,7 +164,7 @@ lazy_static! {
static ref OPTGROUP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule =
ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"optgroup");
s
},
@ -175,7 +175,7 @@ lazy_static! {
lazy_static! {
static ref OPTION_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"option");
s.insert(b"optgroup");
s
@ -187,7 +187,7 @@ lazy_static! {
lazy_static! {
static ref THEAD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"tbody");
s.insert(b"tfoot");
s
@ -199,7 +199,7 @@ lazy_static! {
lazy_static! {
static ref TBODY_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"tbody");
s.insert(b"tfoot");
s
@ -210,7 +210,7 @@ lazy_static! {
lazy_static! {
static ref TFOOT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: HashSet::<&'static [u8]>::new(),
followed_by: FxHashSet::<&'static [u8]>::default(),
is_last: ClosingTagOmissionRuleIfLast::Always,
};
}
@ -218,7 +218,7 @@ lazy_static! {
lazy_static! {
static ref TR_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"tr");
s
},
@ -229,7 +229,7 @@ lazy_static! {
lazy_static! {
static ref TD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"td");
s.insert(b"th");
s
@ -241,7 +241,7 @@ lazy_static! {
lazy_static! {
static ref TH_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"td");
s.insert(b"th");
s
@ -251,8 +251,8 @@ lazy_static! {
}
lazy_static! {
static ref CLOSING_TAG_OMISSION_RULES: HashMap<&'static [u8], &'static ClosingTagOmissionRule> = {
let mut m = HashMap::<&'static [u8], &'static ClosingTagOmissionRule>::new();
static ref CLOSING_TAG_OMISSION_RULES: FxHashMap<&'static [u8], &'static ClosingTagOmissionRule> = {
let mut m = FxHashMap::<&'static [u8], &'static ClosingTagOmissionRule>::default();
m.insert(b"html", &HTML_CLOSING_TAG_OMISSION_RULE);
m.insert(b"head", &HEAD_CLOSING_TAG_OMISSION_RULE);
m.insert(b"body", &BODY_CLOSING_TAG_OMISSION_RULE);

View File

@ -1,9 +1,9 @@
use lazy_static::lazy_static;
use std::collections::HashSet;
use rustc_hash::FxHashSet;
lazy_static! {
pub static ref VOID_TAGS: HashSet<&'static [u8]> = {
let mut s = HashSet::<&'static [u8]>::new();
pub static ref VOID_TAGS: FxHashSet<&'static [u8]> = {
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"area");
s.insert(b"base");
s.insert(b"br");

View File

@ -1,5 +1,5 @@
use crate::common::spec::tag::ns::Namespace;
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use lazy_static::lazy_static;
@ -60,8 +60,8 @@ static DEFAULT_SVG: &WhitespaceMinification = &WhitespaceMinification {
};
lazy_static! {
static ref HTML_TAG_WHITESPACE_MINIFICATION: HashMap<&'static [u8], &'static WhitespaceMinification> = {
let mut m = HashMap::<&'static [u8], &'static WhitespaceMinification>::new();
static ref HTML_TAG_WHITESPACE_MINIFICATION: FxHashMap<&'static [u8], &'static WhitespaceMinification> = {
let mut m = FxHashMap::<&'static [u8], &'static WhitespaceMinification>::default();
// Content tags.
m.insert(b"address", CONTENT);
m.insert(b"audio", CONTENT);
@ -174,8 +174,8 @@ lazy_static! {
m
};
static ref SVG_TAG_WHITESPACE_MINIFICATION: HashMap<&'static [u8], &'static WhitespaceMinification> = {
let mut m = HashMap::<&'static [u8], &'static WhitespaceMinification>::new();
static ref SVG_TAG_WHITESPACE_MINIFICATION: FxHashMap<&'static [u8], &'static WhitespaceMinification> = {
let mut m = FxHashMap::<&'static [u8], &'static WhitespaceMinification>::default();
// Content tags.
m.insert(b"desc", CONTENT);

View File

@ -18,6 +18,7 @@ maintenance = { status = "actively-developed" }
[dependencies]
aho-corasick = "0.7"
css-minify = "0.2.2"
minify-js = "0.2.6"
lazy_static = "1.4"
memchr = "2"
minify-js = "0.2.6"
rustc-hash = "1.1.0"

View File

@ -1,4 +1,4 @@
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use std::fmt::{Debug, Formatter};
use std::str::from_utf8;
@ -67,7 +67,7 @@ pub enum NodeData {
ended: bool,
},
Element {
attributes: HashMap<Vec<u8>, AttrVal>,
attributes: FxHashMap<Vec<u8>, AttrVal>,
children: Vec<NodeData>,
// If the source doesn't have a closing tag, then we can't add one, as otherwise output could be longer than source.
closing_tag: ElementClosingTag,

View File

@ -1,4 +1,4 @@
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use crate::ast::{AttrVal, ElementClosingTag, NodeData};
use crate::cfg::Cfg;
@ -19,7 +19,7 @@ pub fn minify_element(
// If the last node of the parent is an element and it's this one.
is_last_child_text_or_element_node: bool,
tag_name: &[u8],
attributes: HashMap<Vec<u8>, AttrVal>,
attributes: FxHashMap<Vec<u8>, AttrVal>,
closing_tag: ElementClosingTag,
children: Vec<NodeData>,
) {

View File

@ -1,4 +1,4 @@
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use crate::ast::{AttrVal, ElementClosingTag, NodeData, ScriptOrStyleLang};
use crate::common::gen::codepoints::{
@ -37,7 +37,7 @@ pub fn peek_tag_name(code: &mut Code) -> Vec<u8> {
// Derive Eq for testing.
#[derive(Eq, PartialEq)]
pub struct ParsedTag {
pub attributes: HashMap<Vec<u8>, AttrVal>,
pub attributes: FxHashMap<Vec<u8>, AttrVal>,
pub name: Vec<u8>,
pub self_closing: bool,
}
@ -58,10 +58,10 @@ impl Debug for ParsedTag {
}
// While not valid, attributes in closing tags still need to be parsed (and then discarded) as attributes e.g. `</div x=">">`, which is why this function is used for both opening and closing tags.
// TODO Use generics to create version that doesn't create a HashMap.
// TODO Use generics to create a version that doesn't create an FxHashMap.
pub fn parse_tag(code: &mut Code) -> ParsedTag {
let elem_name = parse_tag_name(code);
let mut attributes = HashMap::new();
let mut attributes = FxHashMap::default();
let self_closing;
loop {
// At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one).

View File

@ -1,4 +1,4 @@
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use crate::ast::{AttrVal, ElementClosingTag, NodeData};
use crate::common::spec::tag::ns::Namespace;
@ -27,7 +27,7 @@ fn test_parse_tag() {
tag,
ParsedTag {
attributes: {
let mut map = HashMap::<Vec<u8>, AttrVal>::new();
// `FxHashMap` uses a non-default hasher, so `HashMap::new()` is unavailable; build it via `Default`.
let mut map = FxHashMap::<Vec<u8>, AttrVal>::default();
map.insert(b"type".to_vec(), val(b"password"));
map.insert(b"\"a\"".to_vec(), val(b" b "));
map.insert(b":cd".to_vec(), val(b""));
@ -57,7 +57,7 @@ fn test_parse_element() {
elem,
NodeData::Element {
attributes: {
let mut map = HashMap::<Vec<u8>, AttrVal>::new();
// `FxHashMap` uses a non-default hasher, so `HashMap::new()` is unavailable; build it via `Default`.
let mut map = FxHashMap::<Vec<u8>, AttrVal>::default();
map.insert(b"b".to_vec(), val(br#"\"c\""#));
map
},

View File

@ -21,3 +21,4 @@ css-minify = "0.2.2"
lazy_static = "1.4"
memchr = "2"
minify-js = "0.2.6"
rustc-hash = "1.1.0"

View File

@ -1,4 +1,4 @@
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use lazy_static::lazy_static;
@ -15,8 +15,8 @@ use crate::proc::Processor;
// See comment in `process_attr_value` for full description of why these intentionally do not have semicolons.
lazy_static! {
static ref ENCODED: HashMap<u8, &'static [u8]> = {
let mut m = HashMap::<u8, &'static [u8]>::new();
static ref ENCODED: FxHashMap<u8, &'static [u8]> = {
let mut m = FxHashMap::<u8, &'static [u8]>::default();
m.insert(b'\'', b"&#39");
m.insert(b'"', b"&#34");
m.insert(b'>', b"&gt");