diff --git a/rust/common/src/spec/mod.rs b/rust/common/src/spec/mod.rs index f23423f..b2f3b43 100644 --- a/rust/common/src/spec/mod.rs +++ b/rust/common/src/spec/mod.rs @@ -1,3 +1,2 @@ -pub mod entity; pub mod script; pub mod tag; diff --git a/rust/common/src/spec/entity/decode.rs b/rust/main/src/entity/decode.rs similarity index 97% rename from rust/common/src/spec/entity/decode.rs rename to rust/main/src/entity/decode.rs index 7349945..443d37a 100644 --- a/rust/common/src/spec/entity/decode.rs +++ b/rust/main/src/entity/decode.rs @@ -17,11 +17,11 @@ use std::char::from_u32; use memchr::memchr; -use crate::gen::codepoints::{ +use minify_html_common::gen::codepoints::{ Lookup, ALPHANUMERIC_OR_EQUALS, DIGIT, HEX_DIGIT, LOWER_HEX_ALPHA, UPPER_HEX_ALPHA, }; -use crate::gen::entities::{EntityType, ENTITY}; -use crate::pattern::TrieNodeMatch; +use minify_html_common::gen::entities::{EntityType, ENTITY}; +use minify_html_common::pattern::TrieNodeMatch; enum Decoded { Ignored, diff --git a/rust/common/src/spec/entity/encode.rs b/rust/main/src/entity/encode.rs similarity index 94% rename from rust/common/src/spec/entity/encode.rs rename to rust/main/src/entity/encode.rs index 4642f9f..72a4d64 100644 --- a/rust/common/src/spec/entity/encode.rs +++ b/rust/main/src/entity/encode.rs @@ -2,11 +2,11 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind}; use lazy_static::lazy_static; use memchr::memchr; -use crate::gen::codepoints::ALPHANUMERIC_OR_EQUALS; -use crate::gen::entities::{ +use minify_html_common::gen::codepoints::ALPHANUMERIC_OR_EQUALS; +use minify_html_common::gen::entities::{ EntityType, ENTITY, SHORTER_ENCODED_ENTITIES_DECODED, SHORTER_ENCODED_ENTITIES_ENCODED, }; -use crate::pattern::TrieNodeMatch; +use minify_html_common::pattern::TrieNodeMatch; lazy_static! { static ref SHORTER_ENCODED_ENTITIES_ENCODED_SEARCHER: AhoCorasick = AhoCorasickBuilder::new() diff --git a/rust/common/src/spec/entity/mod.rs b/rust/main/src/entity/mod.rs similarity index 100% rename from rust/common/src/spec/entity/mod.rs rename to rust/main/src/entity/mod.rs diff --git a/rust/common/src/spec/entity/tests/encode.rs b/rust/main/src/entity/tests/encode.rs similarity index 100% rename from rust/common/src/spec/entity/tests/encode.rs rename to rust/main/src/entity/tests/encode.rs diff --git a/rust/common/src/spec/entity/tests/mod.rs b/rust/main/src/entity/tests/mod.rs similarity index 100% rename from rust/common/src/spec/entity/tests/mod.rs rename to rust/main/src/entity/tests/mod.rs diff --git a/rust/main/src/lib.rs b/rust/main/src/lib.rs index 5904802..bda5984 100644 --- a/rust/main/src/lib.rs +++ b/rust/main/src/lib.rs @@ -7,6 +7,7 @@ use minify_html_common::spec::tag::EMPTY_SLICE; mod ast; mod cfg; +mod entity; mod minify; mod parse; diff --git a/rust/main/src/minify/attr.rs b/rust/main/src/minify/attr.rs index e28b2b1..70c537a 100644 --- a/rust/main/src/minify/attr.rs +++ b/rust/main/src/minify/attr.rs @@ -6,11 +6,11 @@ use { crate::minify::css::MINIFY_CSS_TRANSFORM_OPTIONS, crate::minify::esbuild::minify_using_esbuild, }; +use crate::entity::encode::encode_entities; use crate::Cfg; use minify_html_common::gen::attrs::ATTRS; use minify_html_common::gen::codepoints::DIGIT; use minify_html_common::pattern::Replacer; -use minify_html_common::spec::entity::encode::encode_entities; use minify_html_common::spec::script::JAVASCRIPT_MIME_TYPES; use minify_html_common::spec::tag::ns::Namespace; use minify_html_common::whitespace::{collapse_whitespace, left_trim, right_trim}; diff --git a/rust/main/src/minify/content.rs b/rust/main/src/minify/content.rs index 8423842..31d6926 100644 --- a/rust/main/src/minify/content.rs +++ b/rust/main/src/minify/content.rs @@ -3,6 +3,7 @@ use lazy_static::lazy_static; use crate::ast::{NodeData, ScriptOrStyleLang}; use crate::cfg::Cfg; +use crate::entity::encode::encode_entities; use crate::minify::bang::minify_bang; use crate::minify::comment::minify_comment; use crate::minify::css::minify_css; @@ -11,7 +12,6 @@ use crate::minify::instruction::minify_instruction; use crate::minify::js::minify_js; use minify_html_common::gen::codepoints::TAG_NAME_CHAR; use minify_html_common::pattern::Replacer; -use minify_html_common::spec::entity::encode::encode_entities; use minify_html_common::spec::tag::whitespace::{ get_whitespace_minification_for_tag, WhitespaceMinification, }; diff --git a/rust/main/src/parse/content.rs b/rust/main/src/parse/content.rs index 54c5bb1..6fe737f 100644 --- a/rust/main/src/parse/content.rs +++ b/rust/main/src/parse/content.rs @@ -3,6 +3,7 @@ use lazy_static::lazy_static; use memchr::memrchr; use crate::ast::NodeData; +use crate::entity::decode::decode_entities; use crate::parse::bang::parse_bang; use crate::parse::comment::parse_comment; use crate::parse::content::ContentType::*; @@ -10,7 +11,6 @@ use crate::parse::element::{parse_element, parse_tag, peek_tag_name}; use crate::parse::instruction::parse_instruction; use crate::parse::Code; use minify_html_common::gen::codepoints::TAG_NAME_CHAR; -use minify_html_common::spec::entity::decode::decode_entities; use minify_html_common::spec::tag::ns::Namespace; use minify_html_common::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node}; use minify_html_common::spec::tag::void::VOID_TAGS; diff --git a/rust/main/src/parse/element.rs b/rust/main/src/parse/element.rs index a3f0e23..b161c2a 100644 --- a/rust/main/src/parse/element.rs +++ b/rust/main/src/parse/element.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use crate::ast::{ElementClosingTag, NodeData, ScriptOrStyleLang}; +use crate::entity::decode::decode_entities; use crate::parse::content::{parse_content, ParsedContent}; use crate::parse::script::parse_script_content; use crate::parse::style::parse_style_content; @@ -11,7 +12,6 @@ use minify_html_common::gen::codepoints::{ ATTR_QUOTE, DOUBLE_QUOTE, NOT_UNQUOTED_ATTR_VAL_CHAR, SINGLE_QUOTE, TAG_NAME_CHAR, WHITESPACE, WHITESPACE_OR_SLASH, WHITESPACE_OR_SLASH_OR_EQUALS_OR_RIGHT_CHEVRON, }; -use minify_html_common::spec::entity::decode::decode_entities; use minify_html_common::spec::script::JAVASCRIPT_MIME_TYPES; use minify_html_common::spec::tag::ns::Namespace; use minify_html_common::spec::tag::void::VOID_TAGS; diff --git a/rust/main/src/parse/textarea.rs b/rust/main/src/parse/textarea.rs index e447e12..629a5ca 100644 --- a/rust/main/src/parse/textarea.rs +++ b/rust/main/src/parse/textarea.rs @@ -3,9 +3,9 @@ use aho_corasick::AhoCorasickBuilder; use lazy_static::lazy_static; use crate::ast::NodeData; +use crate::entity::decode::decode_entities; use crate::parse::content::ParsedContent; use crate::parse::Code; -use minify_html_common::spec::entity::decode::decode_entities; lazy_static! { static ref END: AhoCorasick = AhoCorasickBuilder::new() diff --git a/rust/main/src/parse/title.rs b/rust/main/src/parse/title.rs index 7ee51f8..2c55014 100644 --- a/rust/main/src/parse/title.rs +++ b/rust/main/src/parse/title.rs @@ -3,9 +3,9 @@ use aho_corasick::AhoCorasickBuilder; use lazy_static::lazy_static; use crate::ast::NodeData; +use crate::entity::decode::decode_entities; use crate::parse::content::ParsedContent; use crate::parse::Code; -use minify_html_common::spec::entity::decode::decode_entities; lazy_static! { static ref END: AhoCorasick = AhoCorasickBuilder::new() diff --git a/rust/onepass/src/proc/entity.rs b/rust/onepass/src/proc/entity.rs index d0a790d..274c4f3 100644 --- a/rust/onepass/src/proc/entity.rs +++ b/rust/onepass/src/proc/entity.rs @@ -114,6 +114,7 @@ fn parse_entity(code: &mut [u8], read_pos: usize, write_pos: usize, in_attr_val: ), EntityType::Named(decoded) => { // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state. + // TODO Generated trie no longer contains encoded values, even if longer. if decoded[0] == b'&' && decoded.len() > 1 || in_attr_val && *code.get(read_pos + match_len - 1).unwrap() != b';'