From 4fc9496829b82b7446c208e2d893e2c863291f36 Mon Sep 17 00:00:00 2001 From: Wilson Lin Date: Sun, 8 Aug 2021 19:00:51 +1000 Subject: [PATCH] Fix references in onepass --- gen/codepoints.ts | 5 +++++ rust/onepass/src/lib.rs | 10 +++++----- rust/onepass/src/proc/mod.rs | 7 +++++++ rust/onepass/src/unit/attr/mod.rs | 4 ++-- rust/onepass/src/unit/content.rs | 10 +++++----- rust/onepass/src/unit/tag.rs | 4 ++-- 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/gen/codepoints.ts b/gen/codepoints.ts index 0c1f1aa..cc1470c 100644 --- a/gen/codepoints.ts +++ b/gen/codepoints.ts @@ -46,6 +46,10 @@ const WHITESPACE_OR_SLASH_OR_EQUALS_OR_RIGHT_CHEVRON = [ const DOUBLE_QUOTE = [c('"')]; const SINGLE_QUOTE = [c("'")]; +// Official characters allowed in an attribute name. +// NOTE: Unicode noncharacters not tested. +// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name for spec. +const WHATWG_ATTR_NAME_CHAR = invert([...CONTROL, c(' '), c('"'), c('\''), c('>'), c('/'), c('=')]); // Valid attribute quote characters. // See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example for spec. // Backtick is not a valid quote character according to spec. @@ -88,6 +92,7 @@ impl std::ops::Index for Lookup { WHITESPACE_OR_SLASH, WHITESPACE_OR_SLASH_OR_EQUALS_OR_RIGHT_CHEVRON, + WHATWG_ATTR_NAME_CHAR, DOUBLE_QUOTE, SINGLE_QUOTE, ATTR_QUOTE, diff --git a/rust/onepass/src/lib.rs b/rust/onepass/src/lib.rs index d23e9e0..3ab0edb 100644 --- a/rust/onepass/src/lib.rs +++ b/rust/onepass/src/lib.rs @@ -22,7 +22,7 @@ mod unit; /// # Examples /// /// ``` -/// use minify_html::{Cfg, Error, in_place}; +/// use minify_html_onepass::{Cfg, Error, in_place}; /// /// let mut code = b"

Hello, world!

".to_vec(); /// let cfg = &Cfg { @@ -62,7 +62,7 @@ pub fn in_place(code: &mut [u8], cfg: &Cfg) -> Result { /// # Examples /// /// ``` -/// use minify_html::{Cfg, Error, in_place_str}; +/// use minify_html_onepass::{Cfg, Error, in_place_str}; /// /// let mut code = "

Hello, world!

".to_string(); /// let cfg = &Cfg { @@ -92,7 +92,7 @@ pub fn in_place_str<'s>(code: &'s mut str, cfg: &Cfg) -> Result<&'s str, Error> /// # Examples /// /// ``` -/// use minify_html::{Cfg, Error, truncate}; +/// use minify_html_onepass::{Cfg, Error, truncate}; /// /// let mut code = b"

Hello, world!

".to_vec(); /// let cfg = &Cfg { @@ -125,7 +125,7 @@ pub fn truncate(code: &mut Vec, cfg: &Cfg) -> Result<(), Error> { /// # Examples /// /// ``` -/// use minify_html::{Cfg, Error, copy}; +/// use minify_html_onepass::{Cfg, Error, copy}; /// /// let mut code: &[u8] = b"

Hello, world!

"; /// let cfg = &Cfg { @@ -164,7 +164,7 @@ pub fn copy(code: &[u8], cfg: &Cfg) -> Result, Error> { /// # Examples /// /// ``` -/// use minify_html::{Cfg, FriendlyError, with_friendly_error}; +/// use minify_html_onepass::{Cfg, FriendlyError, with_friendly_error}; /// /// let mut code = b"

".to_vec(); /// let cfg = &Cfg { diff --git a/rust/onepass/src/proc/mod.rs b/rust/onepass/src/proc/mod.rs index 712347e..2baffd0 100644 --- a/rust/onepass/src/proc/mod.rs +++ b/rust/onepass/src/proc/mod.rs @@ -16,6 +16,7 @@ use crate::proc::range::ProcessorRange; use crate::proc::MatchAction::*; use crate::proc::MatchMode::*; use minify_html_common::gen::codepoints::Lookup; +use minify_html_common::spec::tag::EMPTY_SLICE; pub mod checkpoint; pub mod entity; @@ -239,6 +240,12 @@ impl<'d> Processor<'d> { !self._in_bounds(0) } + #[inline(always)] + pub fn get_or_empty(&self, r: Option) -> &[u8] { + r.and_then(|r| self.code.get(r.start..r.end)) + .unwrap_or(EMPTY_SLICE) + } + #[inline(always)] pub fn require_not_at_end(&self) -> ProcessingResult<()> { if self.at_end() { diff --git a/rust/onepass/src/unit/attr/mod.rs b/rust/onepass/src/unit/attr/mod.rs index 8bc78cc..454b244 100644 --- a/rust/onepass/src/unit/attr/mod.rs +++ b/rust/onepass/src/unit/attr/mod.rs @@ -8,7 +8,7 @@ use crate::unit::attr::value::{ process_attr_value, skip_attr_value, DelimiterType, ProcessedAttrValue, }; use minify_html_common::gen::attrs::ATTRS; -use minify_html_common::gen::codepoints::{ATTR_NAME_CHAR, WHITESPACE}; +use minify_html_common::gen::codepoints::{WHATWG_ATTR_NAME_CHAR, WHITESPACE}; use minify_html_common::spec::tag::ns::Namespace; mod value; @@ -34,7 +34,7 @@ pub fn process_attr( // It's possible to expect attribute name but not be called at an attribute, e.g. due to whitespace between name and // value, which causes name to be considered boolean attribute and `=` to be start of new (invalid) attribute name. 
let name = proc - .m(WhileInLookup(ATTR_NAME_CHAR), Keep) + .m(WhileInLookup(WHATWG_ATTR_NAME_CHAR), Keep) .require("attribute name")?; proc.make_lowercase(name); let attr_cfg = ATTRS.get(ns, &proc[element], &proc[name]); diff --git a/rust/onepass/src/unit/content.rs b/rust/onepass/src/unit/content.rs index 7363899..5d29250 100644 --- a/rust/onepass/src/unit/content.rs +++ b/rust/onepass/src/unit/content.rs @@ -64,7 +64,7 @@ pub fn process_content( collapse, destroy_whole, trim, - } = get_whitespace_minification_for_tag(parent.map(|r| &proc[r]), descendant_of_pre); + } = get_whitespace_minification_for_tag(proc.get_or_empty(parent), descendant_of_pre); let handle_ws = collapse || destroy_whole || trim; @@ -146,7 +146,7 @@ pub fn process_content( .require("tag name")?; proc.make_lowercase(tag_name); - if can_omit_as_before(proc, parent, tag_name) { + if can_omit_as_before(proc.get_or_empty(parent), &proc[tag_name]) { // TODO Is this necessary? Can a previous closing tag even exist? prev_sibling_closing_tag.write_if_exists(proc); tag_checkpoint.restore(proc); @@ -169,9 +169,9 @@ pub fn process_content( prev_sibling_closing_tag.replace(new_closing_tag); } ContentType::End => { - if prev_sibling_closing_tag - .exists_and(|prev_tag| !can_omit_as_last_node(proc, parent, prev_tag)) - { + if prev_sibling_closing_tag.exists_and(|prev_tag| { + !can_omit_as_last_node(proc.get_or_empty(parent), &proc[prev_tag]) + }) { prev_sibling_closing_tag.write(proc); }; break; diff --git a/rust/onepass/src/unit/tag.rs b/rust/onepass/src/unit/tag.rs index 05333d7..5acd641 100644 --- a/rust/onepass/src/unit/tag.rs +++ b/rust/onepass/src/unit/tag.rs @@ -107,7 +107,7 @@ pub fn process_tag( source_tag_name: ProcessorRange, ) -> ProcessingResult<MaybeClosingTag> { if prev_sibling_closing_tag - .exists_and(|prev_tag| !can_omit_as_before(proc, Some(prev_tag), source_tag_name)) + .exists_and(|prev_tag| !can_omit_as_before(&proc[prev_tag], &proc[source_tag_name])) { prev_sibling_closing_tag.write(proc); }; @@
-237,7 +237,7 @@ pub fn process_tag( } }; - let can_omit_closing_tag = can_omit_as_last_node(proc, parent, tag_name); + let can_omit_closing_tag = can_omit_as_last_node(proc.get_or_empty(parent), &proc[tag_name]); if closing_tag_omitted || proc.at_end() && can_omit_closing_tag { return Ok(MaybeClosingTag(None)); };