Simplify and improve performance of whitespace minification strategy config
This commit is contained in:
parent
af8e93684a
commit
ba5fbc68f7
14
README.md
14
README.md
|
@ -109,7 +109,7 @@ There are three whitespace minification methods. When processing text content, h
|
||||||
<details>
|
<details>
|
||||||
<summary><strong>Collapse whitespace</strong></summary>
|
<summary><strong>Collapse whitespace</strong></summary>
|
||||||
|
|
||||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/wss.rs) elements.
|
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/whitespace.rs) elements.
|
||||||
|
|
||||||
Reduce a sequence of whitespace characters in text nodes to a single space (U+0020).
|
Reduce a sequence of whitespace characters in text nodes to a single space (U+0020).
|
||||||
|
|
||||||
|
@ -135,7 +135,7 @@ Reduce a sequence of whitespace characters in text nodes to a single space (U+00
|
||||||
<details>
|
<details>
|
||||||
<summary><strong>Destroy whole whitespace</strong></summary>
|
<summary><strong>Destroy whole whitespace</strong></summary>
|
||||||
|
|
||||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/wss.rs), [content](./src/spec/tag/content.rs), [content-first](./src/spec/tag/contentfirst.rs), and [formatting](./src/spec/tag/formatting.rs) elements.
|
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/whitespace.rs), [content](./src/spec/tag/whitespace.rs), [content-first](./src/spec/tag/whitespace.rs), and [formatting](./src/spec/tag/whitespace.rs) elements.
|
||||||
|
|
||||||
Remove any text nodes that only consist of whitespace characters.
|
Remove any text nodes that only consist of whitespace characters.
|
||||||
|
|
||||||
|
@ -163,7 +163,7 @@ Remove any text nodes that only consist of whitespace characters.
|
||||||
<details>
|
<details>
|
||||||
<summary><strong>Trim whitespace</strong></summary>
|
<summary><strong>Trim whitespace</strong></summary>
|
||||||
|
|
||||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/wss.rs) and [formatting](./src/spec/tag/formatting.rs) elements.
|
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/whitespace.rs) and [formatting](./src/spec/tag/whitespace.rs) elements.
|
||||||
|
|
||||||
Remove any leading/trailing whitespace from any leading/trailing text nodes of a tag.
|
Remove any leading/trailing whitespace from any leading/trailing text nodes of a tag.
|
||||||
|
|
||||||
|
@ -194,10 +194,10 @@ hyperbuild recognises elements based on one of a few ways it assumes they are us
|
||||||
|
|
||||||
|Group|Elements|Expected children|
|
|Group|Elements|Expected children|
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
|Formatting|`a`, `strong`, [and others](./src/spec/tag/formatting.rs)|Formatting elements, text.|
|
|Formatting|`a`, `strong`, [and others](./src/spec/tag/whitespace.rs)|Formatting elements, text.|
|
||||||
|Content|`h1`, `p`, [and others](./src/spec/tag/content.rs)|Formatting elements, text.|
|
|Content|`h1`, `p`, [and others](./src/spec/tag/whitespace.rs)|Formatting elements, text.|
|
||||||
|Layout|`div`, `ul`, [and others](./src/spec/tag/layout.rs)|Layout elements, content elements.|
|
|Layout|`div`, `ul`, [and others](./src/spec/tag/whitespace.rs)|Layout elements, content elements.|
|
||||||
|Content-first|`label`, `li`, [and others](./src/spec/tag/contentfirst.rs)|Like content but could be layout with only one child.|
|
|Content-first|`label`, `li`, [and others](./src/spec/tag/whitespace.rs)|Like content but could be layout with only one child.|
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary><strong>Formatting elements</strong></summary>
|
<summary><strong>Formatting elements</strong></summary>
|
||||||
|
|
|
@ -1,24 +0,0 @@
|
||||||
use phf::{phf_set, Set};
|
|
||||||
|
|
||||||
pub static CONTENT_TAGS: Set<&'static [u8]> = phf_set! {
|
|
||||||
b"address",
|
|
||||||
b"audio",
|
|
||||||
b"button",
|
|
||||||
b"canvas",
|
|
||||||
b"caption",
|
|
||||||
b"figcaption",
|
|
||||||
b"h1",
|
|
||||||
b"h2",
|
|
||||||
b"h3",
|
|
||||||
b"h4",
|
|
||||||
b"h5",
|
|
||||||
b"h6",
|
|
||||||
b"legend",
|
|
||||||
b"meter",
|
|
||||||
b"object",
|
|
||||||
b"option",
|
|
||||||
b"p",
|
|
||||||
b"summary",
|
|
||||||
b"textarea",
|
|
||||||
b"video",
|
|
||||||
};
|
|
|
@ -1,17 +0,0 @@
|
||||||
use phf::{phf_set, Set};
|
|
||||||
|
|
||||||
pub static CONTENT_FIRST_TAGS: Set<&'static [u8]> = phf_set! {
|
|
||||||
b"dd",
|
|
||||||
b"details",
|
|
||||||
b"dt",
|
|
||||||
b"iframe",
|
|
||||||
b"label",
|
|
||||||
b"li",
|
|
||||||
b"noscript",
|
|
||||||
b"output",
|
|
||||||
b"progress",
|
|
||||||
b"slot",
|
|
||||||
b"td",
|
|
||||||
b"template",
|
|
||||||
b"th",
|
|
||||||
};
|
|
|
@ -1,36 +0,0 @@
|
||||||
use phf::{phf_set, Set};
|
|
||||||
|
|
||||||
// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element#Inline_text_semantics.
|
|
||||||
// Differences to tags listed in table at above URL: -br, +del, +ins.
|
|
||||||
pub static FORMATTING_TAGS: Set<&'static [u8]> = phf_set! {
|
|
||||||
b"a",
|
|
||||||
b"abbr",
|
|
||||||
b"b",
|
|
||||||
b"bdi",
|
|
||||||
b"bdo",
|
|
||||||
b"cite",
|
|
||||||
b"data",
|
|
||||||
b"del",
|
|
||||||
b"dfn",
|
|
||||||
b"em",
|
|
||||||
b"i",
|
|
||||||
b"ins",
|
|
||||||
b"kbd",
|
|
||||||
b"mark",
|
|
||||||
b"q",
|
|
||||||
b"rp",
|
|
||||||
b"rt",
|
|
||||||
b"rtc",
|
|
||||||
b"ruby",
|
|
||||||
b"s",
|
|
||||||
b"samp",
|
|
||||||
b"small",
|
|
||||||
b"span",
|
|
||||||
b"strong",
|
|
||||||
b"sub",
|
|
||||||
b"sup",
|
|
||||||
b"time",
|
|
||||||
b"u",
|
|
||||||
b"var",
|
|
||||||
b"wbr",
|
|
||||||
};
|
|
|
@ -1,36 +0,0 @@
|
||||||
use phf::{phf_set, Set};
|
|
||||||
|
|
||||||
pub static LAYOUT_TAGS: Set<&'static [u8]> = phf_set! {
|
|
||||||
b"article",
|
|
||||||
b"aside",
|
|
||||||
b"blockquote",
|
|
||||||
b"body",
|
|
||||||
b"colgroup",
|
|
||||||
b"datalist",
|
|
||||||
b"dialog",
|
|
||||||
b"div",
|
|
||||||
b"dl",
|
|
||||||
b"fieldset",
|
|
||||||
b"figure",
|
|
||||||
b"footer",
|
|
||||||
b"form",
|
|
||||||
b"head",
|
|
||||||
b"header",
|
|
||||||
b"hgroup",
|
|
||||||
b"html",
|
|
||||||
b"main",
|
|
||||||
b"map",
|
|
||||||
b"menu",
|
|
||||||
b"nav",
|
|
||||||
b"ol",
|
|
||||||
b"optgroup",
|
|
||||||
b"picture",
|
|
||||||
b"section",
|
|
||||||
b"select",
|
|
||||||
b"table",
|
|
||||||
b"tbody",
|
|
||||||
b"tfoot",
|
|
||||||
b"thead",
|
|
||||||
b"tr",
|
|
||||||
b"ul",
|
|
||||||
};
|
|
|
@ -1,7 +1,3 @@
|
||||||
pub mod content;
|
|
||||||
pub mod contentfirst;
|
|
||||||
pub mod formatting;
|
|
||||||
pub mod layout;
|
|
||||||
pub mod omission;
|
pub mod omission;
|
||||||
pub mod void;
|
pub mod void;
|
||||||
pub mod wss;
|
pub mod whitespace;
|
||||||
|
|
|
@ -0,0 +1,158 @@
|
||||||
|
use phf::{phf_map, Map};
|
||||||
|
|
||||||
|
/// Flags selecting which whitespace minification methods apply to the content of an element.
pub struct WhitespaceMinification {
|
||||||
|
/// Reduce each sequence of whitespace characters in text nodes to a single space (U+0020).
pub collapse: bool,
|
||||||
|
/// Remove text nodes that consist only of whitespace characters.
pub destroy_whole: bool,
|
||||||
|
/// Remove leading/trailing whitespace from the leading/trailing text nodes of a tag.
pub trim: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strategy for content tags (e.g. `h1`, `p`): collapse and trim whitespace,
// but keep text nodes that consist only of whitespace.
static CONTENT: &WhitespaceMinification = &WhitespaceMinification {
|
||||||
|
collapse: true,
|
||||||
|
destroy_whole: false,
|
||||||
|
trim: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Strategy for content-first tags (e.g. `label`, `li`): the same flags as `CONTENT`,
// i.e. collapse and trim whitespace, but keep whitespace-only text nodes.
static CONTENT_FIRST: &WhitespaceMinification = &WhitespaceMinification {
|
||||||
|
collapse: true,
|
||||||
|
destroy_whole: false,
|
||||||
|
trim: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Strategy for formatting tags (e.g. `a`, `strong`): only collapse whitespace;
// neither trim it nor remove whitespace-only text nodes.
static FORMATTING: &WhitespaceMinification = &WhitespaceMinification {
|
||||||
|
collapse: true,
|
||||||
|
destroy_whole: false,
|
||||||
|
trim: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Strategy for layout tags (e.g. `div`, `ul`): apply all three methods
// (collapse, destroy whole, and trim).
static LAYOUT: &WhitespaceMinification = &WhitespaceMinification {
|
||||||
|
collapse: true,
|
||||||
|
destroy_whole: true,
|
||||||
|
trim: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Strategy for whitespace-sensitive tags (`code`, `pre`): no whitespace minification at all.
static WHITESPACE_SENSITIVE: &WhitespaceMinification = &WhitespaceMinification {
|
||||||
|
collapse: false,
|
||||||
|
destroy_whole: false,
|
||||||
|
trim: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fallback strategy used when a tag has no entry in `TAG_WHITESPACE_MINIFICATION`:
// only collapse whitespace.
static DEFAULT: &WhitespaceMinification = &WhitespaceMinification {
|
||||||
|
collapse: true,
|
||||||
|
destroy_whole: false,
|
||||||
|
trim: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
static TAG_WHITESPACE_MINIFICATION: Map<&'static [u8], &'static WhitespaceMinification> = phf_map! {
|
||||||
|
// Content tags.
|
||||||
|
b"address" => CONTENT,
|
||||||
|
b"audio" => CONTENT,
|
||||||
|
b"button" => CONTENT,
|
||||||
|
b"canvas" => CONTENT,
|
||||||
|
b"caption" => CONTENT,
|
||||||
|
b"figcaption" => CONTENT,
|
||||||
|
b"h1" => CONTENT,
|
||||||
|
b"h2" => CONTENT,
|
||||||
|
b"h3" => CONTENT,
|
||||||
|
b"h4" => CONTENT,
|
||||||
|
b"h5" => CONTENT,
|
||||||
|
b"h6" => CONTENT,
|
||||||
|
b"legend" => CONTENT,
|
||||||
|
b"meter" => CONTENT,
|
||||||
|
b"object" => CONTENT,
|
||||||
|
b"option" => CONTENT,
|
||||||
|
b"p" => CONTENT,
|
||||||
|
b"summary" => CONTENT,
|
||||||
|
b"textarea" => CONTENT,
|
||||||
|
b"video" => CONTENT,
|
||||||
|
|
||||||
|
// Content-first tags.
|
||||||
|
b"dd" => CONTENT_FIRST,
|
||||||
|
b"details" => CONTENT_FIRST,
|
||||||
|
b"dt" => CONTENT_FIRST,
|
||||||
|
b"iframe" => CONTENT_FIRST,
|
||||||
|
b"label" => CONTENT_FIRST,
|
||||||
|
b"li" => CONTENT_FIRST,
|
||||||
|
b"noscript" => CONTENT_FIRST,
|
||||||
|
b"output" => CONTENT_FIRST,
|
||||||
|
b"progress" => CONTENT_FIRST,
|
||||||
|
b"slot" => CONTENT_FIRST,
|
||||||
|
b"td" => CONTENT_FIRST,
|
||||||
|
b"template" => CONTENT_FIRST,
|
||||||
|
b"th" => CONTENT_FIRST,
|
||||||
|
|
||||||
|
// Formatting tags.
|
||||||
|
// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element#Inline_text_semantics.
|
||||||
|
// Differences to tags listed in table at above URL: -br, +del, +ins.
|
||||||
|
b"a" => FORMATTING,
|
||||||
|
b"abbr" => FORMATTING,
|
||||||
|
b"b" => FORMATTING,
|
||||||
|
b"bdi" => FORMATTING,
|
||||||
|
b"bdo" => FORMATTING,
|
||||||
|
b"cite" => FORMATTING,
|
||||||
|
b"data" => FORMATTING,
|
||||||
|
b"del" => FORMATTING,
|
||||||
|
b"dfn" => FORMATTING,
|
||||||
|
b"em" => FORMATTING,
|
||||||
|
b"i" => FORMATTING,
|
||||||
|
b"ins" => FORMATTING,
|
||||||
|
b"kbd" => FORMATTING,
|
||||||
|
b"mark" => FORMATTING,
|
||||||
|
b"q" => FORMATTING,
|
||||||
|
b"rp" => FORMATTING,
|
||||||
|
b"rt" => FORMATTING,
|
||||||
|
b"rtc" => FORMATTING,
|
||||||
|
b"ruby" => FORMATTING,
|
||||||
|
b"s" => FORMATTING,
|
||||||
|
b"samp" => FORMATTING,
|
||||||
|
b"small" => FORMATTING,
|
||||||
|
b"span" => FORMATTING,
|
||||||
|
b"strong" => FORMATTING,
|
||||||
|
b"sub" => FORMATTING,
|
||||||
|
b"sup" => FORMATTING,
|
||||||
|
b"time" => FORMATTING,
|
||||||
|
b"u" => FORMATTING,
|
||||||
|
b"var" => FORMATTING,
|
||||||
|
b"wbr" => FORMATTING,
|
||||||
|
|
||||||
|
// Layout tags.
|
||||||
|
b"article" => LAYOUT,
|
||||||
|
b"aside" => LAYOUT,
|
||||||
|
b"blockquote" => LAYOUT,
|
||||||
|
b"body" => LAYOUT,
|
||||||
|
b"colgroup" => LAYOUT,
|
||||||
|
b"datalist" => LAYOUT,
|
||||||
|
b"dialog" => LAYOUT,
|
||||||
|
b"div" => LAYOUT,
|
||||||
|
b"dl" => LAYOUT,
|
||||||
|
b"fieldset" => LAYOUT,
|
||||||
|
b"figure" => LAYOUT,
|
||||||
|
b"footer" => LAYOUT,
|
||||||
|
b"form" => LAYOUT,
|
||||||
|
b"head" => LAYOUT,
|
||||||
|
b"header" => LAYOUT,
|
||||||
|
b"hgroup" => LAYOUT,
|
||||||
|
b"html" => LAYOUT,
|
||||||
|
b"main" => LAYOUT,
|
||||||
|
b"map" => LAYOUT,
|
||||||
|
b"menu" => LAYOUT,
|
||||||
|
b"nav" => LAYOUT,
|
||||||
|
b"ol" => LAYOUT,
|
||||||
|
b"optgroup" => LAYOUT,
|
||||||
|
b"picture" => LAYOUT,
|
||||||
|
b"section" => LAYOUT,
|
||||||
|
b"select" => LAYOUT,
|
||||||
|
b"table" => LAYOUT,
|
||||||
|
b"tbody" => LAYOUT,
|
||||||
|
b"tfoot" => LAYOUT,
|
||||||
|
b"thead" => LAYOUT,
|
||||||
|
b"tr" => LAYOUT,
|
||||||
|
b"ul" => LAYOUT,
|
||||||
|
|
||||||
|
// Whitespace-sensitive tags.
|
||||||
|
b"code" => WHITESPACE_SENSITIVE,
|
||||||
|
b"pre" => WHITESPACE_SENSITIVE,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn get_whitespace_minification_for_tag(tag_name: Option<&[u8]>) -> &'static WhitespaceMinification {
|
||||||
|
tag_name.and_then(|n| TAG_WHITESPACE_MINIFICATION.get(n)).unwrap_or(&DEFAULT)
|
||||||
|
}
|
|
@ -1,7 +0,0 @@
|
||||||
use phf::{phf_set, Set};
|
|
||||||
|
|
||||||
// "WSS" stands for whitespace-sensitive.
|
|
||||||
pub static WSS_TAGS: Set<&'static [u8]> = phf_set! {
|
|
||||||
b"code",
|
|
||||||
b"pre",
|
|
||||||
};
|
|
|
@ -1,16 +1,13 @@
|
||||||
use crate::err::ProcessingResult;
|
use crate::err::ProcessingResult;
|
||||||
use crate::proc::{Processor, ProcessorRange, UnintentionalEntityPrevention};
|
use crate::proc::{Processor, ProcessorRange, UnintentionalEntityPrevention};
|
||||||
use crate::spec::codepoint::is_whitespace;
|
use crate::spec::codepoint::is_whitespace;
|
||||||
use crate::spec::tag::content::CONTENT_TAGS;
|
|
||||||
use crate::spec::tag::contentfirst::CONTENT_FIRST_TAGS;
|
|
||||||
use crate::spec::tag::formatting::FORMATTING_TAGS;
|
|
||||||
use crate::spec::tag::omission::CLOSING_TAG_OMISSION_RULES;
|
use crate::spec::tag::omission::CLOSING_TAG_OMISSION_RULES;
|
||||||
use crate::spec::tag::wss::WSS_TAGS;
|
|
||||||
use crate::unit::bang::process_bang;
|
use crate::unit::bang::process_bang;
|
||||||
use crate::unit::comment::process_comment;
|
use crate::unit::comment::process_comment;
|
||||||
use crate::unit::entity::{EntityType, parse_entity};
|
use crate::unit::entity::{EntityType, parse_entity};
|
||||||
use crate::unit::instruction::process_instruction;
|
use crate::unit::instruction::process_instruction;
|
||||||
use crate::unit::tag::{process_tag, ProcessedTag};
|
use crate::unit::tag::{process_tag, ProcessedTag};
|
||||||
|
use crate::spec::tag::whitespace::{get_whitespace_minification_for_tag, WhitespaceMinification};
|
||||||
|
|
||||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||||
enum ContentType {
|
enum ContentType {
|
||||||
|
@ -132,23 +129,9 @@ fn process_wss_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) -> ProcessingResult<()> {
|
pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) -> ProcessingResult<()> {
|
||||||
let collapse_whitespace = match parent {
|
let &WhitespaceMinification { collapse, destroy_whole, trim } = get_whitespace_minification_for_tag(parent.map(|r| &proc[r]));
|
||||||
Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]),
|
|
||||||
// Should collapse whitespace for root content.
|
|
||||||
None => true,
|
|
||||||
};
|
|
||||||
let destroy_whole_whitespace = match parent {
|
|
||||||
Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]) && !CONTENT_TAGS.contains(&proc[tag_name]) && !CONTENT_FIRST_TAGS.contains(&proc[tag_name]) && !FORMATTING_TAGS.contains(&proc[tag_name]),
|
|
||||||
// Should destroy whole whitespace for root content.
|
|
||||||
None => true,
|
|
||||||
};
|
|
||||||
let trim_whitespace = match parent {
|
|
||||||
Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]) && !FORMATTING_TAGS.contains(&proc[tag_name]),
|
|
||||||
// Should trim whitespace for root content.
|
|
||||||
None => true,
|
|
||||||
};
|
|
||||||
|
|
||||||
if !(collapse_whitespace || destroy_whole_whitespace || trim_whitespace) {
|
if !(collapse || destroy_whole || trim) {
|
||||||
// Normally whitespace entities are decoded and then ignored.
|
// Normally whitespace entities are decoded and then ignored.
|
||||||
// However, if whitespace cannot be minified in any way,
|
// However, if whitespace cannot be minified in any way,
|
||||||
// and we can't actually do anything but write whitespace as is,
|
// and we can't actually do anything but write whitespace as is,
|
||||||
|
@ -205,13 +188,13 @@ pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
||||||
|
|
||||||
// Next character is not whitespace, so handle any previously ignored whitespace.
|
// Next character is not whitespace, so handle any previously ignored whitespace.
|
||||||
if currently_in_whitespace {
|
if currently_in_whitespace {
|
||||||
if destroy_whole_whitespace && last_non_whitespace_content_type.is_comment_bang_instruction_opening_tag() && next_content_type.is_comment_bang_instruction_opening_tag() {
|
if destroy_whole && last_non_whitespace_content_type.is_comment_bang_instruction_opening_tag() && next_content_type.is_comment_bang_instruction_opening_tag() {
|
||||||
// Whitespace is between two tags, comments, or bangs.
|
// Whitespace is between two tags, comments, or bangs.
|
||||||
// destroy_whole_whitespace is on, so don't write it.
|
// `destroy_whole` is on, so don't write it.
|
||||||
} else if trim_whitespace && (last_non_whitespace_content_type == ContentType::Start || next_content_type == ContentType::End) {
|
} else if trim && (last_non_whitespace_content_type == ContentType::Start || next_content_type == ContentType::End) {
|
||||||
// Whitespace is leading or trailing.
|
// Whitespace is leading or trailing.
|
||||||
// trim_whitespace is on, so don't write it.
|
// `trim` is on, so don't write it.
|
||||||
} else if collapse_whitespace {
|
} else if collapse {
|
||||||
// Current contiguous whitespace needs to be reduced to a single space character.
|
// Current contiguous whitespace needs to be reduced to a single space character.
|
||||||
proc.write(b' ');
|
proc.write(b' ');
|
||||||
// If writing space, then prev_sibling_closing_tag no longer represents immediate previous sibling node.
|
// If writing space, then prev_sibling_closing_tag no longer represents immediate previous sibling node.
|
||||||
|
|
Loading…
Reference in New Issue