Simplify and improve performance of whitespace minification strategy config
This commit is contained in:
parent
af8e93684a
commit
ba5fbc68f7
14
README.md
14
README.md
|
@ -109,7 +109,7 @@ There are three whitespace minification methods. When processing text content, h
|
|||
<details>
|
||||
<summary><strong>Collapse whitespace</strong></summary>
|
||||
|
||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/wss.rs) elements.
|
||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/whitespace.rs) elements.
|
||||
|
||||
Reduce a sequence of whitespace characters in text nodes to a single space (U+0020).
|
||||
|
||||
|
@ -135,7 +135,7 @@ Reduce a sequence of whitespace characters in text nodes to a single space (U+00
|
|||
<details>
|
||||
<summary><strong>Destroy whole whitespace</strong></summary>
|
||||
|
||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/wss.rs), [content](./src/spec/tag/content.rs), [content-first](./src/spec/tag/contentfirst.rs), and [formatting](./src/spec/tag/formatting.rs) elements.
|
||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/whitespace.rs), [content](./src/spec/tag/whitespace.rs), [content-first](./src/spec/tag/whitespace.rs), and [formatting](./src/spec/tag/whitespace.rs) elements.
|
||||
|
||||
Remove any text nodes that only consist of whitespace characters.
|
||||
|
||||
|
@ -163,7 +163,7 @@ Remove any text nodes that only consist of whitespace characters.
|
|||
<details>
|
||||
<summary><strong>Trim whitespace</strong></summary>
|
||||
|
||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/wss.rs) and [formatting](./src/spec/tag/formatting.rs) elements.
|
||||
> **Applies to:** any element except [whitespace sensitive](./src/spec/tag/whitespace.rs) and [formatting](./src/spec/tag/whitespace.rs) elements.
|
||||
|
||||
Remove any leading/trailing whitespace from any leading/trailing text nodes of a tag.
|
||||
|
||||
|
@ -194,10 +194,10 @@ hyperbuild recognises elements based on one of a few ways it assumes they are us
|
|||
|
||||
|Group|Elements|Expected children|
|
||||
|---|---|---|
|
||||
|Formatting|`a`, `strong`, [and others](./src/spec/tag/formatting.rs)|Formatting elements, text.|
|
||||
|Content|`h1`, `p`, [and others](./src/spec/tag/content.rs)|Formatting elements, text.|
|
||||
|Layout|`div`, `ul`, [and others](./src/spec/tag/layout.rs)|Layout elements, content elements.|
|
||||
|Content-first|`label`, `li`, [and others](./src/spec/tag/contentfirst.rs)|Like content but could be layout with only one child.|
|
||||
|Formatting|`a`, `strong`, [and others](./src/spec/tag/whitespace.rs)|Formatting elements, text.|
|
||||
|Content|`h1`, `p`, [and others](./src/spec/tag/whitespace.rs)|Formatting elements, text.|
|
||||
|Layout|`div`, `ul`, [and others](./src/spec/tag/whitespace.rs)|Layout elements, content elements.|
|
||||
|Content-first|`label`, `li`, [and others](./src/spec/tag/whitespace.rs)|Like content but could be layout with only one child.|
|
||||
|
||||
<details>
|
||||
<summary><strong>Formatting elements</strong></summary>
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
use phf::{phf_set, Set};
|
||||
|
||||
/// Names of "content" elements: elements whose expected children are formatting elements and
/// text.
pub static CONTENT_TAGS: Set<&'static [u8]> = phf_set! {
    b"address", b"audio", b"button", b"canvas", b"caption", b"figcaption",
    b"h1", b"h2", b"h3", b"h4", b"h5", b"h6",
    b"legend", b"meter", b"object", b"option", b"p", b"summary", b"textarea", b"video",
};
|
|
@ -1,17 +0,0 @@
|
|||
use phf::{phf_set, Set};
|
||||
|
||||
/// Names of "content-first" elements: treated like content elements, but they could be layout
/// elements with only one child.
pub static CONTENT_FIRST_TAGS: Set<&'static [u8]> = phf_set! {
    b"dd", b"details", b"dt", b"iframe", b"label", b"li", b"noscript",
    b"output", b"progress", b"slot", b"td", b"template", b"th",
};
|
|
@ -1,36 +0,0 @@
|
|||
use phf::{phf_set, Set};
|
||||
|
||||
// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element#Inline_text_semantics.
|
||||
// Differences to tags listed in table at above URL: -br, +del, +ins.
|
||||
pub static FORMATTING_TAGS: Set<&'static [u8]> = phf_set! {
|
||||
b"a",
|
||||
b"abbr",
|
||||
b"b",
|
||||
b"bdi",
|
||||
b"bdo",
|
||||
b"cite",
|
||||
b"data",
|
||||
b"del",
|
||||
b"dfn",
|
||||
b"em",
|
||||
b"i",
|
||||
b"ins",
|
||||
b"kbd",
|
||||
b"mark",
|
||||
b"q",
|
||||
b"rp",
|
||||
b"rt",
|
||||
b"rtc",
|
||||
b"ruby",
|
||||
b"s",
|
||||
b"samp",
|
||||
b"small",
|
||||
b"span",
|
||||
b"strong",
|
||||
b"sub",
|
||||
b"sup",
|
||||
b"time",
|
||||
b"u",
|
||||
b"var",
|
||||
b"wbr",
|
||||
};
|
|
@ -1,36 +0,0 @@
|
|||
use phf::{phf_set, Set};
|
||||
|
||||
/// Names of "layout" elements: elements whose expected children are layout elements and
/// content elements.
pub static LAYOUT_TAGS: Set<&'static [u8]> = phf_set! {
    b"article", b"aside", b"blockquote", b"body", b"colgroup", b"datalist", b"dialog", b"div",
    b"dl", b"fieldset", b"figure", b"footer", b"form", b"head", b"header", b"hgroup",
    b"html", b"main", b"map", b"menu", b"nav", b"ol", b"optgroup", b"picture",
    b"section", b"select", b"table", b"tbody", b"tfoot", b"thead", b"tr", b"ul",
};
|
|
@ -1,7 +1,3 @@
|
|||
pub mod content;
|
||||
pub mod contentfirst;
|
||||
pub mod formatting;
|
||||
pub mod layout;
|
||||
pub mod omission;
|
||||
pub mod void;
|
||||
pub mod wss;
|
||||
pub mod whitespace;
|
||||
|
|
|
@ -0,0 +1,158 @@
|
|||
use phf::{phf_map, Map};
|
||||
|
||||
/// Set of whitespace minification strategies to apply to the text content of an element.
///
/// Each flag switches one of the three whitespace minification methods on or off.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct WhitespaceMinification {
    /// Reduce a sequence of whitespace characters in text nodes to a single space (U+0020).
    pub collapse: bool,
    /// Remove any text nodes that only consist of whitespace characters.
    pub destroy_whole: bool,
    /// Remove any leading/trailing whitespace from any leading/trailing text nodes of a tag.
    pub trim: bool,
}
|
||||
|
||||
/// Strategy for content tags: collapse and trim, but keep whitespace-only text nodes.
static CONTENT: &WhitespaceMinification =
    &WhitespaceMinification { collapse: true, destroy_whole: false, trim: true };
|
||||
|
||||
/// Strategy for content-first tags: collapse and trim, but keep whitespace-only text nodes.
static CONTENT_FIRST: &WhitespaceMinification =
    &WhitespaceMinification { collapse: true, destroy_whole: false, trim: true };
|
||||
|
||||
/// Strategy for formatting tags: collapse only; nothing is trimmed or destroyed.
static FORMATTING: &WhitespaceMinification =
    &WhitespaceMinification { collapse: true, destroy_whole: false, trim: false };
|
||||
|
||||
/// Strategy for layout tags: every whitespace minification method is enabled.
static LAYOUT: &WhitespaceMinification =
    &WhitespaceMinification { collapse: true, destroy_whole: true, trim: true };
|
||||
|
||||
/// Strategy for whitespace-sensitive tags: every whitespace minification method is disabled.
static WHITESPACE_SENSITIVE: &WhitespaceMinification =
    &WhitespaceMinification { collapse: false, destroy_whole: false, trim: false };
|
||||
|
||||
/// Fallback strategy used when no tag-specific strategy is found: collapse only.
static DEFAULT: &WhitespaceMinification =
    &WhitespaceMinification { collapse: true, destroy_whole: false, trim: false };
|
||||
|
||||
/// Compile-time lookup table from a tag name to the whitespace minification strategy used for
/// that tag's content. Entries are grouped by the kind of element they describe.
static TAG_WHITESPACE_MINIFICATION: Map<&'static [u8], &'static WhitespaceMinification> = phf_map! {
    // Group: content tags.
    b"address" => CONTENT,
    b"audio" => CONTENT,
    b"button" => CONTENT,
    b"canvas" => CONTENT,
    b"caption" => CONTENT,
    b"figcaption" => CONTENT,
    b"h1" => CONTENT,
    b"h2" => CONTENT,
    b"h3" => CONTENT,
    b"h4" => CONTENT,
    b"h5" => CONTENT,
    b"h6" => CONTENT,
    b"legend" => CONTENT,
    b"meter" => CONTENT,
    b"object" => CONTENT,
    b"option" => CONTENT,
    b"p" => CONTENT,
    b"summary" => CONTENT,
    b"textarea" => CONTENT,
    b"video" => CONTENT,

    // Group: content-first tags.
    b"dd" => CONTENT_FIRST,
    b"details" => CONTENT_FIRST,
    b"dt" => CONTENT_FIRST,
    b"iframe" => CONTENT_FIRST,
    b"label" => CONTENT_FIRST,
    b"li" => CONTENT_FIRST,
    b"noscript" => CONTENT_FIRST,
    b"output" => CONTENT_FIRST,
    b"progress" => CONTENT_FIRST,
    b"slot" => CONTENT_FIRST,
    b"td" => CONTENT_FIRST,
    b"template" => CONTENT_FIRST,
    b"th" => CONTENT_FIRST,

    // Group: formatting tags.
    // Taken from https://developer.mozilla.org/en-US/docs/Web/HTML/Element#Inline_text_semantics,
    // with these differences to the table at that URL: -br, +del, +ins.
    b"a" => FORMATTING,
    b"abbr" => FORMATTING,
    b"b" => FORMATTING,
    b"bdi" => FORMATTING,
    b"bdo" => FORMATTING,
    b"cite" => FORMATTING,
    b"data" => FORMATTING,
    b"del" => FORMATTING,
    b"dfn" => FORMATTING,
    b"em" => FORMATTING,
    b"i" => FORMATTING,
    b"ins" => FORMATTING,
    b"kbd" => FORMATTING,
    b"mark" => FORMATTING,
    b"q" => FORMATTING,
    b"rp" => FORMATTING,
    b"rt" => FORMATTING,
    b"rtc" => FORMATTING,
    b"ruby" => FORMATTING,
    b"s" => FORMATTING,
    b"samp" => FORMATTING,
    b"small" => FORMATTING,
    b"span" => FORMATTING,
    b"strong" => FORMATTING,
    b"sub" => FORMATTING,
    b"sup" => FORMATTING,
    b"time" => FORMATTING,
    b"u" => FORMATTING,
    b"var" => FORMATTING,
    b"wbr" => FORMATTING,

    // Group: layout tags.
    b"article" => LAYOUT,
    b"aside" => LAYOUT,
    b"blockquote" => LAYOUT,
    b"body" => LAYOUT,
    b"colgroup" => LAYOUT,
    b"datalist" => LAYOUT,
    b"dialog" => LAYOUT,
    b"div" => LAYOUT,
    b"dl" => LAYOUT,
    b"fieldset" => LAYOUT,
    b"figure" => LAYOUT,
    b"footer" => LAYOUT,
    b"form" => LAYOUT,
    b"head" => LAYOUT,
    b"header" => LAYOUT,
    b"hgroup" => LAYOUT,
    b"html" => LAYOUT,
    b"main" => LAYOUT,
    b"map" => LAYOUT,
    b"menu" => LAYOUT,
    b"nav" => LAYOUT,
    b"ol" => LAYOUT,
    b"optgroup" => LAYOUT,
    b"picture" => LAYOUT,
    b"section" => LAYOUT,
    b"select" => LAYOUT,
    b"table" => LAYOUT,
    b"tbody" => LAYOUT,
    b"tfoot" => LAYOUT,
    b"thead" => LAYOUT,
    b"tr" => LAYOUT,
    b"ul" => LAYOUT,

    // Group: whitespace-sensitive tags.
    b"code" => WHITESPACE_SENSITIVE,
    b"pre" => WHITESPACE_SENSITIVE,
};
|
||||
|
||||
pub fn get_whitespace_minification_for_tag(tag_name: Option<&[u8]>) -> &'static WhitespaceMinification {
|
||||
tag_name.and_then(|n| TAG_WHITESPACE_MINIFICATION.get(n)).unwrap_or(&DEFAULT)
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
use phf::{phf_set, Set};
|
||||
|
||||
// "WSS" stands for whitespace-sensitive.
|
||||
pub static WSS_TAGS: Set<&'static [u8]> = phf_set! {
|
||||
b"code",
|
||||
b"pre",
|
||||
};
|
|
@ -1,16 +1,13 @@
|
|||
use crate::err::ProcessingResult;
|
||||
use crate::proc::{Processor, ProcessorRange, UnintentionalEntityPrevention};
|
||||
use crate::spec::codepoint::is_whitespace;
|
||||
use crate::spec::tag::content::CONTENT_TAGS;
|
||||
use crate::spec::tag::contentfirst::CONTENT_FIRST_TAGS;
|
||||
use crate::spec::tag::formatting::FORMATTING_TAGS;
|
||||
use crate::spec::tag::omission::CLOSING_TAG_OMISSION_RULES;
|
||||
use crate::spec::tag::wss::WSS_TAGS;
|
||||
use crate::unit::bang::process_bang;
|
||||
use crate::unit::comment::process_comment;
|
||||
use crate::unit::entity::{EntityType, parse_entity};
|
||||
use crate::unit::instruction::process_instruction;
|
||||
use crate::unit::tag::{process_tag, ProcessedTag};
|
||||
use crate::spec::tag::whitespace::{get_whitespace_minification_for_tag, WhitespaceMinification};
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||
enum ContentType {
|
||||
|
@ -132,23 +129,9 @@ fn process_wss_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
|||
}
|
||||
|
||||
pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) -> ProcessingResult<()> {
|
||||
let collapse_whitespace = match parent {
|
||||
Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]),
|
||||
// Should collapse whitespace for root content.
|
||||
None => true,
|
||||
};
|
||||
let destroy_whole_whitespace = match parent {
|
||||
Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]) && !CONTENT_TAGS.contains(&proc[tag_name]) && !CONTENT_FIRST_TAGS.contains(&proc[tag_name]) && !FORMATTING_TAGS.contains(&proc[tag_name]),
|
||||
// Should destroy whole whitespace for root content.
|
||||
None => true,
|
||||
};
|
||||
let trim_whitespace = match parent {
|
||||
Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]) && !FORMATTING_TAGS.contains(&proc[tag_name]),
|
||||
// Should trim whitespace for root content.
|
||||
None => true,
|
||||
};
|
||||
let &WhitespaceMinification { collapse, destroy_whole, trim } = get_whitespace_minification_for_tag(parent.map(|r| &proc[r]));
|
||||
|
||||
if !(collapse_whitespace || destroy_whole_whitespace || trim_whitespace) {
|
||||
if !(collapse || destroy_whole || trim) {
|
||||
// Normally whitespace entities are decoded and then ignored.
|
||||
// However, if whitespace cannot be minified in any way,
|
||||
// and we can't actually do anything but write whitespace as is,
|
||||
|
@ -205,13 +188,13 @@ pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
|||
|
||||
// Next character is not whitespace, so handle any previously ignored whitespace.
|
||||
if currently_in_whitespace {
|
||||
if destroy_whole_whitespace && last_non_whitespace_content_type.is_comment_bang_instruction_opening_tag() && next_content_type.is_comment_bang_instruction_opening_tag() {
|
||||
if destroy_whole && last_non_whitespace_content_type.is_comment_bang_instruction_opening_tag() && next_content_type.is_comment_bang_instruction_opening_tag() {
|
||||
// Whitespace is between two tags, comments, or bangs.
|
||||
// destroy_whole_whitespace is on, so don't write it.
|
||||
} else if trim_whitespace && (last_non_whitespace_content_type == ContentType::Start || next_content_type == ContentType::End) {
|
||||
// `destroy_whole` is on, so don't write it.
|
||||
} else if trim && (last_non_whitespace_content_type == ContentType::Start || next_content_type == ContentType::End) {
|
||||
// Whitespace is leading or trailing.
|
||||
// trim_whitespace is on, so don't write it.
|
||||
} else if collapse_whitespace {
|
||||
// `trim` is on, so don't write it.
|
||||
} else if collapse {
|
||||
// Current contiguous whitespace needs to be reduced to a single space character.
|
||||
proc.write(b' ');
|
||||
// If writing space, then prev_sibling_closing_tag no longer represents immediate previous sibling node.
|
||||
|
|
Loading…
Reference in New Issue