Implement whitespace minification
This commit is contained in:
parent
1a930a170d
commit
383b2b3423
|
@ -3,7 +3,13 @@
|
|||
<head>
|
||||
<meta charset="utf-8">
|
||||
</head>
|
||||
<body><root><svg><circle r=1/>2</svg>
|
||||
<body>
|
||||
<div>&l<!-- -->t;</div>
|
||||
<div>x<!ac > a <!ac > b <!ac > c</div>
|
||||
<div>x<? ?> a <? > b <? > c</div>
|
||||
<div>x<!-- --> a <!-- --> b <!-- --> c</div>
|
||||
<div>x a b c</div>
|
||||
<root><svg><circle r=1/>2</svg>
|
||||
<DIV>
|
||||
<span lang=/></div>
|
||||
<DIV>
|
||||
|
|
|
@ -41,7 +41,6 @@ pub struct ByNamespace {
|
|||
}
|
||||
|
||||
impl ByNamespace {
|
||||
#[inline(always)]
|
||||
fn get(&self, ns: Namespace) -> Option<&AttrMapEntry> {
|
||||
match ns {
|
||||
Namespace::Html => self.html.as_ref(),
|
||||
|
@ -53,12 +52,10 @@ impl ByNamespace {
|
|||
pub struct AttrMap(HashMap<&'static [u8], ByNamespace>);
|
||||
|
||||
impl AttrMap {
|
||||
#[inline(always)]
|
||||
/// Wraps an already-built table, keyed by attribute name, in an `AttrMap`.
///
/// The map is stored as-is; no entries are added, removed or validated here.
pub const fn new(map: HashMap<&'static [u8], ByNamespace>) -> AttrMap {
|
||||
AttrMap(map)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn get(&self, ns: Namespace, tag: &[u8], attr: &[u8]) -> Option<&AttributeMinification> {
|
||||
self.0.get(attr).and_then(|namespaces| namespaces.get(ns)).and_then(|entry| match entry {
|
||||
AttrMapEntry::AllNamespaceElements(min) => Some(min),
|
||||
|
|
|
@ -59,7 +59,6 @@ pub struct Lookup {
|
|||
impl std::ops::Index<u8> for Lookup {
|
||||
type Output = bool;
|
||||
|
||||
#[inline(always)]
|
||||
fn index(&self, c: u8) -> &Self::Output {
|
||||
// `c` is definitely below 256 so it's always safe to directly index table without checking.
|
||||
unsafe {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::spec::tag::ns::Namespace;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
|
@ -32,6 +33,7 @@ pub enum NodeData {
|
|||
// If the source doesn't have a closing tag, then we can't add one, as otherwise output could be longer than source.
|
||||
closing_tag: ElementClosingTag,
|
||||
name: Vec<u8>,
|
||||
namespace: Namespace,
|
||||
},
|
||||
Instruction {
|
||||
code: Vec<u8>,
|
||||
|
@ -47,3 +49,12 @@ pub enum NodeData {
|
|||
value: Vec<u8>,
|
||||
},
|
||||
}
|
||||
|
||||
impl NodeData {
|
||||
pub fn is_element(&self) -> bool {
|
||||
match self {
|
||||
NodeData::Element { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,6 +45,6 @@ pub fn minify(src: &[u8], cfg: &Cfg) -> Vec<u8> {
|
|||
EMPTY_TAG_NAME,
|
||||
);
|
||||
let mut out = Vec::with_capacity(src.len());
|
||||
minify_content(cfg, &mut out, EMPTY_TAG_NAME, &parsed.children);
|
||||
minify_content(cfg, &mut out, false, EMPTY_TAG_NAME, parsed.children);
|
||||
out
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use lazy_static::lazy_static;
|
|||
|
||||
use crate::gen::codepoints::DIGIT;
|
||||
use crate::pattern::Replacer;
|
||||
use std::cmp::{min, Ordering};
|
||||
|
||||
fn build_double_quoted_replacer() -> Replacer {
|
||||
let mut patterns = Vec::<Vec<u8>>::new();
|
||||
|
@ -101,35 +102,67 @@ lazy_static! {
|
|||
static ref UNQUOTED_QUOTED_REPLACER: Replacer = build_unquoted_replacer();
|
||||
}
|
||||
|
||||
struct MinifiedVal {
|
||||
/// The form in which an attribute value is written to the output.
///
/// `minify_attr_val` tags each candidate serialisation with one of these, and
/// `minify_element` keeps the type of the last written attribute to decide
/// whether a separating space is needed before the next one.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum AttrType {
    /// Presumably "no attribute value written yet" -- TODO confirm against the
    /// initialisation of `last_attr` in `minify_element`.
    None,
    /// The value is wrapped in single or double quotes.
    Quoted,
    /// The value is written without surrounding quotes.
    Unquoted,
}
|
||||
|
||||
/// One candidate serialisation of an attribute value, as built by `minify_attr_val`.
///
/// The bytes written out are `prefix`, then `data[start..]`, then `suffix`.
pub struct AttrValMinified {
|
||||
// Which quoting form this candidate uses; exposed through `typ()`.
typ: AttrType,
|
||||
// Bytes emitted before the value (a quote character for the quoted candidates).
prefix: &'static [u8],
|
||||
// The value after entity/quote replacement.
data: Vec<u8>,
|
||||
// Offset into `data` at which the emitted bytes begin.
start: usize,
|
||||
// Bytes emitted after the value (a quote character for the quoted candidates).
suffix: &'static [u8],
|
||||
}
|
||||
|
||||
impl MinifiedVal {
|
||||
impl Eq for AttrValMinified {}
|
||||
|
||||
impl PartialEq<Self> for AttrValMinified {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.len() == other.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd<Self> for AttrValMinified {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
self.len().partial_cmp(&other.len())
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for AttrValMinified {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.len().cmp(&other.len())
|
||||
}
|
||||
}
|
||||
|
||||
impl AttrValMinified {
|
||||
pub fn len(&self) -> usize {
|
||||
self.prefix.len() + (self.data.len() - self.start) + self.suffix.len()
|
||||
}
|
||||
|
||||
pub fn res(&self) -> Vec<u8> {
|
||||
let mut res = Vec::<u8>::with_capacity(self.len());
|
||||
res.extend_from_slice(self.prefix);
|
||||
res.extend_from_slice(&self.data[self.start..]);
|
||||
res.extend_from_slice(self.suffix);
|
||||
res
|
||||
pub fn out(&self, out: &mut Vec<u8>) -> () {
|
||||
out.extend_from_slice(self.prefix);
|
||||
out.extend_from_slice(&self.data[self.start..]);
|
||||
out.extend_from_slice(self.suffix);
|
||||
}
|
||||
|
||||
pub fn typ(&self) -> AttrType {
|
||||
self.typ
|
||||
}
|
||||
}
|
||||
|
||||
pub fn minify_attr_val(val: &[u8]) -> Vec<u8> {
|
||||
let double_quoted = MinifiedVal {
|
||||
pub fn minify_attr_val(val: &[u8]) -> AttrValMinified {
|
||||
let double_quoted = AttrValMinified {
|
||||
typ: AttrType::Quoted,
|
||||
prefix: b"\"",
|
||||
data: DOUBLE_QUOTED_REPLACER.replace_all(val),
|
||||
start: 0,
|
||||
suffix: b"\"",
|
||||
};
|
||||
let single_quoted = MinifiedVal {
|
||||
let single_quoted = AttrValMinified {
|
||||
typ: AttrType::Quoted,
|
||||
prefix: b"'",
|
||||
data: SINGLE_QUOTED_REPLACER.replace_all(val),
|
||||
start: 0,
|
||||
|
@ -149,7 +182,8 @@ pub fn minify_attr_val(val: &[u8]) -> Vec<u8> {
|
|||
_ => b"",
|
||||
};
|
||||
let start = if !first_char_encoded.is_empty() { 1 } else { 0 };
|
||||
MinifiedVal {
|
||||
AttrValMinified {
|
||||
typ: AttrType::Unquoted,
|
||||
prefix: b"",
|
||||
data: res,
|
||||
start,
|
||||
|
@ -158,12 +192,5 @@ pub fn minify_attr_val(val: &[u8]) -> Vec<u8> {
|
|||
};
|
||||
|
||||
// When lengths are equal, prefer double quotes to all and single quotes to unquoted.
|
||||
let mut min = double_quoted;
|
||||
if single_quoted.len() < min.len() {
|
||||
min = single_quoted;
|
||||
};
|
||||
if unquoted.len() < min.len() {
|
||||
min = unquoted;
|
||||
};
|
||||
min.res()
|
||||
min(min(double_quoted, single_quoted), unquoted)
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@ use lazy_static::lazy_static;
|
|||
|
||||
use crate::ast::{NodeData, ScriptOrStyleLang};
|
||||
use crate::cfg::Cfg;
|
||||
use crate::gen::codepoints::TAG_NAME_CHAR;
|
||||
use crate::gen::codepoints::{TAG_NAME_CHAR, WHITESPACE};
|
||||
use crate::minify::bang::minify_bang;
|
||||
use crate::minify::comment::minify_comment;
|
||||
use crate::minify::css::minify_css;
|
||||
|
@ -12,6 +12,8 @@ use crate::minify::instruction::minify_instruction;
|
|||
use crate::minify::js::minify_js;
|
||||
use crate::pattern::Replacer;
|
||||
use crate::spec::entity::encode::encode_ampersands;
|
||||
use crate::spec::tag::ns::Namespace;
|
||||
use crate::spec::tag::whitespace::{get_whitespace_minification_for_tag, WhitespaceMinification};
|
||||
use crate::spec::tag::EMPTY_TAG_NAME;
|
||||
|
||||
fn build_chevron_replacer() -> Replacer {
|
||||
|
@ -39,53 +41,143 @@ lazy_static! {
|
|||
static ref CHEVRON_REPLACER: Replacer = build_chevron_replacer();
|
||||
}
|
||||
|
||||
fn left_trim(val: &mut Vec<u8>) -> () {
|
||||
let mut len = 0;
|
||||
while val.get(len).filter(|&&c| WHITESPACE[c]).is_some() {
|
||||
len += 1;
|
||||
}
|
||||
val.drain(0..len);
|
||||
}
|
||||
|
||||
fn right_trim(val: &mut Vec<u8>) -> () {
|
||||
let mut retain = val.len();
|
||||
while retain > 0 && val.get(retain - 1).filter(|&&c| WHITESPACE[c]).is_some() {
|
||||
retain -= 1;
|
||||
}
|
||||
val.truncate(retain);
|
||||
}
|
||||
|
||||
fn collapse_whitespace(val: &mut Vec<u8>) -> () {
|
||||
let mut write = 0;
|
||||
let mut in_whitespace = false;
|
||||
for i in 0..val.len() {
|
||||
let mut c = val[i];
|
||||
if WHITESPACE[c] {
|
||||
if in_whitespace {
|
||||
// Skip this character.
|
||||
continue;
|
||||
};
|
||||
in_whitespace = true;
|
||||
c = b' ';
|
||||
};
|
||||
val[write] = c;
|
||||
write += 1;
|
||||
}
|
||||
val.truncate(write);
|
||||
}
|
||||
|
||||
fn is_all_whitespace(val: &[u8]) -> bool {
|
||||
for &c in val {
|
||||
if !WHITESPACE[c] {
|
||||
return false;
|
||||
};
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
pub fn minify_content(
|
||||
cfg: &Cfg,
|
||||
out: &mut Vec<u8>,
|
||||
descendant_of_pre: bool,
|
||||
// Use empty slice if none.
|
||||
parent: &[u8],
|
||||
nodes: &[NodeData],
|
||||
mut nodes: Vec<NodeData>,
|
||||
) -> () {
|
||||
let mut index_of_last_text_or_elem_child = (nodes.len() as isize) - 1;
|
||||
while index_of_last_text_or_elem_child >= 0 {
|
||||
match nodes[index_of_last_text_or_elem_child as usize] {
|
||||
NodeData::Text { .. } | NodeData::Element { .. } => break,
|
||||
_ => index_of_last_text_or_elem_child -= 1,
|
||||
let &WhitespaceMinification {
|
||||
collapse,
|
||||
destroy_whole,
|
||||
trim,
|
||||
} = get_whitespace_minification_for_tag(parent, descendant_of_pre);
|
||||
|
||||
// TODO Document or fix: even though bangs/comments/etc. don't affect layout, we don't collapse/destroy-whole/trim combined text nodes across bangs/comments/etc., as that's too complex and is ambiguous about which nodes should whitespace be deleted from.
|
||||
let mut found_first_text_or_elem = false;
|
||||
let mut index_of_last_nonempty_text_or_elem: isize = -1;
|
||||
let mut index_of_last_text_or_elem: isize = -1;
|
||||
for (i, n) in nodes.iter_mut().enumerate() {
|
||||
match n {
|
||||
NodeData::Element { .. } => {
|
||||
found_first_text_or_elem = true;
|
||||
index_of_last_nonempty_text_or_elem = i as isize;
|
||||
index_of_last_text_or_elem = i as isize;
|
||||
}
|
||||
NodeData::Text { value } => {
|
||||
if !found_first_text_or_elem {
|
||||
// This is the first element or text node, and it's a text node.
|
||||
found_first_text_or_elem = true;
|
||||
if trim {
|
||||
left_trim(value);
|
||||
};
|
||||
};
|
||||
// Our parser is guaranteed to output contiguous text as a single node,
|
||||
// so the adjacent nodes to a text node (not counting comments/bangs/etc.) should be elements.
|
||||
// TODO debug_assert this and add tests.
|
||||
if destroy_whole && is_all_whitespace(value) {
|
||||
value.clear();
|
||||
} else if collapse {
|
||||
collapse_whitespace(value);
|
||||
};
|
||||
// Set AFTER processing.
|
||||
index_of_last_text_or_elem = i as isize;
|
||||
if !value.is_empty() {
|
||||
index_of_last_nonempty_text_or_elem = i as isize;
|
||||
};
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
if trim && index_of_last_text_or_elem > -1 {
|
||||
match nodes.get_mut(index_of_last_text_or_elem as usize).unwrap() {
|
||||
NodeData::Text { value } => right_trim(value),
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
|
||||
let mut previous_sibling_element: &[u8] = EMPTY_TAG_NAME;
|
||||
for (i, c) in nodes.iter().enumerate() {
|
||||
let mut previous_sibling_element = Vec::<u8>::new();
|
||||
for (i, c) in nodes.into_iter().enumerate() {
|
||||
match c {
|
||||
NodeData::Bang { code, ended } => minify_bang(cfg, out, code, *ended),
|
||||
NodeData::Comment { code, ended } => minify_comment(cfg, out, code, *ended),
|
||||
NodeData::Bang { code, ended } => minify_bang(cfg, out, &code, ended),
|
||||
NodeData::Comment { code, ended } => minify_comment(cfg, out, &code, ended),
|
||||
NodeData::Element {
|
||||
attributes,
|
||||
children,
|
||||
closing_tag,
|
||||
name,
|
||||
namespace: child_ns,
|
||||
} => {
|
||||
minify_element(
|
||||
cfg,
|
||||
out,
|
||||
descendant_of_pre,
|
||||
child_ns,
|
||||
parent,
|
||||
previous_sibling_element,
|
||||
(i as isize) == index_of_last_text_or_elem_child,
|
||||
name,
|
||||
&previous_sibling_element,
|
||||
(i as isize) == index_of_last_nonempty_text_or_elem,
|
||||
&name,
|
||||
attributes,
|
||||
*closing_tag,
|
||||
closing_tag,
|
||||
children,
|
||||
);
|
||||
previous_sibling_element = name;
|
||||
}
|
||||
NodeData::Instruction { code, ended } => minify_instruction(cfg, out, code, *ended),
|
||||
NodeData::Instruction { code, ended } => minify_instruction(cfg, out, &code, ended),
|
||||
NodeData::ScriptOrStyleContent { code, lang } => match lang {
|
||||
ScriptOrStyleLang::CSS => minify_css(cfg, out, code),
|
||||
ScriptOrStyleLang::Data => out.extend_from_slice(code),
|
||||
ScriptOrStyleLang::JS => minify_js(cfg, out, code),
|
||||
ScriptOrStyleLang::CSS => minify_css(cfg, out, &code),
|
||||
ScriptOrStyleLang::Data => out.extend_from_slice(&code),
|
||||
ScriptOrStyleLang::JS => minify_js(cfg, out, &code),
|
||||
},
|
||||
NodeData::Text { value } => out
|
||||
.extend_from_slice(&CHEVRON_REPLACER.replace_all(&encode_ampersands(value, false))),
|
||||
NodeData::Text { value } => out.extend_from_slice(
|
||||
&CHEVRON_REPLACER.replace_all(&encode_ampersands(&value, false)),
|
||||
),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::collections::HashMap;
|
|||
use crate::ast::{ElementClosingTag, NodeData, ScriptOrStyleLang};
|
||||
use crate::cfg::Cfg;
|
||||
use crate::gen::codepoints::TAG_NAME_CHAR;
|
||||
use crate::minify::attr::minify_attr_val;
|
||||
use crate::minify::attr::{minify_attr_val, AttrType, AttrValMinified};
|
||||
use crate::minify::bang::minify_bang;
|
||||
use crate::minify::comment::minify_comment;
|
||||
use crate::minify::content::minify_content;
|
||||
|
@ -12,28 +12,25 @@ use crate::minify::instruction::minify_instruction;
|
|||
use crate::minify::js::minify_js;
|
||||
use crate::pattern::Replacer;
|
||||
use crate::spec::entity::encode::encode_ampersands;
|
||||
use crate::spec::tag::ns::Namespace;
|
||||
use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
|
||||
use crate::spec::tag::EMPTY_TAG_NAME;
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
enum AttrType {
|
||||
None,
|
||||
Quoted,
|
||||
Unquoted,
|
||||
}
|
||||
|
||||
pub fn minify_element(
|
||||
cfg: &Cfg,
|
||||
out: &mut Vec<u8>,
|
||||
descendant_of_pre: bool,
|
||||
ns: Namespace,
|
||||
// Use an empty slice if none.
|
||||
parent: &[u8],
|
||||
// Use an empty slice if none.
|
||||
previous_sibling_element: &[u8],
|
||||
// If the last node of the parent is an element and it's this one.
|
||||
is_last_child_text_or_element_node: bool,
|
||||
tag_name: &[u8],
|
||||
attributes: &HashMap<Vec<u8>, Vec<u8>>,
|
||||
attributes: HashMap<Vec<u8>, Vec<u8>>,
|
||||
closing_tag: ElementClosingTag,
|
||||
children: &[NodeData],
|
||||
children: Vec<NodeData>,
|
||||
) -> () {
|
||||
let can_omit_closing_tag = cfg.omit_closing_tags
|
||||
&& (can_omit_as_before(previous_sibling_element, tag_name)
|
||||
|
@ -46,10 +43,12 @@ pub fn minify_element(
|
|||
if !cfg.remove_spaces_between_attributes || last_attr == AttrType::Unquoted {
|
||||
out.push(b' ');
|
||||
};
|
||||
out.extend_from_slice(name);
|
||||
out.extend_from_slice(&name);
|
||||
if !value.is_empty() {
|
||||
let min = minify_attr_val(&encode_ampersands(&value, true));
|
||||
out.push(b'=');
|
||||
out.extend_from_slice(&minify_attr_val(&encode_ampersands(value, true)));
|
||||
min.out(out);
|
||||
last_attr = min.typ();
|
||||
};
|
||||
}
|
||||
if closing_tag == ElementClosingTag::SelfClosing {
|
||||
|
@ -65,7 +64,13 @@ pub fn minify_element(
|
|||
return;
|
||||
};
|
||||
|
||||
minify_content(cfg, out, tag_name, children);
|
||||
minify_content(
|
||||
cfg,
|
||||
out,
|
||||
descendant_of_pre || (ns == Namespace::Html && tag_name == b"pre"),
|
||||
tag_name,
|
||||
children,
|
||||
);
|
||||
|
||||
if closing_tag != ElementClosingTag::Present || (cfg.omit_closing_tags && can_omit_closing_tag)
|
||||
{
|
||||
|
|
|
@ -104,6 +104,7 @@ pub fn parse_element(cfg: &Cfg, code: &mut Code, ns: Namespace, parent: &[u8]) -
|
|||
children: Vec::new(),
|
||||
closing_tag: ElementClosingTag::SelfClosing,
|
||||
name: elem_name,
|
||||
namespace: ns,
|
||||
};
|
||||
};
|
||||
if VOID_TAGS.contains(elem_name.as_slice()) {
|
||||
|
@ -112,9 +113,12 @@ pub fn parse_element(cfg: &Cfg, code: &mut Code, ns: Namespace, parent: &[u8]) -
|
|||
children: Vec::new(),
|
||||
closing_tag: ElementClosingTag::Void,
|
||||
name: elem_name,
|
||||
namespace: ns,
|
||||
};
|
||||
};
|
||||
|
||||
// TODO Is "svg" itself in the SVG namespace? Does it matter?
|
||||
// If it is and does, we need to update `namespace:` property of this function's return values.
|
||||
let child_ns = if elem_name == b"svg" {
|
||||
Namespace::Svg
|
||||
} else {
|
||||
|
@ -151,5 +155,6 @@ pub fn parse_element(cfg: &Cfg, code: &mut Code, ns: Namespace, parent: &[u8]) -
|
|||
ElementClosingTag::Present
|
||||
},
|
||||
name: elem_name,
|
||||
namespace: ns,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,6 @@ impl<V: 'static + Copy> TrieNode<V> {
|
|||
// - "&ax" will return node `a`.
|
||||
// - "+ax" will return itself.
|
||||
// - "" will return itself.
|
||||
#[inline(always)]
|
||||
pub fn shortest_matching_prefix(&self, text: &[u8], from: usize) -> (&TrieNode<V>, usize) {
|
||||
let mut node: &TrieNode<V> = self;
|
||||
let mut pos = from;
|
||||
|
@ -50,7 +49,6 @@ impl<V: 'static + Copy> TrieNode<V> {
|
|||
(node, pos)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn longest_matching_prefix(&self, text: &[u8]) -> TrieNodeMatch<V> {
|
||||
let mut node: &TrieNode<V> = self;
|
||||
let mut value: Option<TrieNodeMatch<V>> = None;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use lazy_static::lazy_static;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
pub struct WhitespaceMinification {
|
||||
pub collapse: bool,
|
||||
pub destroy_whole: bool,
|
||||
|
@ -165,17 +166,18 @@ lazy_static! {
|
|||
};
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn get_whitespace_minification_for_tag(
|
||||
tag_name: Option<&[u8]>,
|
||||
// Use empty slice if root.
|
||||
tag_name: &[u8],
|
||||
descendant_of_pre: bool,
|
||||
) -> &'static WhitespaceMinification {
|
||||
if descendant_of_pre {
|
||||
WHITESPACE_SENSITIVE
|
||||
} else if tag_name.is_empty() {
|
||||
ROOT
|
||||
} else {
|
||||
match tag_name {
|
||||
Some(n) => TAG_WHITESPACE_MINIFICATION.get(n).unwrap_or(&DEFAULT),
|
||||
None => ROOT,
|
||||
}
|
||||
TAG_WHITESPACE_MINIFICATION
|
||||
.get(tag_name)
|
||||
.unwrap_or(&DEFAULT)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue