2021-08-06 02:17:45 -04:00
use aho_corasick ::{ AhoCorasickBuilder , MatchKind } ;
use lazy_static ::lazy_static ;
use crate ::ast ::{ NodeData , ScriptOrStyleLang } ;
use crate ::cfg ::Cfg ;
2021-08-09 05:56:37 -04:00
use crate ::common ::gen ::codepoints ::TAG_NAME_CHAR ;
use crate ::common ::pattern ::Replacer ;
use crate ::common ::spec ::tag ::whitespace ::{
get_whitespace_minification_for_tag , WhitespaceMinification ,
} ;
use crate ::common ::whitespace ::{ collapse_whitespace , is_all_whitespace , left_trim , right_trim } ;
2021-08-08 04:46:51 -04:00
use crate ::entity ::encode ::encode_entities ;
2021-08-06 02:17:45 -04:00
use crate ::minify ::bang ::minify_bang ;
use crate ::minify ::comment ::minify_comment ;
use crate ::minify ::css ::minify_css ;
2021-08-09 12:56:48 -04:00
use crate ::minify ::doctype ::minify_doctype ;
2021-08-06 02:17:45 -04:00
use crate ::minify ::element ::minify_element ;
use crate ::minify ::instruction ::minify_instruction ;
use crate ::minify ::js ::minify_js ;
fn build_chevron_replacer ( ) -> Replacer {
let mut patterns = Vec ::< Vec < u8 > > ::new ( ) ;
let mut replacements = Vec ::< Vec < u8 > > ::new ( ) ;
2021-08-06 07:56:54 -04:00
// Replace all `<` with a `<` if it's followed by a TAG_NAME_CHAR, `/`, `!`, or `?`.
2021-08-06 02:17:45 -04:00
for c in 0 u8 .. 128 u8 {
2021-08-06 07:56:54 -04:00
// TODO Create single lookup.
if TAG_NAME_CHAR [ c ] | | c = = b '/' | | c = = b '!' | | c = = b '?' {
2021-08-06 02:17:45 -04:00
patterns . push ( vec! [ b '<' , c ] ) ;
replacements . push ( vec! [ b '&' , b 'L' , b 'T' , c ] ) ;
} ;
2021-08-06 02:19:36 -04:00
}
2021-08-06 02:17:45 -04:00
Replacer ::new (
AhoCorasickBuilder ::new ( )
. dfa ( true )
. match_kind ( MatchKind ::LeftmostLongest )
. build ( patterns ) ,
replacements ,
)
}
// Shared replacer produced by `build_chevron_replacer`; `lazy_static!` initialises it lazily
// on first use. `minify_content` applies it to entity-encoded text before writing it out.
lazy_static! {
static ref CHEVRON_REPLACER : Replacer = build_chevron_replacer ( ) ;
}
pub fn minify_content (
cfg : & Cfg ,
out : & mut Vec < u8 > ,
2021-08-06 03:33:56 -04:00
descendant_of_pre : bool ,
2021-08-06 02:17:45 -04:00
// Use empty slice if none.
parent : & [ u8 ] ,
2021-08-06 03:33:56 -04:00
mut nodes : Vec < NodeData > ,
2021-08-06 09:18:45 -04:00
) {
2021-08-06 03:33:56 -04:00
let & WhitespaceMinification {
collapse ,
destroy_whole ,
trim ,
} = get_whitespace_minification_for_tag ( parent , descendant_of_pre ) ;
// TODO Document or fix: even though bangs/comments/etc. don't affect layout, we don't collapse/destroy-whole/trim combined text nodes across bangs/comments/etc., as that's too complex and is ambiguous about which nodes should whitespace be deleted from.
let mut found_first_text_or_elem = false ;
let mut index_of_last_nonempty_text_or_elem : isize = - 1 ;
let mut index_of_last_text_or_elem : isize = - 1 ;
2021-08-06 09:07:55 -04:00
for i in 0 .. nodes . len ( ) {
let ( previous_nodes , next_nodes ) = nodes . split_at_mut ( i ) ;
let n = & mut next_nodes [ 0 ] ;
2021-08-06 03:33:56 -04:00
match n {
2021-08-06 09:07:55 -04:00
NodeData ::Element { name , .. } = > {
if index_of_last_nonempty_text_or_elem > - 1 {
2021-08-06 09:23:05 -04:00
if let NodeData ::Element {
next_sibling_element_name ,
..
} = & mut previous_nodes [ index_of_last_nonempty_text_or_elem as usize ]
{
debug_assert! ( next_sibling_element_name . is_empty ( ) ) ;
next_sibling_element_name . extend_from_slice ( name ) ;
2021-08-06 09:07:55 -04:00
} ;
} ;
2021-08-06 03:33:56 -04:00
found_first_text_or_elem = true ;
index_of_last_nonempty_text_or_elem = i as isize ;
index_of_last_text_or_elem = i as isize ;
}
NodeData ::Text { value } = > {
if ! found_first_text_or_elem {
// This is the first element or text node, and it's a text node.
found_first_text_or_elem = true ;
if trim {
left_trim ( value ) ;
} ;
} ;
// Our parser is guaranteed to output contiguous text as a single node,
// so the adjacent nodes to a text node (not counting comments/bangs/etc.) should be elements.
// TODO debug_assert this and add tests.
if destroy_whole & & is_all_whitespace ( value ) {
value . clear ( ) ;
} else if collapse {
collapse_whitespace ( value ) ;
} ;
// Set AFTER processing.
index_of_last_text_or_elem = i as isize ;
if ! value . is_empty ( ) {
index_of_last_nonempty_text_or_elem = i as isize ;
} ;
}
_ = > { }
} ;
}
if trim & & index_of_last_text_or_elem > - 1 {
2021-08-06 09:23:05 -04:00
if let NodeData ::Text { value } =
nodes . get_mut ( index_of_last_text_or_elem as usize ) . unwrap ( )
{
right_trim ( value ) ;
2021-08-06 02:17:45 -04:00
} ;
2021-08-06 02:19:36 -04:00
}
2021-08-06 02:17:45 -04:00
2021-08-06 03:33:56 -04:00
for ( i , c ) in nodes . into_iter ( ) . enumerate ( ) {
2021-08-06 02:17:45 -04:00
match c {
2021-08-06 03:33:56 -04:00
NodeData ::Bang { code , ended } = > minify_bang ( cfg , out , & code , ended ) ,
NodeData ::Comment { code , ended } = > minify_comment ( cfg , out , & code , ended ) ,
2021-08-09 12:56:48 -04:00
NodeData ::Doctype { legacy , ended } = > minify_doctype ( cfg , out , & legacy , ended ) ,
2021-08-06 02:17:45 -04:00
NodeData ::Element {
attributes ,
children ,
closing_tag ,
name ,
2021-08-06 03:33:56 -04:00
namespace : child_ns ,
2021-08-06 08:53:33 -04:00
next_sibling_element_name ,
} = > minify_element (
cfg ,
out ,
descendant_of_pre ,
child_ns ,
parent ,
& next_sibling_element_name ,
( i as isize ) = = index_of_last_nonempty_text_or_elem ,
& name ,
attributes ,
closing_tag ,
children ,
) ,
2021-08-06 03:33:56 -04:00
NodeData ::Instruction { code , ended } = > minify_instruction ( cfg , out , & code , ended ) ,
2021-08-06 02:17:45 -04:00
NodeData ::ScriptOrStyleContent { code , lang } = > match lang {
2021-08-06 03:33:56 -04:00
ScriptOrStyleLang ::CSS = > minify_css ( cfg , out , & code ) ,
ScriptOrStyleLang ::Data = > out . extend_from_slice ( & code ) ,
ScriptOrStyleLang ::JS = > minify_js ( cfg , out , & code ) ,
2021-08-06 02:17:45 -04:00
} ,
2021-08-06 08:53:33 -04:00
NodeData ::Text { value } = > out
. extend_from_slice ( & CHEVRON_REPLACER . replace_all ( & encode_entities ( & value , false ) ) ) ,
2021-08-06 02:17:45 -04:00
} ;
2021-08-06 02:19:36 -04:00
}
2021-08-06 02:17:45 -04:00
}