2020-07-03 03:32:09 -04:00
use lazy_static ::lazy_static ;
use std ::collections ::HashSet ;
2019-12-25 21:47:18 -05:00
use crate ::err ::{ ErrorType , ProcessingResult } ;
2020-07-30 05:51:43 -04:00
use crate ::proc ::checkpoint ::{ WriteCheckpoint , ReadCheckpoint } ;
2020-01-25 02:07:52 -05:00
use crate ::proc ::MatchAction ::* ;
use crate ::proc ::MatchMode ::* ;
2020-01-25 07:05:07 -05:00
use crate ::proc ::Processor ;
use crate ::proc ::range ::ProcessorRange ;
2019-12-23 06:48:41 -05:00
use crate ::spec ::tag ::void ::VOID_TAGS ;
2020-06-19 03:58:16 -04:00
use crate ::unit ::attr ::{ AttrType , process_attr , ProcessedAttr } ;
2019-12-25 04:44:51 -05:00
use crate ::unit ::content ::process_content ;
2020-01-10 02:30:49 -05:00
use crate ::unit ::script ::process_script ;
2019-12-29 05:53:49 -05:00
use crate ::unit ::style ::process_style ;
2020-06-19 03:58:16 -04:00
use crate ::gen ::attrs ::{ ATTRS , AttributeMinification } ;
use crate ::spec ::tag ::ns ::Namespace ;
2020-07-09 03:06:08 -04:00
use crate ::gen ::codepoints ::{ TAG_NAME_CHAR , WHITESPACE } ;
2020-07-10 11:15:56 -04:00
use crate ::cfg ::Cfg ;
2020-07-30 00:38:40 -04:00
use crate ::spec ::tag ::omission ::{ can_omit_as_last_node , can_omit_as_before } ;
2020-01-23 09:53:09 -05:00
2020-07-03 03:32:09 -04:00
lazy_static! {
    /// MIME types that identify the contents of a `script` tag as JavaScript.
    ///
    /// Values are compared as raw bytes, exactly as listed here.
    pub static ref JAVASCRIPT_MIME_TYPES: HashSet<&'static [u8]> = {
        const MIME_TYPES: &[&[u8]] = &[
            b"application/ecmascript",
            b"application/javascript",
            b"application/x-ecmascript",
            b"application/x-javascript",
            b"text/ecmascript",
            b"text/javascript",
            b"text/javascript1.0",
            b"text/javascript1.1",
            b"text/javascript1.2",
            b"text/javascript1.3",
            b"text/javascript1.4",
            b"text/javascript1.5",
            b"text/jscript",
            b"text/livescript",
            b"text/x-ecmascript",
            b"text/x-javascript",
        ];
        MIME_TYPES.iter().copied().collect()
    };
}
2019-12-23 06:48:41 -05:00
2020-01-06 08:28:35 -05:00
/// Classification of a tag that selects how `process_tag` handles its contents.
#[derive(Clone, Copy)]
enum TagType {
    /// A `script` tag whose contents are treated as JavaScript.
    /// This is the initial classification of every `script` tag.
    ScriptJs,
    /// A `script` tag whose `type` attribute value is not a JavaScript MIME type.
    ScriptData,
    /// A `style` tag.
    Style,
    /// Any other tag.
    Other,
}
2020-01-18 06:19:06 -05:00
#[ derive(Copy, Clone) ]
pub struct MaybeClosingTag ( Option < ProcessorRange > ) ;
impl MaybeClosingTag {
2020-07-27 04:08:53 -04:00
#[ inline(always) ]
2020-01-18 06:19:06 -05:00
pub fn none ( ) -> MaybeClosingTag {
MaybeClosingTag ( None )
}
2020-01-06 07:36:05 -05:00
2020-01-31 07:15:35 -05:00
#[ inline(always) ]
2020-01-18 06:19:06 -05:00
pub fn write ( & mut self , proc : & mut Processor ) -> ( ) {
proc . write_slice ( b " </ " ) ;
2020-01-18 06:39:33 -05:00
proc . write_range ( self . 0. take ( ) . unwrap ( ) ) ;
2020-01-18 06:19:06 -05:00
proc . write ( b '>' ) ;
}
2020-01-31 07:15:35 -05:00
#[ inline(always) ]
2020-01-18 06:19:06 -05:00
pub fn write_if_exists ( & mut self , proc : & mut Processor ) -> bool {
self . 0. take ( ) . filter ( | tag | {
2020-01-09 04:45:14 -05:00
proc . write_slice ( b " </ " ) ;
2020-01-18 06:19:06 -05:00
proc . write_range ( * tag ) ;
2020-01-09 04:45:14 -05:00
proc . write ( b '>' ) ;
2020-01-18 06:19:06 -05:00
true
} ) . is_some ( )
}
2020-01-31 07:15:35 -05:00
#[ inline(always) ]
2020-01-18 06:19:06 -05:00
pub fn exists ( & self ) -> bool {
self . 0. is_some ( )
}
2020-01-31 07:15:35 -05:00
#[ inline(always) ]
2020-01-18 06:19:06 -05:00
pub fn exists_and < F : FnOnce ( ProcessorRange ) -> bool > ( & self , pred : F ) -> bool {
match self . 0 {
Some ( range ) = > pred ( range ) ,
None = > false ,
}
}
2020-01-31 07:15:35 -05:00
#[ inline(always) ]
2020-01-18 06:19:06 -05:00
pub fn replace ( & mut self , tag : MaybeClosingTag ) -> ( ) {
self . 0 = tag . 0 ;
2020-01-06 07:36:05 -05:00
}
}
// TODO Comment param `prev_sibling_closing_tag`.
2020-09-20 06:50:22 -04:00
pub fn process_tag (
proc : & mut Processor ,
cfg : & Cfg ,
ns : Namespace ,
parent : Option < ProcessorRange > ,
mut prev_sibling_closing_tag : MaybeClosingTag ,
source_tag_name : ProcessorRange ,
) -> ProcessingResult < MaybeClosingTag > {
2020-07-30 00:38:40 -04:00
if prev_sibling_closing_tag . exists_and ( | prev_tag | ! can_omit_as_before ( proc , Some ( prev_tag ) , source_tag_name ) ) {
2020-01-18 06:19:06 -05:00
prev_sibling_closing_tag . write ( proc ) ;
2020-01-06 07:36:05 -05:00
} ;
// Write initially skipped left chevron.
proc . write ( b '<' ) ;
// Write previously skipped name and use written code as range (otherwise source code will eventually be overwritten).
2020-01-07 04:56:37 -05:00
let tag_name = proc . write_range ( source_tag_name ) ;
2019-12-26 01:16:13 -05:00
2020-07-24 23:22:25 -04:00
let mut tag_type = match & proc [ tag_name ] {
// Unless non-JS MIME `type` is provided, `script` tags contain JS.
b " script " = > TagType ::ScriptJs ,
2019-12-26 01:16:13 -05:00
b " style " = > TagType ::Style ,
_ = > TagType ::Other ,
} ;
2019-12-23 06:48:41 -05:00
2019-12-26 00:17:57 -05:00
let mut last_attr_type : Option < AttrType > = None ;
2019-12-23 06:48:41 -05:00
let mut self_closing = false ;
2020-01-07 04:56:37 -05:00
let is_void_tag = VOID_TAGS . contains ( & proc [ tag_name ] ) ;
2019-12-23 06:48:41 -05:00
loop {
2019-12-26 08:23:33 -05:00
// At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one).
2020-07-09 03:06:08 -04:00
proc . m ( WhileInLookup ( WHITESPACE ) , Discard ) ;
2019-12-23 06:48:41 -05:00
2020-01-26 04:32:06 -05:00
if proc . m ( IsChar ( b '>' ) , Keep ) . nonempty ( ) {
2019-12-23 06:48:41 -05:00
// End of tag.
break ;
}
2020-01-07 04:56:37 -05:00
// Don't write self closing "/>" as it could be shortened to ">" if void tag.
2020-01-26 04:32:06 -05:00
self_closing = proc . m ( IsSeq ( b " /> " ) , Discard ) . nonempty ( ) ;
2019-12-25 04:44:51 -05:00
if self_closing {
2019-12-23 06:48:41 -05:00
break ;
}
2020-01-06 08:28:35 -05:00
// Mark attribute start in case we want to erase it completely.
2020-07-30 05:51:43 -04:00
let attr_checkpoint = WriteCheckpoint ::new ( proc ) ;
2020-01-06 08:28:35 -05:00
let mut erase_attr = false ;
2019-12-26 00:17:57 -05:00
// Write space after tag name or unquoted/valueless attribute.
2020-01-14 05:10:49 -05:00
// Don't write after quoted.
2020-01-15 06:09:16 -05:00
// Handle rare case where file ends in opening tag before an attribute and no minification has been done yet,
// e.g. `<-` (yes, that's the entire file).
if proc . at_end ( ) {
return Err ( ErrorType ::UnexpectedEnd ) ;
} ;
2019-12-26 00:17:57 -05:00
match last_attr_type {
2019-12-26 08:23:33 -05:00
Some ( AttrType ::Unquoted ) | Some ( AttrType ::NoValue ) | None = > proc . write ( b ' ' ) ,
_ = > { }
2019-12-26 00:17:57 -05:00
} ;
2019-12-23 06:48:41 -05:00
2020-01-23 09:53:09 -05:00
let ProcessedAttr { name , typ , value } = process_attr ( proc , ns , tag_name ) ? ;
2020-01-06 08:28:35 -05:00
match ( tag_type , & proc [ name ] ) {
2020-07-24 23:22:25 -04:00
// NOTE: We don't support multiple `type` attributes, so can't go from ScriptData => ScriptJs.
( TagType ::ScriptJs , b " type " ) = > {
2020-01-06 08:28:35 -05:00
// It's JS if the value is empty or one of `JAVASCRIPT_MIME_TYPES`.
2020-01-10 02:45:06 -05:00
let script_tag_type_is_js = value
2020-01-07 04:56:37 -05:00
. filter ( | v | ! JAVASCRIPT_MIME_TYPES . contains ( & proc [ * v ] ) )
. is_none ( ) ;
2020-01-06 08:28:35 -05:00
if script_tag_type_is_js {
erase_attr = true ;
2020-07-24 23:22:25 -04:00
} else {
// Tag does not contain JS, don't minify JS.
tag_type = TagType ::ScriptData ;
2020-01-06 08:28:35 -05:00
} ;
2020-01-07 04:56:37 -05:00
}
2020-01-14 08:58:33 -05:00
( _ , name ) = > {
// TODO Check if HTML tag before checking if attribute removal applies to all elements.
2020-01-23 09:53:09 -05:00
erase_attr = match ( value , ATTRS . get ( ns , & proc [ tag_name ] , name ) ) {
2020-01-17 19:42:01 -05:00
( None , Some ( AttributeMinification { redundant_if_empty : true , .. } ) ) = > true ,
( Some ( val ) , Some ( AttributeMinification { default_value : Some ( defval ) , .. } ) ) = > proc [ val ] . eq ( * defval ) ,
_ = > false ,
} ;
2020-01-14 08:58:33 -05:00
}
2019-12-27 05:52:49 -05:00
} ;
2020-01-06 08:28:35 -05:00
if erase_attr {
2020-01-25 07:05:07 -05:00
attr_checkpoint . erase_written ( proc ) ;
2020-01-06 08:28:35 -05:00
} else {
last_attr_type = Some ( typ ) ;
} ;
2019-12-25 04:44:51 -05:00
} ;
2019-12-23 06:48:41 -05:00
2020-01-14 08:58:33 -05:00
// TODO Self closing does not actually close for HTML elements, but might close for foreign elements.
// See spec for more details.
2020-01-07 04:56:37 -05:00
if self_closing | | is_void_tag {
if self_closing {
// Write discarded tag closing characters.
2020-01-17 19:42:01 -05:00
if is_void_tag {
proc . write_slice ( b " > " ) ;
} else {
2020-07-04 01:58:35 -04:00
if let Some ( AttrType ::Unquoted ) = last_attr_type {
// Prevent `/` from being part of the value.
proc . write ( b ' ' ) ;
} ;
2020-01-17 19:42:01 -05:00
proc . write_slice ( b " /> " ) ;
} ;
2020-01-07 04:56:37 -05:00
} ;
2020-01-18 06:19:06 -05:00
return Ok ( MaybeClosingTag ( None ) ) ;
2019-12-25 04:44:51 -05:00
} ;
2019-12-23 06:48:41 -05:00
2020-01-23 21:17:46 -05:00
let child_ns = if proc [ tag_name ] . eq ( b " svg " ) {
2020-01-23 09:53:09 -05:00
Namespace ::Svg
} else {
ns
} ;
2020-07-30 00:38:40 -04:00
let mut closing_tag_omitted = false ;
2019-12-26 01:16:13 -05:00
match tag_type {
2020-07-24 23:22:25 -04:00
TagType ::ScriptData = > process_script ( proc , cfg , false ) ? ,
TagType ::ScriptJs = > process_script ( proc , cfg , true ) ? ,
2021-01-07 08:26:02 -05:00
TagType ::Style = > process_style ( proc , cfg ) ? ,
2020-07-30 00:38:40 -04:00
_ = > closing_tag_omitted = process_content ( proc , cfg , child_ns , Some ( tag_name ) ) ? . closing_tag_omitted ,
} ;
let can_omit_closing_tag = can_omit_as_last_node ( proc , parent , source_tag_name ) ;
if closing_tag_omitted | | proc . at_end ( ) & & can_omit_closing_tag {
return Ok ( MaybeClosingTag ( None ) ) ;
2019-12-25 04:44:51 -05:00
} ;
2019-12-23 06:48:41 -05:00
2020-07-30 00:38:40 -04:00
let closing_tag_checkpoint = ReadCheckpoint ::new ( proc ) ;
2020-01-26 04:32:06 -05:00
proc . m ( IsSeq ( b " </ " ) , Discard ) . require ( " closing tag " ) ? ;
2020-07-09 03:06:08 -04:00
let closing_tag = proc . m ( WhileInLookup ( TAG_NAME_CHAR ) , Discard ) . require ( " closing tag name " ) ? ;
2020-07-29 22:32:53 -04:00
proc . make_lowercase ( closing_tag ) ;
2020-07-30 00:38:40 -04:00
2020-01-17 19:42:01 -05:00
// We need to check closing tag matches as otherwise when we later write closing tag, it might be longer than source closing tag and cause source to be overwritten.
2020-07-30 05:51:43 -04:00
if proc [ closing_tag ] ! = proc [ tag_name ] {
if can_omit_closing_tag {
closing_tag_checkpoint . restore ( proc ) ;
Ok ( MaybeClosingTag ( None ) )
} else {
Err ( ErrorType ::ClosingTagMismatch {
expected : unsafe { String ::from_utf8_unchecked ( proc [ tag_name ] . to_vec ( ) ) } ,
got : unsafe { String ::from_utf8_unchecked ( proc [ closing_tag ] . to_vec ( ) ) } ,
} )
}
} else {
proc . m ( WhileInLookup ( WHITESPACE ) , Discard ) ;
proc . m ( IsChar ( b '>' ) , Discard ) . require ( " closing tag end " ) ? ;
Ok ( MaybeClosingTag ( Some ( tag_name ) ) )
}
2019-12-23 06:48:41 -05:00
}