2019-12-29 05:53:49 -05:00
use phf ::{ phf_set , Set } ;
2019-12-25 21:47:18 -05:00
use crate ::err ::{ ErrorType , ProcessingResult } ;
2020-01-25 07:05:07 -05:00
use crate ::proc ::checkpoint ::Checkpoint ;
2020-01-25 02:07:52 -05:00
use crate ::proc ::MatchAction ::* ;
use crate ::proc ::MatchMode ::* ;
2020-01-25 07:05:07 -05:00
use crate ::proc ::Processor ;
use crate ::proc ::range ::ProcessorRange ;
2019-12-23 06:48:41 -05:00
use crate ::spec ::codepoint ::{ is_alphanumeric , is_whitespace } ;
2020-01-07 04:56:37 -05:00
use crate ::spec ::tag ::omission ::CLOSING_TAG_OMISSION_RULES ;
2019-12-23 06:48:41 -05:00
use crate ::spec ::tag ::void ::VOID_TAGS ;
2020-01-17 19:42:01 -05:00
use crate ::unit ::attr ::{ AttributeMinification , ATTRS , AttrType , process_attr , ProcessedAttr } ;
2019-12-25 04:44:51 -05:00
use crate ::unit ::content ::process_content ;
2020-01-10 02:30:49 -05:00
use crate ::unit ::script ::process_script ;
2019-12-29 05:53:49 -05:00
use crate ::unit ::style ::process_style ;
2019-12-27 05:52:49 -05:00
2020-01-23 09:53:09 -05:00
/// Namespace that an element is processed in.
///
/// `process_tag` passes it on to attribute processing and switches to `Svg` for the content of
/// an `svg` element.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Namespace {
    /// Regular HTML content.
    Html,
    /// Content nested inside an `svg` element.
    Svg,
}
2019-12-27 05:52:49 -05:00
/// MIME types that identify a script as JavaScript.
///
/// Matching is exact on the raw bytes. `process_tag` uses this set to decide whether the `type`
/// attribute of a `script` element can be dropped.
pub static JAVASCRIPT_MIME_TYPES: Set<&'static [u8]> = phf_set! {
    // `application/*` variants.
    b"application/ecmascript",
    b"application/javascript",
    b"application/x-ecmascript",
    b"application/x-javascript",
    // `text/*` variants.
    b"text/ecmascript",
    b"text/javascript",
    b"text/javascript1.0",
    b"text/javascript1.1",
    b"text/javascript1.2",
    b"text/javascript1.3",
    b"text/javascript1.4",
    b"text/javascript1.5",
    b"text/jscript",
    b"text/livescript",
    b"text/x-ecmascript",
    b"text/x-javascript",
};
2019-12-23 06:48:41 -05:00
// Tag names may only use ASCII alphanumerics. However, some people also use `:` and `-`.
// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name for spec.
fn is_valid_tag_name_char ( c : u8 ) -> bool {
is_alphanumeric ( c ) | | c = = b ':' | | c = = b '-'
}
2020-01-06 08:28:35 -05:00
#[ derive(Copy, Clone) ]
2019-12-26 01:16:13 -05:00
/// Coarse classification of an element by tag name, used by `process_tag` to pick how the
/// element's content is processed.
enum TagType {
    /// A `script` element.
    Script,
    /// A `style` element.
    Style,
    /// Any other element.
    Other,
}
2020-01-18 06:19:06 -05:00
#[ derive(Copy, Clone) ]
pub struct MaybeClosingTag ( Option < ProcessorRange > ) ;
impl MaybeClosingTag {
pub fn none ( ) -> MaybeClosingTag {
MaybeClosingTag ( None )
}
2020-01-06 07:36:05 -05:00
2020-01-18 06:19:06 -05:00
pub fn write ( & mut self , proc : & mut Processor ) -> ( ) {
proc . write_slice ( b " </ " ) ;
2020-01-18 06:39:33 -05:00
proc . write_range ( self . 0. take ( ) . unwrap ( ) ) ;
2020-01-18 06:19:06 -05:00
proc . write ( b '>' ) ;
}
pub fn write_if_exists ( & mut self , proc : & mut Processor ) -> bool {
self . 0. take ( ) . filter ( | tag | {
2020-01-09 04:45:14 -05:00
proc . write_slice ( b " </ " ) ;
2020-01-18 06:19:06 -05:00
proc . write_range ( * tag ) ;
2020-01-09 04:45:14 -05:00
proc . write ( b '>' ) ;
2020-01-18 06:19:06 -05:00
true
} ) . is_some ( )
}
pub fn exists ( & self ) -> bool {
self . 0. is_some ( )
}
pub fn exists_and < F : FnOnce ( ProcessorRange ) -> bool > ( & self , pred : F ) -> bool {
match self . 0 {
Some ( range ) = > pred ( range ) ,
None = > false ,
}
}
pub fn replace ( & mut self , tag : MaybeClosingTag ) -> ( ) {
self . 0 = tag . 0 ;
2020-01-06 07:36:05 -05:00
}
}
// TODO Comment param `prev_sibling_closing_tag`.
2020-01-23 09:53:09 -05:00
pub fn process_tag ( proc : & mut Processor , ns : Namespace , mut prev_sibling_closing_tag : MaybeClosingTag ) -> ProcessingResult < MaybeClosingTag > {
2019-12-25 04:44:51 -05:00
// Expect to be currently at an opening tag.
2020-01-26 04:32:06 -05:00
proc . m ( IsChar ( b '<' ) , Discard ) . expect ( ) ;
2019-12-25 04:44:51 -05:00
// May not be valid tag name at current position, so require instead of expect.
2020-01-26 04:32:06 -05:00
let source_tag_name = proc . m ( WhilePred ( is_valid_tag_name_char ) , Discard ) . require ( " tag name " ) ? ;
2020-01-18 06:19:06 -05:00
if prev_sibling_closing_tag . exists_and ( | prev_tag |
CLOSING_TAG_OMISSION_RULES
. get ( & proc [ prev_tag ] )
. filter ( | rule | rule . can_omit_as_before ( & proc [ source_tag_name ] ) )
. is_none ( )
) {
prev_sibling_closing_tag . write ( proc ) ;
2020-01-06 07:36:05 -05:00
} ;
// Write initially skipped left chevron.
proc . write ( b '<' ) ;
// Write previously skipped name and use written code as range (otherwise source code will eventually be overwritten).
2020-01-07 04:56:37 -05:00
let tag_name = proc . write_range ( source_tag_name ) ;
2019-12-26 01:16:13 -05:00
2020-01-07 04:56:37 -05:00
let tag_type = match & proc [ tag_name ] {
2019-12-26 01:16:13 -05:00
b " script " = > TagType ::Script ,
b " style " = > TagType ::Style ,
_ = > TagType ::Other ,
} ;
2019-12-23 06:48:41 -05:00
2019-12-26 00:17:57 -05:00
let mut last_attr_type : Option < AttrType > = None ;
2019-12-23 06:48:41 -05:00
let mut self_closing = false ;
2020-01-07 04:56:37 -05:00
let is_void_tag = VOID_TAGS . contains ( & proc [ tag_name ] ) ;
2019-12-23 06:48:41 -05:00
loop {
2019-12-26 08:23:33 -05:00
// At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one).
2020-01-26 04:32:06 -05:00
proc . m ( WhilePred ( is_whitespace ) , Discard ) ;
2019-12-23 06:48:41 -05:00
2020-01-26 04:32:06 -05:00
if proc . m ( IsChar ( b '>' ) , Keep ) . nonempty ( ) {
2019-12-23 06:48:41 -05:00
// End of tag.
break ;
}
2020-01-07 04:56:37 -05:00
// Don't write self closing "/>" as it could be shortened to ">" if void tag.
2020-01-26 04:32:06 -05:00
self_closing = proc . m ( IsSeq ( b " /> " ) , Discard ) . nonempty ( ) ;
2019-12-25 04:44:51 -05:00
if self_closing {
2019-12-23 06:48:41 -05:00
break ;
}
2020-01-06 08:28:35 -05:00
// Mark attribute start in case we want to erase it completely.
2020-01-25 07:05:07 -05:00
let attr_checkpoint = Checkpoint ::new ( proc ) ;
2020-01-06 08:28:35 -05:00
let mut erase_attr = false ;
2019-12-26 00:17:57 -05:00
// Write space after tag name or unquoted/valueless attribute.
2020-01-14 05:10:49 -05:00
// Don't write after quoted.
2020-01-15 06:09:16 -05:00
// Handle rare case where file ends in opening tag before an attribute and no minification has been done yet,
// e.g. `<-` (yes, that's the entire file).
if proc . at_end ( ) {
return Err ( ErrorType ::UnexpectedEnd ) ;
} ;
2019-12-26 00:17:57 -05:00
match last_attr_type {
2019-12-26 08:23:33 -05:00
Some ( AttrType ::Unquoted ) | Some ( AttrType ::NoValue ) | None = > proc . write ( b ' ' ) ,
_ = > { }
2019-12-26 00:17:57 -05:00
} ;
2019-12-23 06:48:41 -05:00
2020-01-23 09:53:09 -05:00
let ProcessedAttr { name , typ , value } = process_attr ( proc , ns , tag_name ) ? ;
2020-01-06 08:28:35 -05:00
match ( tag_type , & proc [ name ] ) {
( TagType ::Script , b " type " ) = > {
// It's JS if the value is empty or one of `JAVASCRIPT_MIME_TYPES`.
2020-01-10 02:45:06 -05:00
let script_tag_type_is_js = value
2020-01-07 04:56:37 -05:00
. filter ( | v | ! JAVASCRIPT_MIME_TYPES . contains ( & proc [ * v ] ) )
. is_none ( ) ;
2020-01-06 08:28:35 -05:00
if script_tag_type_is_js {
erase_attr = true ;
} ;
2020-01-07 04:56:37 -05:00
}
2020-01-14 08:58:33 -05:00
( _ , name ) = > {
// TODO Check if HTML tag before checking if attribute removal applies to all elements.
2020-01-23 09:53:09 -05:00
erase_attr = match ( value , ATTRS . get ( ns , & proc [ tag_name ] , name ) ) {
2020-01-17 19:42:01 -05:00
( None , Some ( AttributeMinification { redundant_if_empty : true , .. } ) ) = > true ,
( Some ( val ) , Some ( AttributeMinification { default_value : Some ( defval ) , .. } ) ) = > proc [ val ] . eq ( * defval ) ,
_ = > false ,
} ;
2020-01-14 08:58:33 -05:00
}
2019-12-27 05:52:49 -05:00
} ;
2020-01-06 08:28:35 -05:00
if erase_attr {
2020-01-25 07:05:07 -05:00
attr_checkpoint . erase_written ( proc ) ;
2020-01-06 08:28:35 -05:00
} else {
last_attr_type = Some ( typ ) ;
} ;
2019-12-25 04:44:51 -05:00
} ;
2019-12-23 06:48:41 -05:00
2020-01-14 08:58:33 -05:00
// TODO Self closing does not actually close for HTML elements, but might close for foreign elements.
// See spec for more details.
2020-01-07 04:56:37 -05:00
if self_closing | | is_void_tag {
if self_closing {
// Write discarded tag closing characters.
2020-01-17 19:42:01 -05:00
if is_void_tag {
proc . write_slice ( b " > " ) ;
} else {
proc . write_slice ( b " /> " ) ;
} ;
2020-01-07 04:56:37 -05:00
} ;
2020-01-18 06:19:06 -05:00
return Ok ( MaybeClosingTag ( None ) ) ;
2019-12-25 04:44:51 -05:00
} ;
2019-12-23 06:48:41 -05:00
2020-01-23 21:17:46 -05:00
let child_ns = if proc [ tag_name ] . eq ( b " svg " ) {
2020-01-23 09:53:09 -05:00
Namespace ::Svg
} else {
ns
} ;
2019-12-26 01:16:13 -05:00
match tag_type {
2020-01-10 02:30:49 -05:00
TagType ::Script = > process_script ( proc ) ? ,
2019-12-26 01:16:13 -05:00
TagType ::Style = > process_style ( proc ) ? ,
2020-01-23 09:53:09 -05:00
_ = > process_content ( proc , child_ns , Some ( tag_name ) ) ? ,
2019-12-25 04:44:51 -05:00
} ;
2019-12-23 06:48:41 -05:00
// Require closing tag for non-void.
2020-01-26 04:32:06 -05:00
proc . m ( IsSeq ( b " </ " ) , Discard ) . require ( " closing tag " ) ? ;
let closing_tag = proc . m ( WhilePred ( is_valid_tag_name_char ) , Discard ) . require ( " closing tag name " ) ? ;
2020-01-17 19:42:01 -05:00
// We need to check closing tag matches as otherwise when we later write closing tag, it might be longer than source closing tag and cause source to be overwritten.
2020-01-10 02:45:06 -05:00
if ! proc [ closing_tag ] . eq ( & proc [ tag_name ] ) {
2020-01-10 02:30:49 -05:00
return Err ( ErrorType ::ClosingTagMismatch ) ;
} ;
2020-01-26 04:32:06 -05:00
proc . m ( WhilePred ( is_whitespace ) , Discard ) ;
proc . m ( IsChar ( b '>' ) , Discard ) . require ( " closing tag end " ) ? ;
2020-01-18 06:19:06 -05:00
Ok ( MaybeClosingTag ( Some ( tag_name ) ) )
2019-12-23 06:48:41 -05:00
}