2019-12-25 21:47:18 -05:00
use crate ::err ::ProcessingResult ;
2020-01-14 01:55:27 -05:00
use crate ::proc ::{ Processor , ProcessorRange , UnintentionalEntityPrevention } ;
2019-12-25 04:44:51 -05:00
use crate ::spec ::codepoint ::is_whitespace ;
2020-01-06 07:36:05 -05:00
use crate ::spec ::tag ::omission ::CLOSING_TAG_OMISSION_RULES ;
2019-12-25 04:44:51 -05:00
use crate ::unit ::bang ::process_bang ;
use crate ::unit ::comment ::process_comment ;
2019-12-28 07:06:04 -05:00
use crate ::unit ::entity ::{ EntityType , parse_entity } ;
2020-01-14 01:55:27 -05:00
use crate ::unit ::instruction ::process_instruction ;
2020-01-06 07:36:05 -05:00
use crate ::unit ::tag ::{ process_tag , ProcessedTag } ;
2020-01-17 03:27:34 -05:00
use crate ::spec ::tag ::whitespace ::{ get_whitespace_minification_for_tag , WhitespaceMinification } ;
2019-12-25 04:44:51 -05:00
2020-01-03 00:57:32 -05:00
/// Kind of content found at the processor's current position while walking an element's content.
///
/// Every variant except `Start` can be produced by `ContentType::peek`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum ContentType {
    /// Input continues with `<!--`.
    Comment,
    /// Input continues with `<!` that is not followed by `--`.
    Bang,
    /// Input continues with `<?`.
    Instruction,
    /// Input continues with `<` that does not introduce any of the other `<`-prefixed kinds.
    OpeningTag,
    /// Nothing has been seen yet; used as the initial "previous content type" by `process_content`.
    Start,
    /// End of input, or input continues with `</`.
    End,
    /// Input continues with `&`.
    Entity,
    /// Next byte satisfies `is_whitespace`.
    Whitespace,
    /// Any other byte.
    Text,
}
impl ContentType {
2020-01-08 07:00:23 -05:00
fn is_comment_bang_instruction_opening_tag ( & self ) -> bool {
2019-12-25 04:44:51 -05:00
match self {
2020-01-08 07:00:23 -05:00
ContentType ::Comment | ContentType ::Bang | ContentType ::Instruction | ContentType ::OpeningTag = > true ,
2019-12-25 04:44:51 -05:00
_ = > false ,
}
}
2019-12-25 07:29:18 -05:00
fn peek ( proc : & mut Processor ) -> ContentType {
2020-01-04 21:55:20 -05:00
// Manually write out matching for fast performance as this is hot spot; don't use generated trie.
2020-01-06 02:13:24 -05:00
match proc . peek_offset_eof ( 0 ) {
2020-01-04 21:28:34 -05:00
None = > ContentType ::End ,
Some ( b '<' ) = > match proc . peek_offset_eof ( 1 ) {
Some ( b '/' ) = > ContentType ::End ,
2020-01-08 07:00:23 -05:00
Some ( b '?' ) = > ContentType ::Instruction ,
2020-01-04 21:28:34 -05:00
Some ( b '!' ) = > match proc . peek_slice_offset_eof ( 2 , 2 ) {
Some ( b " -- " ) = > ContentType ::Comment ,
_ = > ContentType ::Bang ,
} ,
_ = > ContentType ::OpeningTag
} ,
Some ( b '&' ) = > ContentType ::Entity ,
Some ( c ) = > if is_whitespace ( c ) { ContentType ::Whitespace } else { ContentType ::Text } ,
}
2019-12-25 04:44:51 -05:00
}
}
2020-01-04 01:39:37 -05:00
/// Processes and consumes the next piece of content inside a content-processing loop.
///
/// Must be expanded inside a `loop` in a function that returns a `ProcessingResult`: the
/// `ContentType::End` case issues a bare `break`, and errors are propagated with `?`.
///
/// Arguments:
/// - `$proc`: the processor.
/// - `$parent`: optional range of the parent element's name, consulted when deciding whether the
///   last child's closing tag can be omitted.
/// - `$next_content_type`: expression yielding the `ContentType` to handle; evaluated once.
/// - `$uep`: variable holding the optional `UnintentionalEntityPrevention` state.
/// - `$prev_sibling_closing_tag`: variable holding the optional previous sibling element whose
///   closing tag has not been written yet.
/// - `$get_entity`: expression yielding the `EntityType`; evaluated only for `ContentType::Entity`.
/// - `$on_whitespace`: block run only for `ContentType::Whitespace`.
macro_rules! handle_content_type {
    ($proc:ident, $parent:ident, $next_content_type:expr, $uep:ident, $prev_sibling_closing_tag:ident, $get_entity:expr, $on_whitespace:block) => {
        let kind = $next_content_type;

        // Anything that is not entity/text/whitespace ends the current run of text, so hand any
        // active entity prevention state to `after_write(.., true)` and clear it.
        // TODO Comment: Do not always initialise `uep` as `prev_sibling_closing_tag` might get written.
        match kind {
            ContentType::OpeningTag
            | ContentType::End
            | ContentType::Comment
            | ContentType::Bang
            | ContentType::Instruction => {
                if let Some(mut guard) = $uep.take() {
                    $proc.after_write(&mut guard, true);
                }
            }
            ContentType::Start | ContentType::Entity | ContentType::Whitespace | ContentType::Text => {}
        };

        match kind {
            ContentType::OpeningTag => {
                // The pending previous sibling (if any) is passed on to `process_tag`; the tag
                // it returns becomes the new pending sibling.
                $prev_sibling_closing_tag = Some(process_tag($proc, $prev_sibling_closing_tag)?);
            }
            ContentType::End => {
                // The pending sibling is the last node of the parent: write its closing tag
                // unless an omission rule for it allows leaving it out as the parent's last node.
                if let Some(last_child) = $prev_sibling_closing_tag {
                    let omittable = match ($parent, CLOSING_TAG_OMISSION_RULES.get(&$proc[last_child.name])) {
                        (Some(parent_name), Some(rule)) => rule.can_omit_as_last_node(&$proc[parent_name]),
                        _ => false,
                    };
                    if !omittable {
                        last_child.write_closing_tag($proc);
                    }
                }
                break;
            }
            non_element => {
                // Immediate next sibling node is not an element, so write any immediate previous sibling element's closing tag.
                if let Some(tag) = $prev_sibling_closing_tag.take() {
                    tag.write_closing_tag($proc);
                }
                match non_element {
                    ContentType::Comment => {
                        process_comment($proc)?;
                    }
                    ContentType::Bang => {
                        process_bang($proc)?;
                    }
                    ContentType::Instruction => {
                        process_instruction($proc)?;
                    }
                    ContentType::Entity | ContentType::Text | ContentType::Whitespace => {
                        // Start entity prevention state for this run of text if none is active.
                        if $uep.is_none() {
                            $uep = Some($proc.start_preventing_unintentional_entities());
                        }
                        match non_element {
                            ContentType::Entity => {
                                let parsed = $get_entity;
                                // TODO Comment: Explain why < is handled this way.
                                if let EntityType::NonDecodableRightChevron(_) = parsed {
                                    let mut guard = $uep.take().unwrap();
                                    $proc.after_write(&mut guard, true);
                                }
                                parsed.keep($proc);
                            }
                            ContentType::Text => {
                                $proc.accept()?;
                            }
                            ContentType::Whitespace => $on_whitespace,
                            _ => unreachable!(),
                        };
                        // UEP could have become None after matching EntityType::NonDecodableRightChevron.
                        if let Some(guard) = $uep.as_mut() {
                            $proc.after_write(guard, false);
                        }
                    }
                    // Only `ContentType::Start` can get here; it is never expected as next content.
                    _ => unreachable!(),
                };
            }
        };
    };
}
2020-01-06 07:36:05 -05:00
fn process_wss_content ( proc : & mut Processor , parent : Option < ProcessorRange > ) -> ProcessingResult < ( ) > {
let mut prev_sibling_closing_tag : Option < ProcessedTag > = None ;
2020-01-14 01:55:27 -05:00
let mut uep : Option < UnintentionalEntityPrevention > = None ;
2020-01-04 01:39:37 -05:00
loop {
2020-01-14 01:55:27 -05:00
handle_content_type! ( proc , parent , ContentType ::peek ( proc ) , uep , prev_sibling_closing_tag , parse_entity ( proc , false ) ? , { proc . accept ( ) ? ; } ) ;
2020-01-04 01:39:37 -05:00
} ;
Ok ( ( ) )
}
2019-12-25 21:47:18 -05:00
pub fn process_content ( proc : & mut Processor , parent : Option < ProcessorRange > ) -> ProcessingResult < ( ) > {
2020-01-17 03:27:34 -05:00
let & WhitespaceMinification { collapse , destroy_whole , trim } = get_whitespace_minification_for_tag ( parent . map ( | r | & proc [ r ] ) ) ;
2019-12-25 04:44:51 -05:00
2020-01-17 03:27:34 -05:00
if ! ( collapse | | destroy_whole | | trim ) {
2020-01-04 01:39:37 -05:00
// Normally whitespace entities are decoded and then ignored.
// However, if whitespace cannot be minified in any way,
// and we can't actually do anything but write whitespace as is,
// we would have to simply write skipped whitespace. This would cause
// issues when skipped whitespace includes encoded entities, so use
// function that does no whitespace handling. It's probably faster too.
2020-01-06 07:36:05 -05:00
return process_wss_content ( proc , parent ) ;
2020-01-04 01:39:37 -05:00
} ;
2019-12-25 04:44:51 -05:00
let mut last_non_whitespace_content_type = ContentType ::Start ;
// Whether or not currently in whitespace.
2020-01-04 01:39:37 -05:00
let mut currently_in_whitespace = false ;
2020-01-06 07:36:05 -05:00
// TODO Comment.
2019-12-30 00:52:59 -05:00
let mut entity : Option < EntityType > = None ;
2020-01-06 07:36:05 -05:00
// TODO Comment.
let mut prev_sibling_closing_tag : Option < ProcessedTag > = None ;
2020-01-14 01:55:27 -05:00
// TODO Comment.
let mut uep : Option < UnintentionalEntityPrevention > = None ;
2019-12-25 04:44:51 -05:00
loop {
2019-12-25 21:47:18 -05:00
let next_content_type = match ContentType ::peek ( proc ) {
ContentType ::Entity = > {
// Entity could decode to whitespace.
2019-12-30 00:52:59 -05:00
entity = Some ( parse_entity ( proc , false ) ? ) ;
2019-12-28 07:06:04 -05:00
let ws = match entity {
2019-12-30 00:52:59 -05:00
Some ( EntityType ::Ascii ( c ) ) = > is_whitespace ( c ) ,
2019-12-27 06:32:04 -05:00
_ = > false ,
} ;
if ws {
2019-12-25 21:47:18 -05:00
// Skip whitespace char, and mark as whitespace.
ContentType ::Whitespace
} else {
2019-12-30 00:52:59 -05:00
// Not whitespace, but don't write yet until any previously ignored whitespace has been processed later.
2019-12-25 21:47:18 -05:00
ContentType ::Entity
}
2019-12-26 08:23:33 -05:00
}
2019-12-25 21:47:18 -05:00
ContentType ::Whitespace = > {
// Whitespace is always ignored and then processed afterwards, even if not minifying.
2019-12-29 05:00:20 -05:00
proc . skip_expect ( ) ;
2019-12-25 21:47:18 -05:00
ContentType ::Whitespace
2019-12-26 08:23:33 -05:00
}
2019-12-25 21:47:18 -05:00
other_type = > other_type ,
} ;
2019-12-25 04:44:51 -05:00
if next_content_type = = ContentType ::Whitespace {
2020-01-04 01:39:37 -05:00
if ! currently_in_whitespace {
// This is the start of one or more whitespace characters.
currently_in_whitespace = true ;
} else {
// This is part of a contiguous whitespace, but not the start of, so simply ignore.
2019-12-25 04:44:51 -05:00
}
continue ;
}
// Next character is not whitespace, so handle any previously ignored whitespace.
2020-01-04 01:39:37 -05:00
if currently_in_whitespace {
2020-01-17 03:27:34 -05:00
if destroy_whole & & last_non_whitespace_content_type . is_comment_bang_instruction_opening_tag ( ) & & next_content_type . is_comment_bang_instruction_opening_tag ( ) {
2019-12-25 04:44:51 -05:00
// Whitespace is between two tags, comments, or bangs.
2020-01-17 03:27:34 -05:00
// `destroy_whole` is on, so don't write it.
} else if trim & & ( last_non_whitespace_content_type = = ContentType ::Start | | next_content_type = = ContentType ::End ) {
2019-12-25 04:44:51 -05:00
// Whitespace is leading or trailing.
2020-01-17 03:27:34 -05:00
// `trim` is on, so don't write it.
} else if collapse {
2020-01-17 19:42:01 -05:00
// If writing space, then prev_sibling_closing_tag no longer represents immediate previous sibling node; space will be new previous sibling node (as a text node).
prev_sibling_closing_tag . take ( ) . map ( | tag | tag . write_closing_tag ( proc ) ) ;
2019-12-25 04:44:51 -05:00
// Current contiguous whitespace needs to be reduced to a single space character.
proc . write ( b ' ' ) ;
} else {
2020-01-04 01:39:37 -05:00
unreachable! ( ) ;
} ;
2019-12-25 04:44:51 -05:00
2020-01-04 01:39:37 -05:00
// Reset whitespace marker.
currently_in_whitespace = false ;
2019-12-25 04:44:51 -05:00
} ;
// Process and consume next character(s).
2020-01-14 01:55:27 -05:00
handle_content_type! ( proc , parent , next_content_type , uep , prev_sibling_closing_tag , entity . unwrap ( ) , { unreachable! ( ) ; } ) ;
2019-12-25 21:47:18 -05:00
last_non_whitespace_content_type = next_content_type ;
2019-12-25 04:44:51 -05:00
} ;
Ok ( ( ) )
}