2019-12-25 21:47:18 -05:00
use crate ::err ::ProcessingResult ;
2020-01-18 06:19:06 -05:00
use crate ::proc ::{ Processor , ProcessorRange } ;
2019-12-25 04:44:51 -05:00
use crate ::spec ::codepoint ::is_whitespace ;
2020-01-06 07:36:05 -05:00
use crate ::spec ::tag ::omission ::CLOSING_TAG_OMISSION_RULES ;
2020-01-18 06:19:06 -05:00
use crate ::spec ::tag ::whitespace ::{ get_whitespace_minification_for_tag , WhitespaceMinification } ;
2019-12-25 04:44:51 -05:00
use crate ::unit ::bang ::process_bang ;
use crate ::unit ::comment ::process_comment ;
2019-12-28 07:06:04 -05:00
use crate ::unit ::entity ::{ EntityType , parse_entity } ;
2020-01-14 01:55:27 -05:00
use crate ::unit ::instruction ::process_instruction ;
2020-01-18 06:19:06 -05:00
use crate ::unit ::tag ::{ MaybeClosingTag , process_tag } ;
2019-12-25 04:44:51 -05:00
2020-01-03 00:57:32 -05:00
/// Classification of the next (or most recently written) piece of content while processing the
/// children of a node.
///
/// `Debug` is derived so values can be used in assertions and diagnostics.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum ContentType {
    /// Upcoming input starts with `<!--`.
    Comment,
    /// Upcoming input starts with `<!` but is not a comment.
    Bang,
    /// Upcoming input starts with `<?`.
    Instruction,
    /// Upcoming input starts with `<` and is none of the other `<`-prefixed kinds.
    Tag,
    /// Nothing has been written yet; never produced by `peek`, only used as an initial marker.
    Start,
    /// Upcoming input is the end of input or starts with `</`.
    End,
    /// Upcoming input starts with `&`.
    Entity,
    /// Upcoming input starts with any other byte.
    Text,
}
impl ContentType {
2020-01-18 06:19:06 -05:00
fn is_tag_like ( & self ) -> bool {
2020-01-18 06:39:33 -05:00
// Do not include Comment as comments are not written.
2019-12-25 04:44:51 -05:00
match self {
2020-01-18 06:39:33 -05:00
ContentType ::Bang | ContentType ::Instruction | ContentType ::Tag = > true ,
2020-01-18 06:19:06 -05:00
_ = > false ,
}
}
2019-12-25 07:29:18 -05:00
fn peek ( proc : & mut Processor ) -> ContentType {
2020-01-04 21:55:20 -05:00
// Manually write out matching for fast performance as this is hot spot; don't use generated trie.
2020-01-06 02:13:24 -05:00
match proc . peek_offset_eof ( 0 ) {
2020-01-04 21:28:34 -05:00
None = > ContentType ::End ,
Some ( b '<' ) = > match proc . peek_offset_eof ( 1 ) {
Some ( b '/' ) = > ContentType ::End ,
2020-01-08 07:00:23 -05:00
Some ( b '?' ) = > ContentType ::Instruction ,
2020-01-04 21:28:34 -05:00
Some ( b '!' ) = > match proc . peek_slice_offset_eof ( 2 , 2 ) {
Some ( b " -- " ) = > ContentType ::Comment ,
_ = > ContentType ::Bang ,
} ,
2020-01-18 06:19:06 -05:00
_ = > ContentType ::Tag
2020-01-04 21:28:34 -05:00
} ,
Some ( b '&' ) = > ContentType ::Entity ,
2020-01-18 06:19:06 -05:00
Some ( _ ) = > ContentType ::Text ,
2020-01-04 21:28:34 -05:00
}
2019-12-25 04:44:51 -05:00
}
}
2020-01-18 06:19:06 -05:00
pub fn process_content ( proc : & mut Processor , parent : Option < ProcessorRange > ) -> ProcessingResult < ( ) > {
let & WhitespaceMinification { collapse , destroy_whole , trim } = get_whitespace_minification_for_tag ( parent . map ( | r | & proc [ r ] ) ) ;
let handle_ws = collapse | | destroy_whole | | trim ;
let mut last_written = ContentType ::Start ;
// Whether or not currently in whitespace.
let mut ws_skipped = false ;
// TODO Comment: Do not always initialise `uep` as `prev_sibling_closing_tag` might get written.
let mut prev_sibling_closing_tag = MaybeClosingTag ::none ( ) ;
// TODO Comment.
let uep = & mut proc . start_preventing_unintentional_entities ( ) ;
loop {
// Do not write anything until any previously ignored whitespace has been processed later.
let next_content_type = ContentType ::peek ( proc ) ;
let entity : Option < EntityType > = match next_content_type {
ContentType ::Entity = > Some ( parse_entity ( proc , false ) ? ) ,
_ = > None ,
} ;
if handle_ws {
// If any of these arms match, this is the start or part of one or more whitespace characters.
// Simply ignore and process until first non-whitespace.
if match ( next_content_type , entity ) {
( _ , Some ( EntityType ::Ascii ( c ) ) ) if is_whitespace ( c ) = > true ,
( ContentType ::Text , _ ) = > chain! ( proc . match_pred ( is_whitespace ) . discard ( ) . matched ( ) ) ,
_ = > false ,
} {
ws_skipped = true ;
continue ;
} ;
// Next character is not whitespace, so handle any previously ignored whitespace.
if ws_skipped {
if destroy_whole & & last_written . is_tag_like ( ) & & next_content_type . is_tag_like ( ) {
// Whitespace is between two tags, comments, instructions, or bangs.
// `destroy_whole` is on, so don't write it.
2020-01-18 19:32:38 -05:00
} else if trim & & ( last_written = = ContentType ::Start | | next_content_type = = ContentType ::End ) {
2020-01-18 06:19:06 -05:00
// Whitespace is leading or trailing.
// `trim` is on, so don't write it.
} else if collapse {
// If writing space, then prev_sibling_closing_tag no longer represents immediate previous sibling node; space will be new previous sibling node (as a text node).
prev_sibling_closing_tag . write_if_exists ( proc ) ;
// Current contiguous whitespace needs to be reduced to a single space character.
proc . write ( b ' ' ) ;
last_written = ContentType ::Text ;
} else {
unreachable! ( ) ;
} ;
// Reset whitespace marker.
ws_skipped = false ;
} ;
2020-01-15 06:09:16 -05:00
} ;
2020-01-18 06:19:06 -05:00
// Process and consume next character(s).
2020-01-15 06:09:16 -05:00
match next_content_type {
2020-01-18 19:32:38 -05:00
ContentType ::Comment = > {
// Comments are completely ignored and do not affect anything (previous element node's closing tag, unintentional entities, whitespace, etc.).
process_comment ( proc ) ? ;
continue ;
}
2020-01-18 06:19:06 -05:00
ContentType ::Tag = > {
proc . suspend ( uep ) ;
let new_closing_tag = process_tag (
proc ,
prev_sibling_closing_tag ,
) ? ;
prev_sibling_closing_tag . replace ( new_closing_tag ) ;
// Always resume as closing tag might not exist or be omitted.
proc . resume ( uep ) ;
2020-01-06 07:36:05 -05:00
}
ContentType ::End = > {
2020-01-18 06:19:06 -05:00
proc . end ( uep ) ;
if prev_sibling_closing_tag . exists_and ( | prev_tag |
CLOSING_TAG_OMISSION_RULES
. get ( & proc [ prev_tag ] )
. filter ( | rule | rule . can_omit_as_last_node ( parent . map ( | p | & proc [ p ] ) ) )
. is_none ( )
) {
prev_sibling_closing_tag . write ( proc ) ;
2020-01-06 07:36:05 -05:00
} ;
break ;
}
content_type = > {
// Immediate next sibling node is not an element, so write any immediate previous sibling element's closing tag.
2020-01-18 06:19:06 -05:00
// UEP is resumed after processing a tag and setting `prev_sibling_closing_tag` (see ContentType::Tag arm), so suspend it before writing any closing tag (even though nothing should've been written since tag was processed and `prev_sibling_closing_tag` was set).
if prev_sibling_closing_tag . exists ( ) {
proc . suspend ( uep ) ;
prev_sibling_closing_tag . write ( proc ) ;
proc . resume ( uep ) ;
} ;
2020-01-06 07:36:05 -05:00
match content_type {
2020-01-18 19:32:38 -05:00
ContentType ::Bang | ContentType ::Instruction = > {
2020-01-18 06:19:06 -05:00
proc . suspend ( uep ) ;
2020-01-14 01:55:27 -05:00
match content_type {
2020-01-18 06:19:06 -05:00
ContentType ::Bang = > { process_bang ( proc ) ? ; }
ContentType ::Instruction = > { process_instruction ( proc ) ? ; }
2020-01-14 01:55:27 -05:00
_ = > unreachable! ( ) ,
} ;
2020-01-18 06:19:06 -05:00
proc . resume ( uep ) ;
2020-01-14 01:55:27 -05:00
}
2020-01-18 06:19:06 -05:00
ContentType ::Entity | ContentType ::Text = > {
uep . expect_active ( ) ;
match entity {
// TODO Comment: Explain why < is handled this way.
2020-01-18 06:39:33 -05:00
Some ( entity @ EntityType ::NonDecodableRightChevron ( _ ) ) = > {
2020-01-18 06:19:06 -05:00
proc . suspend ( uep ) ;
2020-01-18 06:39:33 -05:00
entity . keep ( proc ) ;
2020-01-18 06:19:06 -05:00
proc . resume ( uep ) ;
}
Some ( entity ) = > {
entity . keep ( proc ) ;
}
// Is text.
None = > {
proc . accept ( ) ? ;
2020-01-14 01:55:27 -05:00
}
} ;
2020-01-18 06:19:06 -05:00
proc . update ( uep ) ;
2020-01-14 01:55:27 -05:00
}
2020-01-06 07:36:05 -05:00
_ = > unreachable! ( ) ,
} ;
}
} ;
2020-01-04 01:39:37 -05:00
2020-01-18 19:32:38 -05:00
// This should not be reached if ContentType::Comment.
last_written = next_content_type ;
2019-12-25 04:44:51 -05:00
} ;
Ok ( ( ) )
}