2021-08-06 02:17:45 -04:00
use memchr ::memchr ;
use crate ::gen ::codepoints ::ALPHANUMERIC_OR_EQUALS ;
2021-08-06 02:19:36 -04:00
use crate ::gen ::entities ::{ EntityType , ENTITY } ;
2021-08-06 02:17:45 -04:00
use crate ::pattern ::TrieNodeMatch ;
pub fn encode_ampersands ( mut code : & [ u8 ] , in_attr_val : bool ) -> Vec < u8 > {
let mut res = Vec ::< u8 > ::new ( ) ;
while ! code . is_empty ( ) {
let ( before , matched ) = match memchr ( b '&' , code ) {
None = > ( code . len ( ) , false ) ,
Some ( n ) = > ( n , true ) ,
} ;
res . extend_from_slice ( & code [ .. before ] ) ;
code = & code [ before .. ] ;
if matched {
2021-08-06 06:16:30 -04:00
let ( start , end ) = match ENTITY . longest_matching_prefix ( code ) {
2021-08-06 02:17:45 -04:00
// Entity is malformed, so we can just ignore it.
2021-08-06 06:16:30 -04:00
TrieNodeMatch ::NotFound { reached } = > ( 0 , reached ) ,
TrieNodeMatch ::Found { len , value } = > (
2021-08-06 02:17:45 -04:00
match value {
2021-08-06 02:19:36 -04:00
EntityType ::Named ( _ )
if in_attr_val
& & code [ len - 1 ] ! = b ';'
& & code
. get ( len )
. filter ( | & & c | ALPHANUMERIC_OR_EQUALS [ c ] )
. is_some ( ) = >
{
2021-08-06 02:17:45 -04:00
// A named entity inside an attribute value that doesn't end with semicolon but is followed by an alphanumeric or `=` character is not decoded, so we don't need to encode.
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state.
2021-08-06 06:16:30 -04:00
0
2021-08-06 02:17:45 -04:00
}
_ = > {
res . extend_from_slice ( b " & " ) ;
2021-08-06 06:16:30 -04:00
// Skip the leading ampersand, as it will be replaced by `&`.
1
2021-08-06 02:17:45 -04:00
}
2021-08-06 06:16:30 -04:00
} ,
len ,
) ,
2021-08-06 02:17:45 -04:00
} ;
2021-08-06 06:16:30 -04:00
res . extend_from_slice ( & code [ start .. end ] ) ;
code = & code [ end .. ] ;
2021-08-06 02:17:45 -04:00
} ;
2021-08-06 02:19:36 -04:00
}
2021-08-06 02:17:45 -04:00
res
}