Handle processing instructions
This commit is contained in:
parent
11adb24f00
commit
da830939d7
|
@ -320,7 +320,7 @@ JS and CSS comments are removed inside `<script>` and `<style>`.
|
|||
|
||||
### Ignored
|
||||
|
||||
Bangs and empty elements are not removed as it is assumed there is a special reason for their declaration.
|
||||
Bangs, [processing instructions](https://en.wikipedia.org/wiki/Processing_Instruction), and empty elements are not removed as it is assumed there is a special reason for their declaration.
|
||||
|
||||
## Parsing
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ const programs = {
|
|||
decodeEntities: true,
|
||||
html5: true,
|
||||
ignoreCustomComments: [],
|
||||
ignoreCustomFragments: [],
|
||||
ignoreCustomFragments: [/<\?[\s\S]*?\?>/],
|
||||
includeAutoGeneratedTags: true,
|
||||
keepClosingSlash: false,
|
||||
minifyCSS: false,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
{
|
||||
"COMMENT_END": "-->",
|
||||
"CSS_COMMENT_END": "*/"
|
||||
"CSS_COMMENT_END": "*/",
|
||||
"INSTRUCTION_END": "?>"
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@ pub fn process_comment(proc: &mut Processor) -> ProcessingResult<()> {
|
|||
|
||||
chain!(proc.match_while_not_seq(COMMENT_END).discard());
|
||||
|
||||
chain!(proc.match_seq(b"-->").require()?.discard());
|
||||
chain!(proc.match_seq(b"-->").require_with_reason("end of comment")?.discard());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ use crate::spec::tag::omission::CLOSING_TAG_OMISSION_RULES;
|
|||
use crate::spec::tag::wss::WSS_TAGS;
|
||||
use crate::unit::bang::process_bang;
|
||||
use crate::unit::comment::process_comment;
|
||||
use crate::unit::instruction::process_instruction;
|
||||
use crate::unit::entity::{EntityType, parse_entity};
|
||||
use crate::unit::tag::{process_tag, ProcessedTag};
|
||||
|
||||
|
@ -15,6 +16,7 @@ use crate::unit::tag::{process_tag, ProcessedTag};
|
|||
enum ContentType {
|
||||
Comment,
|
||||
Bang,
|
||||
Instruction,
|
||||
OpeningTag,
|
||||
|
||||
Start,
|
||||
|
@ -25,9 +27,9 @@ enum ContentType {
|
|||
}
|
||||
|
||||
impl ContentType {
|
||||
fn is_comment_bang_opening_tag(&self) -> bool {
|
||||
fn is_comment_bang_instruction_opening_tag(&self) -> bool {
|
||||
match self {
|
||||
ContentType::Comment | ContentType::Bang | ContentType::OpeningTag => true,
|
||||
ContentType::Comment | ContentType::Bang | ContentType::Instruction | ContentType::OpeningTag => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
@ -38,6 +40,7 @@ impl ContentType {
|
|||
None => ContentType::End,
|
||||
Some(b'<') => match proc.peek_offset_eof(1) {
|
||||
Some(b'/') => ContentType::End,
|
||||
Some(b'?') => ContentType::Instruction,
|
||||
Some(b'!') => match proc.peek_slice_offset_eof(2, 2) {
|
||||
Some(b"--") => ContentType::Comment,
|
||||
_ => ContentType::Bang,
|
||||
|
@ -75,6 +78,7 @@ macro_rules! handle_content_type {
|
|||
match content_type {
|
||||
ContentType::Comment => { process_comment($proc)?; }
|
||||
ContentType::Bang => { process_bang($proc)?; }
|
||||
ContentType::Instruction => { process_instruction($proc)?; }
|
||||
ContentType::Entity => $on_entity,
|
||||
ContentType::Text => { $proc.accept()?; }
|
||||
ContentType::Whitespace => $on_whitespace,
|
||||
|
@ -165,7 +169,7 @@ pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
|||
|
||||
// Next character is not whitespace, so handle any previously ignored whitespace.
|
||||
if currently_in_whitespace {
|
||||
if destroy_whole_whitespace && last_non_whitespace_content_type.is_comment_bang_opening_tag() && next_content_type.is_comment_bang_opening_tag() {
|
||||
if destroy_whole_whitespace && last_non_whitespace_content_type.is_comment_bang_instruction_opening_tag() && next_content_type.is_comment_bang_instruction_opening_tag() {
|
||||
// Whitespace is between two tags, comments, bangs, or processing instructions.
|
||||
// destroy_whole_whitespace is on, so don't write it.
|
||||
} else if trim_whitespace && (last_non_whitespace_content_type == ContentType::Start || next_content_type == ContentType::End) {
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
use crate::err::ProcessingResult;
|
||||
use crate::proc::Processor;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_pattern_INSTRUCTION_END.rs"));
|
||||
|
||||
/// Processes a processing instruction (`<?` ... `?>`) at the current position of `proc`.
///
/// The `<?` opener, the content up to `INSTRUCTION_END`, and the `?>` terminator are each
/// finished with `.keep()`.
///
/// # Errors
///
/// Propagates (via `?`) the error from
/// `require_with_reason("end of processing instruction")`; presumably this is raised when the
/// closing `?>` cannot be matched — confirm against `Processor`.
pub fn process_instruction(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
// Builds with debug assertions explicitly match the `<?` opener; other builds call
// `accept_amount_expect(2)` instead (presumably accepting the two opener bytes without
// re-checking them — confirm against `Processor`).
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_seq(b"<?").expect().keep());
|
||||
} else {
|
||||
proc.accept_amount_expect(2);
|
||||
};
|
||||
|
||||
// Match everything up to the generated `INSTRUCTION_END` pattern.
chain!(proc.match_while_not_seq(INSTRUCTION_END).keep());
|
||||
|
||||
// The terminator is required; failure is reported with the reason string below.
chain!(proc.match_seq(b"?>").require_with_reason("end of processing instruction")?.keep());
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -3,6 +3,7 @@ pub mod bang;
|
|||
pub mod comment;
|
||||
pub mod content;
|
||||
pub mod entity;
|
||||
pub mod instruction;
|
||||
pub mod script;
|
||||
pub mod style;
|
||||
pub mod tag;
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
use phf::{phf_set, Set};
|
||||
|
||||
use crate::err::{ErrorType, ProcessingResult};
|
||||
use crate::proc::{Processor, ProcessorRange};
|
||||
use crate::spec::codepoint::{is_whitespace, is_digit, is_hex_digit, is_alphanumeric};
|
||||
use phf::{Set, phf_set};
|
||||
use crate::pattern::{ITrieNode, TrieLeafNode};
|
||||
use crate::proc::{Processor, ProcessorRange};
|
||||
use crate::spec::codepoint::{is_alphanumeric, is_digit, is_hex_digit, is_whitespace};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_trie_JS_PUNCTUATORS.rs"));
|
||||
|
||||
|
@ -13,6 +14,52 @@ static IF_WHILE_FOR_WITH: Set<&'static [u8]> = phf_set! {
|
|||
b"with",
|
||||
};
|
||||
|
||||
/// JavaScript keywords and reserved words, stored as byte-string literals in a `phf` set.
///
/// `process_js_script` looks up `Syntax::Name` values in this set when computing `is_regex`.
/// `this` is deliberately left out (see the note inside the list).
static KEYWORDS: Set<&'static [u8]> = phf_set! {
|
||||
b"await",
|
||||
b"break",
|
||||
b"case",
|
||||
b"catch",
|
||||
b"class",
|
||||
b"const",
|
||||
b"continue",
|
||||
b"debugger",
|
||||
b"default",
|
||||
b"delete",
|
||||
b"do",
|
||||
b"else",
|
||||
b"export",
|
||||
b"extends",
|
||||
b"finally",
|
||||
b"for",
|
||||
b"function",
|
||||
b"if",
|
||||
b"import",
|
||||
b"in",
|
||||
b"instanceof",
|
||||
b"new",
|
||||
b"return",
|
||||
b"super",
|
||||
b"switch",
|
||||
// For the purposes of regular expression literal identification, `this` is not considered a keyword.
|
||||
// b"this",
|
||||
b"throw",
|
||||
b"try",
|
||||
b"typeof",
|
||||
b"var",
|
||||
b"void",
|
||||
b"while",
|
||||
b"with",
|
||||
b"yield",
|
||||
// Reserved keywords.
|
||||
b"enum",
|
||||
b"implements",
|
||||
b"interface",
|
||||
b"package",
|
||||
b"private",
|
||||
b"protected",
|
||||
b"public",
|
||||
};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
enum Syntax {
|
||||
StartOfCode,
|
||||
|
@ -67,7 +114,7 @@ fn parse_literal_number(proc: &mut Processor) -> ProcessingResult<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_regex(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
fn parse_literal_regex(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_char(b'/').expect().keep());
|
||||
} else {
|
||||
|
@ -267,11 +314,11 @@ pub fn process_js_script(proc: &mut Processor) -> ProcessingResult<()> {
|
|||
let is_regex = match last_syntax {
|
||||
Syntax::IfWhileForWithParentheses => true,
|
||||
Syntax::Punctuator => true,
|
||||
Syntax::Name(val) => !proc[val].eq(b"this"),
|
||||
Syntax::Name(val) => KEYWORDS.contains(&proc[val]),
|
||||
_ => false,
|
||||
};
|
||||
if is_regex {
|
||||
parse_regex(proc)?;
|
||||
parse_literal_regex(proc)?;
|
||||
last_syntax = Syntax::LiteralRegExp;
|
||||
} else {
|
||||
// Is divide operator.
|
||||
|
|
Loading…
Reference in New Issue