Fix handling of <script> and <style>; do not minify JS and CSS code
This commit is contained in:
parent
53199880f5
commit
8553b09eb3
|
@ -307,12 +307,6 @@ If an attribute value is empty after any processing, it is completely removed (i
|
|||
|
||||
Spaces are removed between attributes if possible.
|
||||
|
||||
### Script and style
|
||||
|
||||
Insignificant whitespace is trimmed and collapsed inside `<script>` with JS code and `<style>`.
|
||||
|
||||
JS and CSS comments are removed inside `<script>` and `<style>`.
|
||||
|
||||
### Other
|
||||
|
||||
- Comments are removed.
|
||||
|
@ -375,6 +369,4 @@ Special handling of some attributes require case sensitive names and values. For
|
|||
|
||||
`script` and `style` tags must be closed with `</script>` and `</style>` respectively (case sensitive).
|
||||
|
||||
Note that the closing tag must not contain any whitespace (e.g. `</script >` is not accepted).
|
||||
|
||||
[hyperbuild can handle text script content.](./notes/Text%20script%20content.md)
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
10
|
|
@ -88,7 +88,7 @@ const runTest = test => new Promise((resolve, reject) => {
|
|||
// Run Rust library.
|
||||
for (const [testName, testOps] of JSON.parse(cmd(
|
||||
path.join(__dirname, 'hyperbuild-bench', 'target', 'release', 'hyperbuild-bench'),
|
||||
'--iterations', 2048,
|
||||
'--iterations', 512,
|
||||
'--tests', path.join(__dirname, 'tests'),
|
||||
))) {
|
||||
results[testName] = {hyperbuild: testOps};
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env bash

# Prepares the benchmark environment:
#   1. installs the Node.js dependencies while the `hyperbuild` dependency in
#      ../nodejs/native/Cargo.toml is temporarily pointed at the path `../..`;
#   2. builds the `hyperbuild-bench` binary in release mode.

set -e

pushd "$(dirname "$0")"

nodejs_cargo_toml="../nodejs/native/Cargo.toml"

# Puts the original manifest back in place.
restore_cargo_toml() {
  mv "$nodejs_cargo_toml.orig" "$nodejs_cargo_toml"
}

rm -rf node_modules
cp "$nodejs_cargo_toml" "$nodejs_cargo_toml.orig"
# With `set -e`, a failing `sed` or `npm i` would otherwise leave the manifest
# rewritten and the `.orig` copy stranded; restore it on any exit from here on.
trap restore_cargo_toml EXIT
sed -i 's%^hyperbuild = .*$%hyperbuild = { path = "../.." }%' "$nodejs_cargo_toml"
HYPERBUILD_NODEJS_SKIP_BIN_DOWNLOAD=1 npm i
restore_cargo_toml
# The manifest is restored; the trap must not run again (the `.orig` file is gone).
trap - EXIT

pushd hyperbuild-bench
cargo build --release
popd

popd
|
|
@ -1,9 +0,0 @@
|
|||
#!/usr/bin/env bash

# Installs the Node.js dependencies (with HYPERBUILD_NODEJS_SKIP_BIN_DOWNLOAD=1 set
# for `npm i`) and builds the `hyperbuild-bench` binary in release mode.

set -e

rm -rf node_modules
HYPERBUILD_NODEJS_SKIP_BIN_DOWNLOAD=1 npm i

pushd hyperbuild-bench
cargo build --release
popd
|
|
@ -5,8 +5,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 362616,
|
||||
"relative": 0.6330862904281787
|
||||
"absolute": 493123,
|
||||
"relative": 0.8609366679760814
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 488822,
|
||||
|
@ -23,8 +23,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 224376,
|
||||
"relative": 0.559838517716393
|
||||
"absolute": 297108,
|
||||
"relative": 0.7413114696833979
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 298773,
|
||||
|
@ -41,8 +41,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 96856,
|
||||
"relative": 0.6222239353466829
|
||||
"absolute": 137131,
|
||||
"relative": 0.8809592640417317
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 137026,
|
||||
|
@ -59,8 +59,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 271250,
|
||||
"relative": 0.7342292960872684
|
||||
"absolute": 271470,
|
||||
"relative": 0.7348248000324821
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 270604,
|
||||
|
@ -77,8 +77,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 79380,
|
||||
"relative": 0.6298200514138818
|
||||
"absolute": 79853,
|
||||
"relative": 0.6335729474118506
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 79394,
|
||||
|
@ -95,8 +95,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 232058,
|
||||
"relative": 0.5906241489629755
|
||||
"absolute": 384569,
|
||||
"relative": 0.9787886577603123
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 383578,
|
||||
|
@ -131,8 +131,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 1383721,
|
||||
"relative": 0.6978107321127253
|
||||
"absolute": 1888405,
|
||||
"relative": 0.9523229578616866
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 1887947,
|
||||
|
@ -149,8 +149,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 831178,
|
||||
"relative": 0.5387307214917895
|
||||
"absolute": 1116125,
|
||||
"relative": 0.7234200454355427
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 1115617,
|
||||
|
@ -167,8 +167,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 86946,
|
||||
"relative": 0.5613076908178878
|
||||
"absolute": 89741,
|
||||
"relative": 0.5793517065959108
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 89321,
|
||||
|
@ -185,8 +185,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 270831,
|
||||
"relative": 0.9003600363028295
|
||||
"absolute": 273277,
|
||||
"relative": 0.9084916041395864
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 273174,
|
||||
|
@ -203,8 +203,8 @@
|
|||
"relative": 1
|
||||
},
|
||||
"hyperbuild-nodejs": {
|
||||
"absolute": 1347041,
|
||||
"relative": 0.5520822745589214
|
||||
"absolute": 1351483,
|
||||
"relative": 0.5539028200832156
|
||||
},
|
||||
"html-minifier": {
|
||||
"absolute": 1307563,
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 38 KiB After Width: | Height: | Size: 38 KiB |
|
@ -1,74 +1,74 @@
|
|||
{
|
||||
"Amazon.html": {
|
||||
"hyperbuild": 245.46705260338564,
|
||||
"hyperbuild-nodejs": 145.21435374237635,
|
||||
"html-minifier": 16.19830761009811,
|
||||
"minimize": 95.71966364576267
|
||||
"hyperbuild": 348.52605821546064,
|
||||
"hyperbuild-nodejs": 165.07093690973286,
|
||||
"html-minifier": 16.107592839431693,
|
||||
"minimize": 99.24208700487603
|
||||
},
|
||||
"BBC.html": {
|
||||
"hyperbuild": 429.2291873495222,
|
||||
"hyperbuild-nodejs": 251.3721939160052,
|
||||
"html-minifier": 18.333446052847226,
|
||||
"minimize": 108.38902861455512
|
||||
"hyperbuild": 498.2348139009136,
|
||||
"hyperbuild-nodejs": 244.6322429720145,
|
||||
"html-minifier": 18.585717845341243,
|
||||
"minimize": 116.27828832819753
|
||||
},
|
||||
"Bootstrap.html": {
|
||||
"hyperbuild": 235.08368235051825,
|
||||
"hyperbuild-nodejs": 156.19542771462898,
|
||||
"html-minifier": 8.557266916672539,
|
||||
"minimize": 22.359774537863895
|
||||
"hyperbuild": 238.21894278610927,
|
||||
"hyperbuild-nodejs": 156.53315203999523,
|
||||
"html-minifier": 8.3990089555115,
|
||||
"minimize": 22.909077061595145
|
||||
},
|
||||
"Bing.html": {
|
||||
"hyperbuild": 1008.1262435363229,
|
||||
"hyperbuild-nodejs": 585.3489088472239,
|
||||
"html-minifier": 79.35385186294975,
|
||||
"minimize": 435.31581246812584
|
||||
"hyperbuild": 1737.6334081200116,
|
||||
"hyperbuild-nodejs": 688.4893822559816,
|
||||
"html-minifier": 78.95988809648134,
|
||||
"minimize": 437.5366586028663
|
||||
},
|
||||
"Coding Horror.html": {
|
||||
"hyperbuild": 1146.867798530376,
|
||||
"hyperbuild-nodejs": 680.2295027510518,
|
||||
"html-minifier": 45.63362214760677,
|
||||
"minimize": 164.51899348138494
|
||||
"hyperbuild": 1197.4089706956722,
|
||||
"hyperbuild-nodejs": 669.904737573913,
|
||||
"html-minifier": 45.643674781314395,
|
||||
"minimize": 173.0258048899911
|
||||
},
|
||||
"Google.html": {
|
||||
"hyperbuild": 344.0346646025321,
|
||||
"hyperbuild-nodejs": 317.3708534283478,
|
||||
"html-minifier": 29.36827883130167,
|
||||
"minimize": 365.1698468973524
|
||||
"hyperbuild": 1123.9206824500823,
|
||||
"hyperbuild-nodejs": 920.376725868044,
|
||||
"html-minifier": 31.321054829311436,
|
||||
"minimize": 369.0906454521445
|
||||
},
|
||||
"Hacker News.html": {
|
||||
"hyperbuild": 1804.5683188361834,
|
||||
"hyperbuild-nodejs": 1259.6432378637871,
|
||||
"html-minifier": 66.43984413610241,
|
||||
"minimize": 255.30928557346104
|
||||
"hyperbuild": 1839.8486560618867,
|
||||
"hyperbuild-nodejs": 1255.30693251337,
|
||||
"html-minifier": 67.45295727773244,
|
||||
"minimize": 265.7472608824104
|
||||
},
|
||||
"NY Times.html": {
|
||||
"hyperbuild": 123.84742876588177,
|
||||
"hyperbuild-nodejs": 51.83081525871115,
|
||||
"html-minifier": 7.334756953956464,
|
||||
"minimize": 59.400301132747934
|
||||
"hyperbuild": 206.64831284965635,
|
||||
"hyperbuild-nodejs": 54.49167941039783,
|
||||
"html-minifier": 7.336661842305721,
|
||||
"minimize": 61.264331562390105
|
||||
},
|
||||
"Reddit.html": {
|
||||
"hyperbuild": 109.45057921629598,
|
||||
"hyperbuild-nodejs": 66.80243904185947,
|
||||
"html-minifier": 6.3323721760167695,
|
||||
"minimize": 44.528247219895
|
||||
"hyperbuild": 189.6454899629115,
|
||||
"hyperbuild-nodejs": 84.58158579201455,
|
||||
"html-minifier": 6.305846537661691,
|
||||
"minimize": 45.602895635511416
|
||||
},
|
||||
"Stack Overflow.html": {
|
||||
"hyperbuild": 763.6540095978328,
|
||||
"hyperbuild-nodejs": 496.21357271825997,
|
||||
"html-minifier": 39.39722290667494,
|
||||
"minimize": 148.07292819104936
|
||||
"hyperbuild": 795.551445372161,
|
||||
"hyperbuild-nodejs": 496.1578048486152,
|
||||
"html-minifier": 39.331066953478285,
|
||||
"minimize": 154.24858433261213
|
||||
},
|
||||
"Twitter.html": {
|
||||
"hyperbuild": 376.9341764747767,
|
||||
"hyperbuild-nodejs": 208.2611701306221,
|
||||
"html-minifier": 42.264558908660206,
|
||||
"minimize": 136.3651156178245
|
||||
"hyperbuild": 386.0676346339393,
|
||||
"hyperbuild-nodejs": 207.95620261405426,
|
||||
"html-minifier": 42.24757139208541,
|
||||
"minimize": 129.4921832398901
|
||||
},
|
||||
"Wikipedia.html": {
|
||||
"hyperbuild": 52.02792034641937,
|
||||
"hyperbuild-nodejs": 32.045431164840046,
|
||||
"html-minifier": 2.35238631274572,
|
||||
"minimize": 7.878943786969402
|
||||
"hyperbuild": 52.81648307515652,
|
||||
"hyperbuild-nodejs": 32.050455493661815,
|
||||
"html-minifier": 2.394314136599145,
|
||||
"minimize": 8.106531334304298
|
||||
}
|
||||
}
|
BIN
bench/speed.png
BIN
bench/speed.png
Binary file not shown.
Before Width: | Height: | Size: 38 KiB After Width: | Height: | Size: 38 KiB |
|
@ -1,5 +1,6 @@
|
|||
{
|
||||
"COMMENT_END": "-->",
|
||||
"CSS_COMMENT_END": "*/",
|
||||
"SCRIPT_END": "</script",
|
||||
"STYLE_END": "</style",
|
||||
"INSTRUCTION_END": "?>"
|
||||
}
|
||||
|
|
|
@ -1,54 +1 @@
|
|||
{
|
||||
"js punctuators": {
|
||||
"value_type": "bool",
|
||||
"//": "Some values are missing here because they are manually handled in `process_js_script` function.",
|
||||
"values": {
|
||||
"!": "true",
|
||||
"!=": "true",
|
||||
"!==": "true",
|
||||
"%": "true",
|
||||
"%=": "true",
|
||||
"&": "true",
|
||||
"&&": "true",
|
||||
"&=": "true",
|
||||
"*": "true",
|
||||
"**": "true",
|
||||
"**=": "true",
|
||||
"*=": "true",
|
||||
"+": "true",
|
||||
"++": "true",
|
||||
"+=": "true",
|
||||
",": "true",
|
||||
"-": "true",
|
||||
"--": "true",
|
||||
"-=": "true",
|
||||
"...": "true",
|
||||
":": "true",
|
||||
";": "true",
|
||||
"<": "true",
|
||||
"<<": "true",
|
||||
"<<=": "true",
|
||||
"<=": "true",
|
||||
"=": "true",
|
||||
"==": "true",
|
||||
"===": "true",
|
||||
"=>": "true",
|
||||
">": "true",
|
||||
">=": "true",
|
||||
">>": "true",
|
||||
">>=": "true",
|
||||
">>>": "true",
|
||||
">>>=": "true",
|
||||
"?": "true",
|
||||
"[": "true",
|
||||
"^": "true",
|
||||
"^=": "true",
|
||||
"{": "true",
|
||||
"|": "true",
|
||||
"|=": "true",
|
||||
"||": "true",
|
||||
"}": "true",
|
||||
"~": "true"
|
||||
}
|
||||
}
|
||||
}
|
||||
{}
|
||||
|
|
|
@ -1,378 +1,10 @@
|
|||
use phf::{phf_set, Set};
|
||||
use crate::err::{ProcessingResult};
|
||||
use crate::proc::{Processor};
|
||||
|
||||
use crate::err::{ErrorType, ProcessingResult};
|
||||
use crate::pattern::{ITrieNode, TrieLeafNode};
|
||||
use crate::proc::{Processor, ProcessorRange};
|
||||
use crate::spec::codepoint::{is_alphanumeric, is_digit, is_hex_digit, is_whitespace};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_trie_JS_PUNCTUATORS.rs"));
|
||||
|
||||
/// Keywords whose following `(` opens a statement head rather than a grouping or call:
/// `if (...)`, `while (...)`, `for (...)` and `with (...)`.
///
/// `process_js_script` looks the previous name up in this set when it meets `(`, so that it
/// can tell when the matching `)` closes such a head.
static IF_WHILE_FOR_WITH: Set<&'static [u8]> = phf_set! {
    b"for",
    b"if",
    b"while",
    b"with",
};
|
||||
|
||||
/// JavaScript keywords (including reserved ones).
///
/// `process_js_script` consults this set when a `/` follows a name: after a keyword the
/// slash is treated as the start of a regular expression literal, after any other name it
/// is treated as the division operator.
static KEYWORDS: Set<&'static [u8]> = phf_set! {
    b"await",
    b"break",
    b"case",
    b"catch",
    b"class",
    b"const",
    b"continue",
    b"debugger",
    b"default",
    b"delete",
    b"do",
    b"else",
    b"export",
    b"extends",
    b"finally",
    b"for",
    b"function",
    b"if",
    b"import",
    b"in",
    b"instanceof",
    b"new",
    b"return",
    b"super",
    b"switch",
    // For the purposes of regular expression literal identification, `this` is not considered a keyword.
    // b"this",
    b"throw",
    b"try",
    b"typeof",
    b"var",
    b"void",
    b"while",
    b"with",
    b"yield",
    // Reserved keywords.
    b"enum",
    b"implements",
    b"interface",
    b"package",
    b"private",
    b"protected",
    b"public",
};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
enum Syntax {
|
||||
StartOfCode,
|
||||
Punctuator,
|
||||
IfWhileForWithParentheses,
|
||||
GroupingParentheses,
|
||||
ArrayLiteralOrComputedProperty,
|
||||
// `++` or `--`. One of these directly before `/` usually means it's postfix and operating the value to its left.
|
||||
// TODO However, this is not always the case.
|
||||
// TODO Doc
|
||||
DoubleOperator,
|
||||
LiteralStringOrTemplate,
|
||||
LiteralNumber,
|
||||
LiteralRegExp,
|
||||
// Keyword, identifier, or null/boolean literal.
|
||||
Name(ProcessorRange),
|
||||
}
|
||||
|
||||
/// Returns `true` if `c` is a single or double quote, i.e. a character that opens and
/// closes a JavaScript string literal (template literals are handled separately).
fn is_string_delimiter(c: u8) -> bool {
    c == b'"' || c == b'\''
}
|
||||
|
||||
/// Returns `true` if `c` is the exponent marker of a numeric literal (`e` or `E`).
fn is_number_exponent_indicator(c: u8) -> bool {
    c == b'e' || c == b'E'
}
|
||||
|
||||
/// Returns `true` if `c` is a sign that may follow the exponent marker of a numeric
/// literal (`+` or `-`).
fn is_number_exponent_sign(c: u8) -> bool {
    c == b'+' || c == b'-'
}
|
||||
|
||||
fn is_name_continuation(c: u8) -> bool {
|
||||
// TODO Doc
|
||||
// TODO This assumes that name starts with valid.
|
||||
// TODO This does not follow spec.
|
||||
is_alphanumeric(c) || c == b'$' || c == b'_'
|
||||
}
|
||||
|
||||
fn parse_literal_number(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
if chain!(proc.match_char(b'0').keep().matched()) {
|
||||
match proc.peek()? {
|
||||
b'b' | b'B' | b'o' | b'O' | b'x' | b'X' => {
|
||||
// TODO Doc
|
||||
// Assume it's valid number and use `is_hex_digit` which works for all.
|
||||
proc.accept_expect();
|
||||
chain!(proc.match_while_pred(is_hex_digit).keep());
|
||||
return Ok(());
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
};
|
||||
chain!(proc.match_while_pred(is_digit).keep());
|
||||
chain!(proc.match_char(b'.').keep());
|
||||
chain!(proc.match_while_pred(is_digit).keep());
|
||||
chain!(proc.match_pred(is_number_exponent_indicator).keep());
|
||||
chain!(proc.match_pred(is_number_exponent_sign).keep());
|
||||
chain!(proc.match_while_pred(is_digit).keep());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_literal_regex(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_char(b'/').expect().keep());
|
||||
} else {
|
||||
proc.accept_expect();
|
||||
};
|
||||
|
||||
let mut escaping = false;
|
||||
let mut inside_class = false;
|
||||
|
||||
loop {
|
||||
let c = proc.accept()?;
|
||||
// We've already accepted char, so we can't use proc.match_line_terminator.
|
||||
// Line terminator cannot be escaped and is always invalid in a RegExp literal.
|
||||
if c == b'\r' || c == b'\n' {
|
||||
return Err(ErrorType::UnterminatedJsRegExp);
|
||||
};
|
||||
|
||||
if c == b'\\' {
|
||||
// If already escaping, then ignore backslash (interpret literally) and continue.
|
||||
// If not, then escape next character.
|
||||
escaping = !escaping;
|
||||
continue;
|
||||
};
|
||||
|
||||
// If escaping, then none of these special characters matter.
|
||||
if !escaping {
|
||||
match (c, inside_class) {
|
||||
(b']', true) => inside_class = false,
|
||||
(b'[', false) => inside_class = true,
|
||||
(b'/', false) => break,
|
||||
_ => (),
|
||||
};
|
||||
} else {
|
||||
escaping = false;
|
||||
};
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_comment_single(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_seq(b"//").expect().discard());
|
||||
} else {
|
||||
proc.skip_amount_expect(2);
|
||||
};
|
||||
|
||||
// Comment can end at closing </script>.
|
||||
// TODO Optimise
|
||||
while !chain!(proc.match_line_terminator().discard().matched()) {
|
||||
if chain!(proc.match_seq(b"</script>").matched()) {
|
||||
break;
|
||||
};
|
||||
|
||||
proc.skip()?;
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_comment_multi(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_seq(b"/*").expect().discard());
|
||||
} else {
|
||||
proc.skip_amount_expect(2);
|
||||
};
|
||||
|
||||
// Comment can end at closing </script>.
|
||||
// TODO Optimise
|
||||
while !chain!(proc.match_seq(b"*/").discard().matched()) {
|
||||
if chain!(proc.match_seq(b"</script>").matched()) {
|
||||
break;
|
||||
};
|
||||
|
||||
proc.skip()?;
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
let delim = if cfg!(debug_assertions) {
|
||||
chain!(proc.match_pred(is_string_delimiter).expect().keep().char())
|
||||
} else {
|
||||
proc.accept_expect()
|
||||
};
|
||||
|
||||
let mut escaping = false;
|
||||
|
||||
loop {
|
||||
let c = proc.accept()?;
|
||||
|
||||
if c == b'\\' {
|
||||
escaping = !escaping;
|
||||
continue;
|
||||
};
|
||||
|
||||
if !escaping {
|
||||
if c == delim {
|
||||
break;
|
||||
};
|
||||
// We've already accepted char, so we can't use proc.match_line_terminator.
|
||||
if c == b'\r' || c == b'\n' {
|
||||
return Err(ErrorType::UnterminatedJsString);
|
||||
};
|
||||
} else {
|
||||
escaping = false;
|
||||
};
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_template(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_char(b'`').expect().keep());
|
||||
} else {
|
||||
proc.accept_expect();
|
||||
};
|
||||
|
||||
let mut escaping = false;
|
||||
|
||||
loop {
|
||||
let c = proc.accept()?;
|
||||
|
||||
if c == b'\\' {
|
||||
escaping = !escaping;
|
||||
continue;
|
||||
}
|
||||
|
||||
if c == b'`' && !escaping {
|
||||
break;
|
||||
}
|
||||
|
||||
escaping = false;
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_pattern_SCRIPT_END.rs"));
|
||||
|
||||
pub fn process_js_script(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
chain!(proc.match_while_pred(is_whitespace).discard());
|
||||
// This variable is used so that trailing whitespace is simply trimmed/removed instead of collapsed.
|
||||
let mut discarded_whitespace = false;
|
||||
// Only updated when currently inside parentheses `()` directly after one of these keywords:
|
||||
// - if (...)
|
||||
// - while (...) // Note that this includes `do {...} while (...)` without trailing semicolon.
|
||||
// - for (...)
|
||||
// - with (...)
|
||||
let mut parenthesis_depth = 0usize;
|
||||
let mut last_syntax: Syntax = Syntax::StartOfCode;
|
||||
// Cannot just break on match "</" as that could be "</a/.exec(str)?.length".
|
||||
while !chain!(proc.match_seq(b"</script").matched()) {
|
||||
if discarded_whitespace {
|
||||
proc.write(b' ');
|
||||
discarded_whitespace = false;
|
||||
};
|
||||
match proc.peek_offset(0)? {
|
||||
c if is_whitespace(c) => {
|
||||
chain!(proc.match_while_pred(is_whitespace).discard());
|
||||
discarded_whitespace = true;
|
||||
}
|
||||
b'.' => {
|
||||
// TODO Handle `...`
|
||||
if is_digit(proc.peek_offset(1)?) {
|
||||
// Is numeric literal starting with decimal dot.
|
||||
parse_literal_number(proc)?;
|
||||
last_syntax = Syntax::LiteralNumber;
|
||||
} else {
|
||||
// Is dot operator.
|
||||
proc.accept_expect();
|
||||
last_syntax = Syntax::Punctuator;
|
||||
};
|
||||
}
|
||||
b'(' => {
|
||||
proc.accept_expect();
|
||||
if parenthesis_depth > 0 || match last_syntax {
|
||||
Syntax::Name(r) => IF_WHILE_FOR_WITH.contains(&proc[r]),
|
||||
_ => false,
|
||||
} {
|
||||
parenthesis_depth += 1;
|
||||
};
|
||||
last_syntax = Syntax::Punctuator;
|
||||
}
|
||||
b')' => {
|
||||
proc.accept_expect();
|
||||
last_syntax = Syntax::GroupingParentheses;
|
||||
if parenthesis_depth > 0 {
|
||||
parenthesis_depth -= 1;
|
||||
if parenthesis_depth == 0 {
|
||||
last_syntax = Syntax::IfWhileForWithParentheses;
|
||||
};
|
||||
};
|
||||
}
|
||||
b']' => {
|
||||
proc.accept_expect();
|
||||
last_syntax = Syntax::ArrayLiteralOrComputedProperty;
|
||||
}
|
||||
c if c == b'+' || c == b'-' => {
|
||||
proc.accept_expect();
|
||||
if proc.peek()? == c {
|
||||
proc.accept_expect();
|
||||
last_syntax = Syntax::DoubleOperator;
|
||||
} else {
|
||||
chain!(proc.match_char(b'=').discard());
|
||||
last_syntax = Syntax::Punctuator;
|
||||
};
|
||||
}
|
||||
c if is_digit(c) => {
|
||||
parse_literal_number(proc)?;
|
||||
last_syntax = Syntax::LiteralNumber;
|
||||
}
|
||||
b'/' => match proc.peek_offset(1)? {
|
||||
b'/' => parse_comment_single(proc)?,
|
||||
b'*' => parse_comment_multi(proc)?,
|
||||
b'=' => {
|
||||
// Is `/=` operator.
|
||||
proc.accept_amount_expect(2);
|
||||
last_syntax = Syntax::Punctuator;
|
||||
}
|
||||
_ => {
|
||||
let is_regex = match last_syntax {
|
||||
Syntax::IfWhileForWithParentheses => true,
|
||||
Syntax::Punctuator => true,
|
||||
Syntax::Name(val) => KEYWORDS.contains(&proc[val]),
|
||||
_ => false,
|
||||
};
|
||||
if is_regex {
|
||||
parse_literal_regex(proc)?;
|
||||
last_syntax = Syntax::LiteralRegExp;
|
||||
} else {
|
||||
// Is divide operator.
|
||||
proc.accept_expect();
|
||||
last_syntax = Syntax::Punctuator;
|
||||
};
|
||||
}
|
||||
}
|
||||
c if is_string_delimiter(c) => {
|
||||
parse_string(proc)?;
|
||||
last_syntax = Syntax::LiteralStringOrTemplate;
|
||||
}
|
||||
b'`' => {
|
||||
parse_template(proc)?;
|
||||
last_syntax = Syntax::LiteralStringOrTemplate;
|
||||
}
|
||||
_ => {
|
||||
if chain!(proc.match_trie(JS_PUNCTUATORS).keep().matched()) {
|
||||
last_syntax = Syntax::Punctuator;
|
||||
} else {
|
||||
last_syntax = Syntax::Name(chain!(proc.match_while_pred(is_name_continuation).require_with_reason("JavaScript")?.keep().out_range()));
|
||||
};
|
||||
}
|
||||
};
|
||||
};
|
||||
// `process_tag` will require closing tag.
|
||||
chain!(proc.match_while_not_seq(SCRIPT_END).keep());
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,83 +1,10 @@
|
|||
use crate::err::{ErrorType, ProcessingResult};
|
||||
use crate::err::ProcessingResult;
|
||||
use crate::proc::Processor;
|
||||
use crate::spec::codepoint::is_whitespace;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_pattern_CSS_COMMENT_END.rs"));
|
||||
|
||||
/// Returns `true` if `c` is a single or double quote, i.e. a character that opens and
/// closes a CSS string.
fn is_string_delimiter(c: u8) -> bool {
    match c {
        b'"' | b'\'' => true,
        _ => false,
    }
}
|
||||
|
||||
fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_seq(b"/*").expect().discard());
|
||||
} else {
|
||||
proc.skip_amount_expect(2);
|
||||
};
|
||||
|
||||
// Unlike script tags, style comments do NOT end at closing tag.
|
||||
chain!(proc.match_while_not_seq(CSS_COMMENT_END).discard());
|
||||
chain!(proc.match_seq(b"*/").require_with_reason("CSS comment end")?.discard());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
let delim = if cfg!(debug_assertions) {
|
||||
chain!(proc.match_pred(is_string_delimiter).expect().keep().char())
|
||||
} else {
|
||||
proc.accept_expect()
|
||||
};
|
||||
|
||||
let mut escaping = false;
|
||||
|
||||
loop {
|
||||
let c = proc.accept()?;
|
||||
|
||||
if c == b'\\' {
|
||||
escaping = !escaping;
|
||||
continue;
|
||||
};
|
||||
|
||||
if !escaping {
|
||||
if c == delim {
|
||||
break;
|
||||
};
|
||||
// We've already accepted char, so we can't use proc.match_line_terminator.
|
||||
if c == b'\r' || c == b'\n' {
|
||||
return Err(ErrorType::UnterminatedCssString);
|
||||
};
|
||||
} else {
|
||||
escaping = false;
|
||||
};
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_pattern_STYLE_END.rs"));
|
||||
|
||||
pub fn process_style(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
// TODO Refactor
|
||||
chain!(proc.match_while_pred(is_whitespace).discard());
|
||||
// This variable is used so that trailing whitespace is simply trimmed/removed instead of collapsed.
|
||||
let mut discarded_whitespace = false;
|
||||
while !chain!(proc.match_seq(b"</").matched()) {
|
||||
if discarded_whitespace {
|
||||
proc.write(b' ');
|
||||
discarded_whitespace = false;
|
||||
};
|
||||
if chain!(proc.match_while_pred(is_whitespace).discard().matched()) {
|
||||
discarded_whitespace = true;
|
||||
} else if chain!(proc.match_seq(b"/*").matched()) {
|
||||
parse_comment(proc)?;
|
||||
} else if chain!(proc.match_pred(is_string_delimiter).matched()) {
|
||||
parse_string(proc)?;
|
||||
} else {
|
||||
proc.accept()?;
|
||||
};
|
||||
};
|
||||
|
||||
// `process_tag` will require closing tag.
|
||||
chain!(proc.match_while_not_seq(STYLE_END).keep());
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue