Remove unused code; handle escaped and double-escaped script data; allow using buffer instead of string source for bench; enforce matching tags
This commit is contained in:
parent
3ed4067304
commit
e966f9a23b
|
@ -369,4 +369,4 @@ Special handling of some attributes require case sensitive names and values. For
|
|||
|
||||
`script` and `style` tags must be closed with `</script>` and `</style>` respectively (case sensitive).
|
||||
|
||||
[hyperbuild can handle text script content.](./notes/Text%20script%20content.md)
|
||||
[hyperbuild can handle escaped and double-escaped script content.](./notes/Script%20data.md)
|
||||
|
|
|
@ -57,7 +57,7 @@ const setSize = (program, test, result) => {
|
|||
for (const t of tests) {
|
||||
for (const p of Object.keys(programs)) {
|
||||
try {
|
||||
setSize(p, t.name, programs[p](t.content).length);
|
||||
setSize(p, t.name, programs[p](t.contentAsString, t.contentAsBuffer).length);
|
||||
} catch (err) {
|
||||
console.error(`Failed to run ${p} on test ${t.name}:`);
|
||||
console.error(err);
|
||||
|
@ -72,7 +72,7 @@ const runTest = test => new Promise((resolve, reject) => {
|
|||
const suite = new benchmark.Suite();
|
||||
for (const p of Object.keys(programs)) {
|
||||
suite.add(p, () => {
|
||||
programs[p](test.content);
|
||||
programs[p](test.contentAsString, test.contentAsBuffer);
|
||||
});
|
||||
}
|
||||
suite
|
||||
|
|
|
@ -7,7 +7,11 @@ pushd "$(dirname "$0")"
|
|||
nodejs_cargo_toml="../nodejs/native/Cargo.toml"
|
||||
|
||||
rm -rf node_modules
|
||||
cp "$nodejs_cargo_toml" "$nodejs_cargo_toml.orig"
|
||||
if [ -f "$nodejs_cargo_toml.orig" ]; then
|
||||
echo 'Not altering Node.js Cargo.toml file'
|
||||
else
|
||||
cp "$nodejs_cargo_toml" "$nodejs_cargo_toml.orig"
|
||||
fi
|
||||
sed -i 's%^hyperbuild = .*$%hyperbuild = { path = "../.." }%' "$nodejs_cargo_toml"
|
||||
HYPERBUILD_NODEJS_SKIP_BIN_DOWNLOAD=1 npm i
|
||||
mv "$nodejs_cargo_toml.orig" "$nodejs_cargo_toml"
|
||||
|
|
|
@ -3,7 +3,7 @@ const hyperbuild = require("hyperbuild");
|
|||
const minimize = require("minimize");
|
||||
|
||||
module.exports = {
|
||||
'hyperbuild-nodejs': content => hyperbuild.minify(content),
|
||||
'hyperbuild-nodejs': (_, buffer) => hyperbuild.minify_in_place(Buffer.from(buffer)),
|
||||
'html-minifier': content => htmlMinifier.minify(content, {
|
||||
collapseBooleanAttributes: true,
|
||||
collapseInlineTagWhitespace: true,
|
||||
|
|
|
@ -4,5 +4,6 @@ const path = require('path');
|
|||
const testsDir = path.join(__dirname, 'tests');
|
||||
module.exports = fs.readdirSync(testsDir).map(name => ({
|
||||
name,
|
||||
content: fs.readFileSync(path.join(testsDir, name), 'utf8'),
|
||||
contentAsString: fs.readFileSync(path.join(testsDir, name), 'utf8'),
|
||||
contentAsBuffer: fs.readFileSync(path.join(testsDir, name)),
|
||||
})).sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
|
|
@ -23,25 +23,16 @@ const fetchReactTypingsSource = async () => {
|
|||
};
|
||||
|
||||
const processReactTypeDeclarations = async (source) => {
|
||||
let tagNameToInterfaceMap;
|
||||
let booleanAttributes = new Map();
|
||||
|
||||
const unvisited = [source];
|
||||
while (unvisited.length) {
|
||||
const node = unvisited.shift();
|
||||
let matches;
|
||||
switch (node.kind) {
|
||||
case ts.SyntaxKind.InterfaceDeclaration:
|
||||
const name = node.name.escapedText;
|
||||
if (name === "ReactHTML") {
|
||||
// Each member of ReactHTML looks something like:
|
||||
//
|
||||
// area: DetailedHTMLFactory<AreaHTMLAttributes<HTMLAreaElement>, HTMLAreaElement>;
|
||||
// ^^^^ [1] ^^^^^^^^^^^^^^^ [2]
|
||||
//
|
||||
// Get mapping from tag name [1] to interface name [2].
|
||||
tagNameToInterfaceMap = Object.fromEntries(node.members.map(m => [m.name.escapedText, m.type.typeArguments[1].typeName.escapedText]));
|
||||
} else if ((matches = /^([A-Za-z]+)HTMLAttributes/.exec(name))) {
|
||||
let matches;
|
||||
if ((matches = /^([A-Za-z]+)HTMLAttributes/.exec(name))) {
|
||||
const tagName = matches[1].toLowerCase();
|
||||
if (!['all', 'webview'].includes(tagName)) {
|
||||
node.members
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
{
|
||||
"COMMENT_END": "-->",
|
||||
"SCRIPT_END": "</script",
|
||||
"STYLE_END": "</style",
|
||||
"INSTRUCTION_END": "?>"
|
||||
}
|
||||
|
|
|
@ -6,5 +6,9 @@ module.exports = {
|
|||
const len = hyperbuild.minify(buf);
|
||||
return buf.slice(0, len).toString();
|
||||
},
|
||||
minify_in_place: hyperbuild.minify,
|
||||
minify_in_place: buf => {
|
||||
const len = hyperbuild.minify(buf);
|
||||
// This does not do a copy.
|
||||
return buf.slice(0, len);
|
||||
},
|
||||
};
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
# Text script content
|
||||
# Script data
|
||||
|
||||
For legacy reasons, special handling is required for content inside a script tag; see https://www.w3.org/TR/html52/syntax.html#script-data-state for more details.
|
||||
|
||||
```html
|
||||
<script type="text/html">
|
16
src/err.rs
16
src/err.rs
|
@ -2,10 +2,8 @@
|
|||
#[derive(Debug)]
|
||||
pub enum ErrorType {
|
||||
EntityFollowingMalformedEntity,
|
||||
ClosingTagMismatch,
|
||||
NoSpaceBeforeAttr,
|
||||
UnterminatedCssString,
|
||||
UnterminatedJsString,
|
||||
UnterminatedJsRegExp,
|
||||
CharNotFound { need: u8, got: u8 },
|
||||
MatchNotFound(&'static [u8]),
|
||||
NotFound(&'static str),
|
||||
|
@ -19,18 +17,12 @@ impl ErrorType {
|
|||
ErrorType::EntityFollowingMalformedEntity => {
|
||||
format!("Entity cannot follow malformed entity.")
|
||||
}
|
||||
ErrorType::ClosingTagMismatch => {
|
||||
format!("Opening tag name does not match closing tag.")
|
||||
}
|
||||
ErrorType::NoSpaceBeforeAttr => {
|
||||
format!("Space required before attribute.")
|
||||
}
|
||||
ErrorType::UnterminatedCssString => {
|
||||
format!("Unterminated CSS string.")
|
||||
}
|
||||
ErrorType::UnterminatedJsString => {
|
||||
format!("Unterminated JavaScript string.")
|
||||
}
|
||||
ErrorType::UnterminatedJsRegExp => {
|
||||
format!("Unterminated JavaScript regular expression.")
|
||||
}
|
||||
ErrorType::CharNotFound { need, got } => {
|
||||
format!("Expected {} (U+{:X}), got {} (U+{:X}).", need as char, need, got as char, got)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
use crate::err::ProcessingResult;
|
||||
use crate::proc::Processor;
|
||||
|
||||
// See https://www.w3.org/TR/html52/syntax.html#script-data-state and "notes/Script data.md".
|
||||
|
||||
/// Scanning state used while walking the contents of a `<script>` element.
///
/// Each `process_script_data*` function returns the state to switch to next, and
/// `process_script` keeps dispatching on it until `End` is produced.
/// See https://www.w3.org/TR/html52/syntax.html#script-data-state and "notes/Script data.md".
enum State {
    /// A `</script` was found while in `Normal` or `Escaped`; `process_script` stops here.
    End,
    /// Plain script data; the state `process_script` starts in.
    Normal,
    /// Entered when `<!--` is matched in `Normal`, or `</script` is matched in `DoubleEscaped`.
    Escaped,
    /// Entered when `<script` is matched in `Escaped`.
    DoubleEscaped,
}
|
||||
|
||||
// TODO Optimise all functions in this file.
|
||||
|
||||
fn process_script_data_double_escaped(proc: &mut Processor) -> ProcessingResult<State> {
|
||||
loop {
|
||||
if chain!(proc.match_seq(b"</script").keep().matched()) {
|
||||
return Ok(State::Escaped);
|
||||
};
|
||||
if chain!(proc.match_seq(b"-->").keep().matched()) {
|
||||
return Ok(State::Normal);
|
||||
};
|
||||
proc.accept()?;
|
||||
};
|
||||
}
|
||||
|
||||
fn process_script_data_escaped(proc: &mut Processor) -> ProcessingResult<State> {
|
||||
loop {
|
||||
if chain!(proc.match_seq(b"<script").keep().matched()) {
|
||||
return Ok(State::DoubleEscaped);
|
||||
};
|
||||
if chain!(proc.match_seq(b"</script").matched()) {
|
||||
return Ok(State::End);
|
||||
};
|
||||
if chain!(proc.match_seq(b"-->").keep().matched()) {
|
||||
return Ok(State::Normal);
|
||||
};
|
||||
proc.accept()?;
|
||||
};
|
||||
}
|
||||
|
||||
fn process_script_data(proc: &mut Processor) -> ProcessingResult<State> {
|
||||
loop {
|
||||
if chain!(proc.match_seq(b"</script").matched()) {
|
||||
return Ok(State::End);
|
||||
};
|
||||
if chain!(proc.match_seq(b"<!--").keep().matched()) {
|
||||
return Ok(State::Escaped);
|
||||
};
|
||||
proc.accept()?;
|
||||
};
|
||||
}
|
||||
|
||||
pub fn process_script(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
// NOTE: See "notes/Script data.md".
|
||||
let mut state = State::Normal;
|
||||
loop {
|
||||
state = match state {
|
||||
State::End => break,
|
||||
State::Normal => process_script_data(proc)?,
|
||||
State::Escaped => process_script_data_escaped(proc)?,
|
||||
State::DoubleEscaped => process_script_data_double_escaped(proc)?,
|
||||
};
|
||||
};
|
||||
Ok(())
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
use crate::err::{ProcessingResult};
|
||||
use crate::proc::{Processor};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_pattern_SCRIPT_END.rs"));
|
||||
|
||||
/// Processes the contents of a JavaScript `<script>` element.
///
/// Matches input for as long as `SCRIPT_END` is not encountered and keeps that match.
/// `SCRIPT_END` comes from the generated `gen_pattern_SCRIPT_END.rs` included in this file.
/// Always returns `Ok(())`.
pub fn process_js_script(proc: &mut Processor) -> ProcessingResult<()> {
    // The closing tag itself is not handled here; `process_tag` will require it.
    chain!(proc.match_while_not_seq(SCRIPT_END).keep());

    Ok(())
}
|
|
@ -1,2 +0,0 @@
|
|||
pub mod js;
|
||||
pub mod text;
|
|
@ -1,35 +0,0 @@
|
|||
use crate::err::ProcessingResult;
|
||||
use crate::proc::Processor;
|
||||
|
||||
pub fn process_text_script(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
// NOTE: See "notes/Text script content.md".
|
||||
let mut in_comment = false;
|
||||
let mut comment_has_unclosed_script = false;
|
||||
loop {
|
||||
// TODO Optimise
|
||||
if chain!(proc.match_seq(b"<!--").keep().matched()) {
|
||||
// NOTE: Could already be in comment, so don't reset `comment_has_unclosed_script`.
|
||||
in_comment = true;
|
||||
} else if chain!(proc.match_seq(b"-->").keep().matched()) {
|
||||
comment_has_unclosed_script = false;
|
||||
in_comment = false;
|
||||
} else if in_comment && chain!(proc.match_seq(b"<script").keep().matched()) {
|
||||
// TODO DOC Case sensitive, nothing else in tag.
|
||||
// TODO Opening tag can have attributes, whitespace, etc.
|
||||
chain!(proc.match_char(b'>').require()?.keep());
|
||||
comment_has_unclosed_script = true;
|
||||
} else if chain!(proc.match_seq(b"</script").matched()) {
|
||||
if !comment_has_unclosed_script {
|
||||
break;
|
||||
}
|
||||
comment_has_unclosed_script = false;
|
||||
// Keep previously matched closing tag start.
|
||||
proc.keep();
|
||||
// TODO Close tag can have whitespace.
|
||||
chain!(proc.match_char(b'>').require()?.keep());
|
||||
} else {
|
||||
proc.accept()?;
|
||||
};
|
||||
};
|
||||
Ok(())
|
||||
}
|
|
@ -8,6 +8,7 @@ use crate::spec::tag::void::VOID_TAGS;
|
|||
use crate::unit::attr::{AttrType, process_attr, ProcessedAttr};
|
||||
use crate::unit::content::process_content;
|
||||
use crate::unit::script::js::process_js_script;
|
||||
use crate::unit::script::process_script;
|
||||
use crate::unit::script::text::process_text_script;
|
||||
use crate::unit::style::process_style;
|
||||
|
||||
|
@ -159,14 +160,17 @@ pub fn process_tag(proc: &mut Processor, prev_sibling_closing_tag: Option<Proces
|
|||
};
|
||||
|
||||
match tag_type {
|
||||
TagType::Script => if script_tag_type_is_js { process_js_script(proc)?; } else { process_text_script(proc)?; },
|
||||
TagType::Script => process_script(proc)?,
|
||||
TagType::Style => process_style(proc)?,
|
||||
_ => process_content(proc, Some(tag_name))?,
|
||||
};
|
||||
|
||||
// Require closing tag for non-void.
|
||||
chain!(proc.match_seq(b"</").require_with_reason("closing tag")?.discard());
|
||||
chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("closing tag name")?.discard());
|
||||
let closing_tag = chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("closing tag name")?.discard().range());
|
||||
if !proc[closing_tag].eq(proc[tag_name]) {
|
||||
return Err(ErrorType::ClosingTagMismatch);
|
||||
};
|
||||
chain!(proc.match_while_pred(is_whitespace).discard());
|
||||
chain!(proc.match_char(b'>').require()?.discard());
|
||||
Ok(ProcessedTag { name: tag_name, has_closing_tag: true })
|
||||
|
|
Loading…
Reference in New Issue