Remove unused code; handle escaped and double-escaped script data; allow using buffer instead of string source for bench; enforce matching tags

This commit is contained in:
Wilson Lin 2020-01-10 18:30:49 +11:00
parent 3ed4067304
commit e966f9a23b
15 changed files with 97 additions and 81 deletions

View File

@ -369,4 +369,4 @@ Special handling of some attributes require case sensitive names and values. For
`script` and `style` tags must be closed with `</script>` and `</style>` respectively (case sensitive).
[hyperbuild can handle text script content.](./notes/Text%20script%20content.md)
[hyperbuild can handle escaped and double-escaped script content.](./notes/Script%20data.md)

View File

@ -57,7 +57,7 @@ const setSize = (program, test, result) => {
for (const t of tests) {
for (const p of Object.keys(programs)) {
try {
setSize(p, t.name, programs[p](t.content).length);
setSize(p, t.name, programs[p](t.contentAsString, t.contentAsBuffer).length);
} catch (err) {
console.error(`Failed to run ${p} on test ${t.name}:`);
console.error(err);
@ -72,7 +72,7 @@ const runTest = test => new Promise((resolve, reject) => {
const suite = new benchmark.Suite();
for (const p of Object.keys(programs)) {
suite.add(p, () => {
programs[p](test.content);
programs[p](test.contentAsString, test.contentAsBuffer);
});
}
suite

View File

@ -7,7 +7,11 @@ pushd "$(dirname "$0")"
nodejs_cargo_toml="../nodejs/native/Cargo.toml"
rm -rf node_modules
cp "$nodejs_cargo_toml" "$nodejs_cargo_toml.orig"
if [ -f "$nodejs_cargo_toml.orig" ]; then
echo 'Not altering Node.js Cargo.toml file'
else
cp "$nodejs_cargo_toml" "$nodejs_cargo_toml.orig"
fi
sed -i 's%^hyperbuild = .*$%hyperbuild = { path = "../.." }%' "$nodejs_cargo_toml"
HYPERBUILD_NODEJS_SKIP_BIN_DOWNLOAD=1 npm i
mv "$nodejs_cargo_toml.orig" "$nodejs_cargo_toml"

View File

@ -3,7 +3,7 @@ const hyperbuild = require("hyperbuild");
const minimize = require("minimize");
module.exports = {
'hyperbuild-nodejs': content => hyperbuild.minify(content),
'hyperbuild-nodejs': (_, buffer) => hyperbuild.minify_in_place(Buffer.from(buffer)),
'html-minifier': content => htmlMinifier.minify(content, {
collapseBooleanAttributes: true,
collapseInlineTagWhitespace: true,

View File

@ -4,5 +4,6 @@ const path = require('path');
const testsDir = path.join(__dirname, 'tests');
module.exports = fs.readdirSync(testsDir).map(name => ({
name,
content: fs.readFileSync(path.join(testsDir, name), 'utf8'),
contentAsString: fs.readFileSync(path.join(testsDir, name), 'utf8'),
contentAsBuffer: fs.readFileSync(path.join(testsDir, name)),
})).sort((a, b) => a.name.localeCompare(b.name));

View File

@ -23,25 +23,16 @@ const fetchReactTypingsSource = async () => {
};
const processReactTypeDeclarations = async (source) => {
let tagNameToInterfaceMap;
let booleanAttributes = new Map();
const unvisited = [source];
while (unvisited.length) {
const node = unvisited.shift();
let matches;
switch (node.kind) {
case ts.SyntaxKind.InterfaceDeclaration:
const name = node.name.escapedText;
if (name === "ReactHTML") {
// Each member of ReactHTML looks something like:
//
// area: DetailedHTMLFactory<AreaHTMLAttributes<HTMLAreaElement>, HTMLAreaElement>;
// ^^^^ [1] ^^^^^^^^^^^^^^^ [2]
//
// Get mapping from tag name [1] to interface name [2].
tagNameToInterfaceMap = Object.fromEntries(node.members.map(m => [m.name.escapedText, m.type.typeArguments[1].typeName.escapedText]));
} else if ((matches = /^([A-Za-z]+)HTMLAttributes/.exec(name))) {
let matches;
if ((matches = /^([A-Za-z]+)HTMLAttributes/.exec(name))) {
const tagName = matches[1].toLowerCase();
if (!['all', 'webview'].includes(tagName)) {
node.members

View File

@ -1,6 +1,5 @@
{
"COMMENT_END": "-->",
"SCRIPT_END": "</script",
"STYLE_END": "</style",
"INSTRUCTION_END": "?>"
}

View File

@ -6,5 +6,9 @@ module.exports = {
const len = hyperbuild.minify(buf);
return buf.slice(0, len).toString();
},
minify_in_place: hyperbuild.minify,
minify_in_place: buf => {
const len = hyperbuild.minify(buf);
// This does not do a copy.
return buf.slice(0, len);
},
};

View File

@ -1,4 +1,6 @@
# Text script content
# Script data
For legacy reasons, special handling is required for content inside a script tag; see https://www.w3.org/TR/html52/syntax.html#script-data-state for more details.
```html
<script type="text/html">

View File

@ -2,10 +2,8 @@
#[derive(Debug)]
pub enum ErrorType {
EntityFollowingMalformedEntity,
ClosingTagMismatch,
NoSpaceBeforeAttr,
UnterminatedCssString,
UnterminatedJsString,
UnterminatedJsRegExp,
CharNotFound { need: u8, got: u8 },
MatchNotFound(&'static [u8]),
NotFound(&'static str),
@ -19,18 +17,12 @@ impl ErrorType {
ErrorType::EntityFollowingMalformedEntity => {
format!("Entity cannot follow malformed entity.")
}
ErrorType::ClosingTagMismatch => {
format!("Opening tag name does not match closing tag.")
}
ErrorType::NoSpaceBeforeAttr => {
format!("Space required before attribute.")
}
ErrorType::UnterminatedCssString => {
format!("Unterminated CSS string.")
}
ErrorType::UnterminatedJsString => {
format!("Unterminated JavaScript string.")
}
ErrorType::UnterminatedJsRegExp => {
format!("Unterminated JavaScript regular expression.")
}
ErrorType::CharNotFound { need, got } => {
format!("Expected {} (U+{:X}), got {} (U+{:X}).", need as char, need, got as char, got)
}

66
src/unit/script.rs Normal file
View File

@ -0,0 +1,66 @@
use crate::err::ProcessingResult;
use crate::proc::Processor;
// See https://www.w3.org/TR/html52/syntax.html#script-data-state and "notes/Script data.md".
// Scanner state for the contents of a `<script>` element. `process_script` starts in
// `Normal` and keeps dispatching on the current state until it reaches `End`.
enum State {
// A `</script` was found while in `Normal` or `Escaped`; `process_script` stops here.
End,
// Initial state: ordinary script data.
Normal,
// Entered from `Normal` after `<!--`, and from `DoubleEscaped` after `</script`.
Escaped,
// Entered from `Escaped` after `<script`.
DoubleEscaped,
}
// TODO Optimise all functions in this file.
/// Scans script content while in the double-escaped state (a `<script` was seen inside an
/// escaped `<!--` section).
///
/// At each position the checks run in this order:
/// - `</script` is matched and kept, and the scanner falls back to `State::Escaped`;
/// - `-->` is matched and kept, and the scanner returns to `State::Normal`;
/// - otherwise one more unit of input is accepted via `proc.accept()`.
///
/// Any error from `proc.accept()` is propagated to the caller.
fn process_script_data_double_escaped(proc: &mut Processor) -> ProcessingResult<State> {
    let next_state = loop {
        if chain!(proc.match_seq(b"</script").keep().matched()) {
            break State::Escaped;
        } else if chain!(proc.match_seq(b"-->").keep().matched()) {
            break State::Normal;
        } else {
            proc.accept()?;
        }
    };
    Ok(next_state)
}
/// Scans script content while in the escaped state (inside a `<!--` section).
///
/// At each position the checks run in this order:
/// - `<script` is matched and kept, moving to `State::DoubleEscaped`;
/// - `</script` is matched but deliberately NOT kept, yielding `State::End`;
/// - `-->` is matched and kept, returning to `State::Normal`;
/// - otherwise one more unit of input is accepted via `proc.accept()`.
///
/// Any error from `proc.accept()` is propagated to the caller.
///
/// NOTE(review): the character following `<script` is not inspected, so input such as
/// `<scripting` also switches to the double-escaped state; confirm whether that is intended.
fn process_script_data_escaped(proc: &mut Processor) -> ProcessingResult<State> {
    let next_state = loop {
        if chain!(proc.match_seq(b"<script").keep().matched()) {
            break State::DoubleEscaped;
        } else if chain!(proc.match_seq(b"</script").matched()) {
            // Only detected here, not kept.
            break State::End;
        } else if chain!(proc.match_seq(b"-->").keep().matched()) {
            break State::Normal;
        } else {
            proc.accept()?;
        }
    };
    Ok(next_state)
}
/// Scans script content while in the normal (unescaped) script data state.
///
/// At each position the checks run in this order:
/// - `</script` is matched but deliberately NOT kept, yielding `State::End`;
/// - `<!--` is matched and kept. If it is immediately closed again by `>` or `->`
///   (i.e. `<!-->` or `<!--->`), that closer is kept too and scanning continues in the
///   normal state; otherwise the scanner moves to `State::Escaped`;
/// - otherwise one more unit of input is accepted via `proc.accept()`.
///
/// Any error from `proc.accept()` is propagated to the caller.
fn process_script_data(proc: &mut Processor) -> ProcessingResult<State> {
    loop {
        if chain!(proc.match_seq(b"</script").matched()) {
            return Ok(State::End);
        };
        if chain!(proc.match_seq(b"<!--").keep().matched()) {
            // In the HTML tokenizer, `<!--` lands in the "script data escaped dash dash"
            // state, where `>` (optionally preceded by further `-`) switches straight back
            // to script data. The escaped-state scanner only recognises a complete `-->`
            // after this point, so the zero-dash and one-dash closers are handled here.
            // Without this, `<!-->` would leave the scanner escaped and a later literal
            // `<script` could swallow the real closing tag.
            if chain!(proc.match_seq(b">").keep().matched()) {
                continue;
            };
            if chain!(proc.match_seq(b"->").keep().matched()) {
                continue;
            };
            return Ok(State::Escaped);
        };
        proc.accept()?;
    };
}
/// Processes the content of a `<script>` element by driving the script data state machine.
///
/// Starts in `State::Normal` and repeatedly hands control to the scanner for the current
/// state, adopting whatever state it returns, until `State::End` is reached. Errors from
/// any scanner are propagated unchanged.
pub fn process_script(proc: &mut Processor) -> ProcessingResult<()> {
    // NOTE: See "notes/Script data.md".
    let mut current = State::Normal;
    loop {
        let step = match current {
            State::End => return Ok(()),
            State::Normal => process_script_data(proc),
            State::Escaped => process_script_data_escaped(proc),
            State::DoubleEscaped => process_script_data_double_escaped(proc),
        };
        current = step?;
    }
}

View File

@ -1,10 +0,0 @@
use crate::err::{ProcessingResult};
use crate::proc::{Processor};
include!(concat!(env!("OUT_DIR"), "/gen_pattern_SCRIPT_END.rs"));
// Processes the content of a JavaScript `<script>` element.
// Matches input with `match_while_not_seq(SCRIPT_END)` and keeps the match; `SCRIPT_END` comes
// from the generated pattern file included above (presumably the `</script` sequence; confirm
// against the pattern definitions). Always returns `Ok(())`.
pub fn process_js_script(proc: &mut Processor) -> ProcessingResult<()> {
// `process_tag` will require closing tag.
chain!(proc.match_while_not_seq(SCRIPT_END).keep());
Ok(())
}

View File

@ -1,2 +0,0 @@
pub mod js;
pub mod text;

View File

@ -1,35 +0,0 @@
use crate::err::ProcessingResult;
use crate::proc::Processor;
// Processes the content of a non-JavaScript (text) `<script>` element.
// Scans forward until a `</script` is found that does not pair with a `<script` opened
// inside a `<!-- ... -->` section; that final `</script` is matched but not kept.
// Errors from `require()` and `proc.accept()` are propagated to the caller.
pub fn process_text_script(proc: &mut Processor) -> ProcessingResult<()> {
// NOTE: See "notes/Text script content.md".
// Whether a `<!--` has been seen without a following `-->`.
let mut in_comment = false;
// Whether a `<script` seen inside the comment is still waiting for its `</script`.
let mut comment_has_unclosed_script = false;
loop {
// TODO Optimise
// The branches below are tried in order; each successful `keep()` match consumes input.
if chain!(proc.match_seq(b"<!--").keep().matched()) {
// NOTE: Could already be in comment, so don't reset `comment_has_unclosed_script`.
in_comment = true;
} else if chain!(proc.match_seq(b"-->").keep().matched()) {
// Leaving the comment also forgets any nested `<script` that was never closed.
comment_has_unclosed_script = false;
in_comment = false;
} else if in_comment && chain!(proc.match_seq(b"<script").keep().matched()) {
// TODO DOC Case sensitive, nothing else in tag.
// TODO Opening tag can have attributes, whitespace, etc.
// The very next character must be `>`; otherwise `require()` returns an error.
chain!(proc.match_char(b'>').require()?.keep());
comment_has_unclosed_script = true;
} else if chain!(proc.match_seq(b"</script").matched()) {
// No nested `<script` is pending, so this is the real closing tag: stop without keeping it.
if !comment_has_unclosed_script {
break;
}
// Otherwise this closes the nested `<script` and is kept as part of the content.
comment_has_unclosed_script = false;
// Keep previously matched closing tag start.
proc.keep();
// TODO Close tag can have whitespace.
chain!(proc.match_char(b'>').require()?.keep());
} else {
// Nothing special at this position; accept one unit of input and continue.
proc.accept()?;
};
};
Ok(())
}

View File

@ -8,6 +8,7 @@ use crate::spec::tag::void::VOID_TAGS;
use crate::unit::attr::{AttrType, process_attr, ProcessedAttr};
use crate::unit::content::process_content;
use crate::unit::script::js::process_js_script;
use crate::unit::script::process_script;
use crate::unit::script::text::process_text_script;
use crate::unit::style::process_style;
@ -159,14 +160,17 @@ pub fn process_tag(proc: &mut Processor, prev_sibling_closing_tag: Option<Proces
};
match tag_type {
TagType::Script => if script_tag_type_is_js { process_js_script(proc)?; } else { process_text_script(proc)?; },
TagType::Script => process_script(proc)?,
TagType::Style => process_style(proc)?,
_ => process_content(proc, Some(tag_name))?,
};
// Require closing tag for non-void.
chain!(proc.match_seq(b"</").require_with_reason("closing tag")?.discard());
chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("closing tag name")?.discard());
let closing_tag = chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("closing tag name")?.discard().range());
if !proc[closing_tag].eq(proc[tag_name]) {
return Err(ErrorType::ClosingTagMismatch);
};
chain!(proc.match_while_pred(is_whitespace).discard());
chain!(proc.match_char(b'>').require()?.discard());
Ok(ProcessedTag { name: tag_name, has_closing_tag: true })