Allow more whitespace w.r.t. attributes; do not format bench test pages

This commit is contained in:
Wilson Lin 2020-01-14 21:10:49 +11:00
parent ba3e1917ce
commit 37115fef7a
6 changed files with 16 additions and 49 deletions

View File

@ -351,17 +351,6 @@ Numeric character references that do not reference a valid [Unicode Scalar Value
Backticks (`` ` ``) are not valid quote marks and not interpreted as such.
However, backticks are valid attribute value quotes in Internet Explorer.
It is an error if there is:
- whitespace between `=` and an attribute name/value; and/or
- no whitespace before an attribute.
For example:
```html
<div id = "a"class="abc"></div>
```
Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify, and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block) instead of JavaScript code.
### Script and style

View File

@ -1,7 +1,6 @@
const {promises: fs} = require('fs');
const request = require('request-promise-native');
const path = require('path');
const prettier = require('prettier');
const tests = {
"Amazon": "https://www.amazon.com/",
@ -38,31 +37,18 @@ const fetchTest = async (name, url) => {
// Format after fetching as formatting is synchronous and can take so long that connections get dropped by server due to inactivity.
for (const [name, html] of await Promise.all(Object.entries(tests).map(([name, url]) => fetchTest(name, url)))) {
// Apply some fixes to HTML to allow strict formatter to work.
const formatted = prettier.format(
html
// Fix missing semicolon after entity in Amazon.
.replace(/&#x200b/g, '&#x200b;')
// Fix consecutive malformed entities in Amazon.
.replace(/&& window.ue_sbl/g, '&amp&amp window.ue_sbl')
.replace(/&&pf_rd_p/g, '&amp&amppf_rd_p')
// Fix early termination of conditional comment in Amazon.
.replace('--></style>\n<![endif]-->', '</style>\n<![endif]-->')
// Fix closing of void tag in Amazon.
.replace(/><\/hr>/g, '/>')
// Fix extra '</div>' in BBC.
.replace('</a></span></small></div></div></div></footer>', '</a></span></small></div></div></footer>')
// Fix consecutive malformed entities in Google.
.replace(/&&google.aft/g, '&amp&ampgoogle.aft')
// Fix parser failing to parse unquoted attribute value starting with forward slash in Stack Overflow.
.replace('action=/search', 'action="/search"')
// Fix broken attribute value in Stack Overflow.
.replace('height=151"', 'height="151"')
,
{parser: 'html'},
);
console.log(`Formatted ${name}`);
await fs.writeFile(path.join(__dirname, 'tests', `${name}.html`), formatted);
// Apply some fixes to HTML.
const fixed = html
// Fix early termination of conditional comment in Amazon.
.replace('--></style>\n<![endif]-->', '</style>\n<![endif]-->')
// Fix closing of void tag in Amazon.
.replace(/><\/hr>/g, '/>')
// Fix extra '</div>' in BBC.
.replace('</a></span></small></div></div></div></footer>', '</a></span></small></div></div></footer>')
// Fix broken attribute value in Stack Overflow.
.replace('height=151"', 'height="151"')
;
await fs.writeFile(path.join(__dirname, 'tests', `${name}.html`), fixed);
}
})()
.catch(console.error);

View File

@ -8,7 +8,6 @@
"hyperbuild": "file:../nodejs",
"minimize": "2.2.0",
"mkdirp": "^0.5.1",
"prettier": "^1.19.1",
"request": "^2.88.0",
"request-promise-native": "^1.0.8"
},

View File

@ -2,7 +2,6 @@
#[derive(Debug)]
pub enum ErrorType {
ClosingTagMismatch,
NoSpaceBeforeAttr,
MatchNotFound(&'static [u8]),
NotFound(&'static str),
ExpectedChar(u8),
@ -15,9 +14,6 @@ impl ErrorType {
ErrorType::ClosingTagMismatch => {
format!("Closing tag name does not match opening tag.")
}
ErrorType::NoSpaceBeforeAttr => {
format!("Space required before attribute.")
}
ErrorType::MatchNotFound(seq) => {
format!("Expected `{}`.", unsafe { std::str::from_utf8_unchecked(seq) })
}

View File

@ -2,7 +2,7 @@ use phf::{phf_set, Set};
use crate::err::ProcessingResult;
use crate::proc::{Processor, ProcessorRange};
use crate::spec::codepoint::is_control;
use crate::spec::codepoint::{is_control, is_whitespace};
use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue, skip_attr_value};
mod value;
@ -44,11 +44,13 @@ pub fn process_attr(proc: &mut Processor, element: ProcessorRange) -> Processing
let after_name = proc.checkpoint();
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
let has_value = chain!(proc.match_char(b'=').keep().matched());
let (typ, value) = if !has_value {
(AttrType::NoValue, None)
} else {
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
if is_boolean {
skip_attr_value(proc)?;
(AttrType::NoValue, None)

View File

@ -108,17 +108,12 @@ pub fn process_tag(proc: &mut Processor, prev_sibling_closing_tag: Option<Proces
break;
}
// This needs to be enforced as otherwise there would be difficulty in determining what is the end of a tag/attribute name/attribute value.
if !ws_accepted {
return Err(ErrorType::NoSpaceBeforeAttr);
}
// Mark attribute start in case we want to erase it completely.
let attr_checkpoint = proc.checkpoint();
let mut erase_attr = false;
// Write space after tag name or unquoted/valueless attribute.
// Don't write after unquoted.
// Don't write after quoted.
match last_attr_type {
Some(AttrType::Unquoted) | Some(AttrType::NoValue) | None => proc.write(b' '),
_ => {}