Allow more whitespace w.r.t. attributes; do not format bench test pages

This commit is contained in:
Wilson Lin 2020-01-14 21:10:49 +11:00
parent ba3e1917ce
commit 37115fef7a
6 changed files with 16 additions and 49 deletions

View File

@ -351,17 +351,6 @@ Numeric character references that do not reference a valid [Unicode Scalar Value
Backticks (`` ` ``) are not valid quote marks and not interpreted as such. Backticks (`` ` ``) are not valid quote marks and not interpreted as such.
However, backticks are valid attribute value quotes in Internet Explorer. However, backticks are valid attribute value quotes in Internet Explorer.
It is an error if there is:
- whitespace between `=` and an attribute name/value; and/or
- no whitespace before an attribute.
For example:
```html
<div id = "a"class="abc"></div>
```
Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify, and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block) instead of JavaScript code. Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify, and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block) instead of JavaScript code.
### Script and style ### Script and style

View File

@ -1,7 +1,6 @@
const {promises: fs} = require('fs'); const {promises: fs} = require('fs');
const request = require('request-promise-native'); const request = require('request-promise-native');
const path = require('path'); const path = require('path');
const prettier = require('prettier');
const tests = { const tests = {
"Amazon": "https://www.amazon.com/", "Amazon": "https://www.amazon.com/",
@ -38,31 +37,18 @@ const fetchTest = async (name, url) => {
// Format after fetching as formatting is synchronous and can take so long that connections get dropped by server due to inactivity. // Format after fetching as formatting is synchronous and can take so long that connections get dropped by server due to inactivity.
for (const [name, html] of await Promise.all(Object.entries(tests).map(([name, url]) => fetchTest(name, url)))) { for (const [name, html] of await Promise.all(Object.entries(tests).map(([name, url]) => fetchTest(name, url)))) {
// Apply some fixes to HTML to allow strict formatter to work. // Apply some fixes to HTML.
const formatted = prettier.format( const fixed = html
html // Fix early termination of conditional comment in Amazon.
// Fix missing semicolon after entity in Amazon. .replace('--></style>\n<![endif]-->', '</style>\n<![endif]-->')
.replace(/&#x200b/g, '&#x200b;') // Fix closing of void tag in Amazon.
// Fix consecutive malformed entities in Amazon. .replace(/><\/hr>/g, '/>')
.replace(/&& window.ue_sbl/g, '&amp&amp window.ue_sbl') // Fix extra '</div>' in BBC.
.replace(/&&pf_rd_p/g, '&amp&amppf_rd_p') .replace('</a></span></small></div></div></div></footer>', '</a></span></small></div></div></footer>')
// Fix early termination of conditional comment in Amazon. // Fix broken attribute value in Stack Overflow.
.replace('--></style>\n<![endif]-->', '</style>\n<![endif]-->') .replace('height=151"', 'height="151"')
// Fix closing of void tag in Amazon. ;
.replace(/><\/hr>/g, '/>') await fs.writeFile(path.join(__dirname, 'tests', `${name}.html`), fixed);
// Fix extra '</div>' in BBC.
.replace('</a></span></small></div></div></div></footer>', '</a></span></small></div></div></footer>')
// Fix consecutive malformed entities in Google.
.replace(/&&google.aft/g, '&amp&ampgoogle.aft')
// Fix parser failing to parse unquoted attribute value starting with forward slash in Stack Overflow.
.replace('action=/search', 'action="/search"')
// Fix broken attribute value in Stack Overflow.
.replace('height=151"', 'height="151"')
,
{parser: 'html'},
);
console.log(`Formatted ${name}`);
await fs.writeFile(path.join(__dirname, 'tests', `${name}.html`), formatted);
} }
})() })()
.catch(console.error); .catch(console.error);

View File

@ -8,7 +8,6 @@
"hyperbuild": "file:../nodejs", "hyperbuild": "file:../nodejs",
"minimize": "2.2.0", "minimize": "2.2.0",
"mkdirp": "^0.5.1", "mkdirp": "^0.5.1",
"prettier": "^1.19.1",
"request": "^2.88.0", "request": "^2.88.0",
"request-promise-native": "^1.0.8" "request-promise-native": "^1.0.8"
}, },

View File

@ -2,7 +2,6 @@
#[derive(Debug)] #[derive(Debug)]
pub enum ErrorType { pub enum ErrorType {
ClosingTagMismatch, ClosingTagMismatch,
NoSpaceBeforeAttr,
MatchNotFound(&'static [u8]), MatchNotFound(&'static [u8]),
NotFound(&'static str), NotFound(&'static str),
ExpectedChar(u8), ExpectedChar(u8),
@ -15,9 +14,6 @@ impl ErrorType {
ErrorType::ClosingTagMismatch => { ErrorType::ClosingTagMismatch => {
format!("Closing tag name does not match opening tag.") format!("Closing tag name does not match opening tag.")
} }
ErrorType::NoSpaceBeforeAttr => {
format!("Space required before attribute.")
}
ErrorType::MatchNotFound(seq) => { ErrorType::MatchNotFound(seq) => {
format!("Expected `{}`.", unsafe { std::str::from_utf8_unchecked(seq) }) format!("Expected `{}`.", unsafe { std::str::from_utf8_unchecked(seq) })
} }

View File

@ -2,7 +2,7 @@ use phf::{phf_set, Set};
use crate::err::ProcessingResult; use crate::err::ProcessingResult;
use crate::proc::{Processor, ProcessorRange}; use crate::proc::{Processor, ProcessorRange};
use crate::spec::codepoint::is_control; use crate::spec::codepoint::{is_control, is_whitespace};
use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue, skip_attr_value}; use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue, skip_attr_value};
mod value; mod value;
@ -44,11 +44,13 @@ pub fn process_attr(proc: &mut Processor, element: ProcessorRange) -> Processing
let after_name = proc.checkpoint(); let after_name = proc.checkpoint();
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]); let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
let has_value = chain!(proc.match_char(b'=').keep().matched()); let has_value = chain!(proc.match_char(b'=').keep().matched());
let (typ, value) = if !has_value { let (typ, value) = if !has_value {
(AttrType::NoValue, None) (AttrType::NoValue, None)
} else { } else {
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
if is_boolean { if is_boolean {
skip_attr_value(proc)?; skip_attr_value(proc)?;
(AttrType::NoValue, None) (AttrType::NoValue, None)

View File

@ -108,17 +108,12 @@ pub fn process_tag(proc: &mut Processor, prev_sibling_closing_tag: Option<Proces
break; break;
} }
// This needs to be enforced as otherwise there would be difficulty in determining what is the end of a tag/attribute name/attribute value.
if !ws_accepted {
return Err(ErrorType::NoSpaceBeforeAttr);
}
// Mark attribute start in case we want to erase it completely. // Mark attribute start in case we want to erase it completely.
let attr_checkpoint = proc.checkpoint(); let attr_checkpoint = proc.checkpoint();
let mut erase_attr = false; let mut erase_attr = false;
// Write space after tag name or unquoted/valueless attribute. // Write space after tag name or unquoted/valueless attribute.
// Don't write after unquoted. // Don't write after quoted.
match last_attr_type { match last_attr_type {
Some(AttrType::Unquoted) | Some(AttrType::NoValue) | None => proc.write(b' '), Some(AttrType::Unquoted) | Some(AttrType::NoValue) | None => proc.write(b' '),
_ => {} _ => {}