Allow more whitespace w.r.t. attributes; do not format bench test pages
This commit is contained in:
parent
ba3e1917ce
commit
37115fef7a
11
README.md
11
README.md
|
@ -351,17 +351,6 @@ Numeric character references that do not reference a valid [Unicode Scalar Value
|
|||
Backticks (`` ` ``) are not valid quote marks and not interpreted as such.
|
||||
However, backticks are valid attribute value quotes in Internet Explorer.
|
||||
|
||||
It is an error if there is:
|
||||
|
||||
- whitespace between `=` and an attribute name/value; and/or
|
||||
- no whitespace before an attribute.
|
||||
|
||||
For example:
|
||||
|
||||
```html
|
||||
<div id = "a"class="abc"></div>
|
||||
```
|
||||
|
||||
Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify, and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block) instead of JavaScript code.
|
||||
|
||||
### Script and style
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
const {promises: fs} = require('fs');
|
||||
const request = require('request-promise-native');
|
||||
const path = require('path');
|
||||
const prettier = require('prettier');
|
||||
|
||||
const tests = {
|
||||
"Amazon": "https://www.amazon.com/",
|
||||
|
@ -38,31 +37,18 @@ const fetchTest = async (name, url) => {
|
|||
|
||||
// Format after fetching as formatting is synchronous and can take so long that connections get dropped by server due to inactivity.
|
||||
for (const [name, html] of await Promise.all(Object.entries(tests).map(([name, url]) => fetchTest(name, url)))) {
|
||||
// Apply some fixes to HTML to allow strict formatter to work.
|
||||
const formatted = prettier.format(
|
||||
html
|
||||
// Fix missing semicolon after entity in Amazon.
|
||||
.replace(/​/g, '​')
|
||||
// Fix consecutive malformed entities in Amazon.
|
||||
.replace(/&& window.ue_sbl/g, '&& window.ue_sbl')
|
||||
.replace(/&&pf_rd_p/g, '&&pf_rd_p')
|
||||
// Fix early termination of conditional comment in Amazon.
|
||||
.replace('--></style>\n<![endif]-->', '</style>\n<![endif]-->')
|
||||
// Fix closing of void tag in Amazon.
|
||||
.replace(/><\/hr>/g, '/>')
|
||||
// Fix extra '</div>' in BBC.
|
||||
.replace('</a></span></small></div></div></div></footer>', '</a></span></small></div></div></footer>')
|
||||
// Fix consecutive malformed entities in Google.
|
||||
.replace(/&&google.aft/g, '&&google.aft')
|
||||
// Fix parser failing to parse unquoted attribute value starting with forward slash in Stack Overflow.
|
||||
.replace('action=/search', 'action="/search"')
|
||||
// Fix broken attribute value in Stack Overflow.
|
||||
.replace('height=151"', 'height="151"')
|
||||
,
|
||||
{parser: 'html'},
|
||||
);
|
||||
console.log(`Formatted ${name}`);
|
||||
await fs.writeFile(path.join(__dirname, 'tests', `${name}.html`), formatted);
|
||||
// Apply some fixes to HTML.
|
||||
const fixed = html
|
||||
// Fix early termination of conditional comment in Amazon.
|
||||
.replace('--></style>\n<![endif]-->', '</style>\n<![endif]-->')
|
||||
// Fix closing of void tag in Amazon.
|
||||
.replace(/><\/hr>/g, '/>')
|
||||
// Fix extra '</div>' in BBC.
|
||||
.replace('</a></span></small></div></div></div></footer>', '</a></span></small></div></div></footer>')
|
||||
// Fix broken attribute value in Stack Overflow.
|
||||
.replace('height=151"', 'height="151"')
|
||||
;
|
||||
await fs.writeFile(path.join(__dirname, 'tests', `${name}.html`), fixed);
|
||||
}
|
||||
})()
|
||||
.catch(console.error);
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
"hyperbuild": "file:../nodejs",
|
||||
"minimize": "2.2.0",
|
||||
"mkdirp": "^0.5.1",
|
||||
"prettier": "^1.19.1",
|
||||
"request": "^2.88.0",
|
||||
"request-promise-native": "^1.0.8"
|
||||
},
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
#[derive(Debug)]
|
||||
pub enum ErrorType {
|
||||
ClosingTagMismatch,
|
||||
NoSpaceBeforeAttr,
|
||||
MatchNotFound(&'static [u8]),
|
||||
NotFound(&'static str),
|
||||
ExpectedChar(u8),
|
||||
|
@ -15,9 +14,6 @@ impl ErrorType {
|
|||
ErrorType::ClosingTagMismatch => {
|
||||
format!("Closing tag name does not match opening tag.")
|
||||
}
|
||||
ErrorType::NoSpaceBeforeAttr => {
|
||||
format!("Space required before attribute.")
|
||||
}
|
||||
ErrorType::MatchNotFound(seq) => {
|
||||
format!("Expected `{}`.", unsafe { std::str::from_utf8_unchecked(seq) })
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@ use phf::{phf_set, Set};
|
|||
|
||||
use crate::err::ProcessingResult;
|
||||
use crate::proc::{Processor, ProcessorRange};
|
||||
use crate::spec::codepoint::is_control;
|
||||
use crate::spec::codepoint::{is_control, is_whitespace};
|
||||
use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue, skip_attr_value};
|
||||
|
||||
mod value;
|
||||
|
@ -44,11 +44,13 @@ pub fn process_attr(proc: &mut Processor, element: ProcessorRange) -> Processing
|
|||
let after_name = proc.checkpoint();
|
||||
|
||||
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
|
||||
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
|
||||
let has_value = chain!(proc.match_char(b'=').keep().matched());
|
||||
|
||||
let (typ, value) = if !has_value {
|
||||
(AttrType::NoValue, None)
|
||||
} else {
|
||||
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
|
||||
if is_boolean {
|
||||
skip_attr_value(proc)?;
|
||||
(AttrType::NoValue, None)
|
||||
|
|
|
@ -108,17 +108,12 @@ pub fn process_tag(proc: &mut Processor, prev_sibling_closing_tag: Option<Proces
|
|||
break;
|
||||
}
|
||||
|
||||
// This needs to be enforced as otherwise there would be difficulty in determining what is the end of a tag/attribute name/attribute value.
|
||||
if !ws_accepted {
|
||||
return Err(ErrorType::NoSpaceBeforeAttr);
|
||||
}
|
||||
|
||||
// Mark attribute start in case we want to erase it completely.
|
||||
let attr_checkpoint = proc.checkpoint();
|
||||
let mut erase_attr = false;
|
||||
|
||||
// Write space after tag name or unquoted/valueless attribute.
|
||||
// Don't write after unquoted.
|
||||
// Don't write after quoted.
|
||||
match last_attr_type {
|
||||
Some(AttrType::Unquoted) | Some(AttrType::NoValue) | None => proc.write(b' '),
|
||||
_ => {}
|
||||
|
|
Loading…
Reference in New Issue