Allow more whitespace w.r.t. attributes; do not format bench test pages
This commit is contained in:
parent
ba3e1917ce
commit
37115fef7a
11
README.md
11
README.md
|
@ -351,17 +351,6 @@ Numeric character references that do not reference a valid [Unicode Scalar Value
|
||||||
Backticks (`` ` ``) are not valid quote marks and not interpreted as such.
|
Backticks (`` ` ``) are not valid quote marks and not interpreted as such.
|
||||||
However, backticks are valid attribute value quotes in Internet Explorer.
|
However, backticks are valid attribute value quotes in Internet Explorer.
|
||||||
|
|
||||||
It is an error if there is:
|
|
||||||
|
|
||||||
- whitespace between `=` and an attribute name/value; and/or
|
|
||||||
- no whitespace before an attribute.
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
```html
|
|
||||||
<div id = "a"class="abc"></div>
|
|
||||||
```
|
|
||||||
|
|
||||||
Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify, and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block) instead of JavaScript code.
|
Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify, and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block) instead of JavaScript code.
|
||||||
|
|
||||||
### Script and style
|
### Script and style
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
const {promises: fs} = require('fs');
|
const {promises: fs} = require('fs');
|
||||||
const request = require('request-promise-native');
|
const request = require('request-promise-native');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const prettier = require('prettier');
|
|
||||||
|
|
||||||
const tests = {
|
const tests = {
|
||||||
"Amazon": "https://www.amazon.com/",
|
"Amazon": "https://www.amazon.com/",
|
||||||
|
@ -38,31 +37,18 @@ const fetchTest = async (name, url) => {
|
||||||
|
|
||||||
// Format after fetching, as formatting is synchronous and can take so long that connections get dropped by the server due to inactivity.
|
// Format after fetching, as formatting is synchronous and can take so long that connections get dropped by the server due to inactivity.
|
||||||
for (const [name, html] of await Promise.all(Object.entries(tests).map(([name, url]) => fetchTest(name, url)))) {
|
for (const [name, html] of await Promise.all(Object.entries(tests).map(([name, url]) => fetchTest(name, url)))) {
|
||||||
// Apply some fixes to HTML to allow strict formatter to work.
|
// Apply some fixes to HTML.
|
||||||
const formatted = prettier.format(
|
const fixed = html
|
||||||
html
|
|
||||||
// Fix missing semicolon after entity in Amazon.
|
|
||||||
.replace(/​/g, '​')
|
|
||||||
// Fix consecutive malformed entities in Amazon.
|
|
||||||
.replace(/&& window.ue_sbl/g, '&& window.ue_sbl')
|
|
||||||
.replace(/&&pf_rd_p/g, '&&pf_rd_p')
|
|
||||||
// Fix early termination of conditional comment in Amazon.
|
// Fix early termination of conditional comment in Amazon.
|
||||||
.replace('--></style>\n<![endif]-->', '</style>\n<![endif]-->')
|
.replace('--></style>\n<![endif]-->', '</style>\n<![endif]-->')
|
||||||
// Fix closing of void tag in Amazon.
|
// Fix closing of void tag in Amazon.
|
||||||
.replace(/><\/hr>/g, '/>')
|
.replace(/><\/hr>/g, '/>')
|
||||||
// Fix extra '</div>' in BBC.
|
// Fix extra '</div>' in BBC.
|
||||||
.replace('</a></span></small></div></div></div></footer>', '</a></span></small></div></div></footer>')
|
.replace('</a></span></small></div></div></div></footer>', '</a></span></small></div></div></footer>')
|
||||||
// Fix consecutive malformed entities in Google.
|
|
||||||
.replace(/&&google.aft/g, '&&google.aft')
|
|
||||||
// Fix parser failing to parse unquoted attribute value starting with forward slash in Stack Overflow.
|
|
||||||
.replace('action=/search', 'action="/search"')
|
|
||||||
// Fix broken attribute value in Stack Overflow.
|
// Fix broken attribute value in Stack Overflow.
|
||||||
.replace('height=151"', 'height="151"')
|
.replace('height=151"', 'height="151"')
|
||||||
,
|
;
|
||||||
{parser: 'html'},
|
await fs.writeFile(path.join(__dirname, 'tests', `${name}.html`), fixed);
|
||||||
);
|
|
||||||
console.log(`Formatted ${name}`);
|
|
||||||
await fs.writeFile(path.join(__dirname, 'tests', `${name}.html`), formatted);
|
|
||||||
}
|
}
|
||||||
})()
|
})()
|
||||||
.catch(console.error);
|
.catch(console.error);
|
||||||
|
|
|
@ -8,7 +8,6 @@
|
||||||
"hyperbuild": "file:../nodejs",
|
"hyperbuild": "file:../nodejs",
|
||||||
"minimize": "2.2.0",
|
"minimize": "2.2.0",
|
||||||
"mkdirp": "^0.5.1",
|
"mkdirp": "^0.5.1",
|
||||||
"prettier": "^1.19.1",
|
|
||||||
"request": "^2.88.0",
|
"request": "^2.88.0",
|
||||||
"request-promise-native": "^1.0.8"
|
"request-promise-native": "^1.0.8"
|
||||||
},
|
},
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum ErrorType {
|
pub enum ErrorType {
|
||||||
ClosingTagMismatch,
|
ClosingTagMismatch,
|
||||||
NoSpaceBeforeAttr,
|
|
||||||
MatchNotFound(&'static [u8]),
|
MatchNotFound(&'static [u8]),
|
||||||
NotFound(&'static str),
|
NotFound(&'static str),
|
||||||
ExpectedChar(u8),
|
ExpectedChar(u8),
|
||||||
|
@ -15,9 +14,6 @@ impl ErrorType {
|
||||||
ErrorType::ClosingTagMismatch => {
|
ErrorType::ClosingTagMismatch => {
|
||||||
format!("Closing tag name does not match opening tag.")
|
format!("Closing tag name does not match opening tag.")
|
||||||
}
|
}
|
||||||
ErrorType::NoSpaceBeforeAttr => {
|
|
||||||
format!("Space required before attribute.")
|
|
||||||
}
|
|
||||||
ErrorType::MatchNotFound(seq) => {
|
ErrorType::MatchNotFound(seq) => {
|
||||||
format!("Expected `{}`.", unsafe { std::str::from_utf8_unchecked(seq) })
|
format!("Expected `{}`.", unsafe { std::str::from_utf8_unchecked(seq) })
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@ use phf::{phf_set, Set};
|
||||||
|
|
||||||
use crate::err::ProcessingResult;
|
use crate::err::ProcessingResult;
|
||||||
use crate::proc::{Processor, ProcessorRange};
|
use crate::proc::{Processor, ProcessorRange};
|
||||||
use crate::spec::codepoint::is_control;
|
use crate::spec::codepoint::{is_control, is_whitespace};
|
||||||
use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue, skip_attr_value};
|
use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue, skip_attr_value};
|
||||||
|
|
||||||
mod value;
|
mod value;
|
||||||
|
@ -44,11 +44,13 @@ pub fn process_attr(proc: &mut Processor, element: ProcessorRange) -> Processing
|
||||||
let after_name = proc.checkpoint();
|
let after_name = proc.checkpoint();
|
||||||
|
|
||||||
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
|
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
|
||||||
|
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
|
||||||
let has_value = chain!(proc.match_char(b'=').keep().matched());
|
let has_value = chain!(proc.match_char(b'=').keep().matched());
|
||||||
|
|
||||||
let (typ, value) = if !has_value {
|
let (typ, value) = if !has_value {
|
||||||
(AttrType::NoValue, None)
|
(AttrType::NoValue, None)
|
||||||
} else {
|
} else {
|
||||||
|
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
|
||||||
if is_boolean {
|
if is_boolean {
|
||||||
skip_attr_value(proc)?;
|
skip_attr_value(proc)?;
|
||||||
(AttrType::NoValue, None)
|
(AttrType::NoValue, None)
|
||||||
|
|
|
@ -108,17 +108,12 @@ pub fn process_tag(proc: &mut Processor, prev_sibling_closing_tag: Option<Proces
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This needs to be enforced as otherwise there would be difficulty in determining what is the end of a tag/attribute name/attribute value.
|
|
||||||
if !ws_accepted {
|
|
||||||
return Err(ErrorType::NoSpaceBeforeAttr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mark attribute start in case we want to erase it completely.
|
// Mark attribute start in case we want to erase it completely.
|
||||||
let attr_checkpoint = proc.checkpoint();
|
let attr_checkpoint = proc.checkpoint();
|
||||||
let mut erase_attr = false;
|
let mut erase_attr = false;
|
||||||
|
|
||||||
// Write space after tag name or unquoted/valueless attribute.
|
// Write space after tag name or unquoted/valueless attribute.
|
||||||
// Don't write after unquoted.
|
// Don't write after quoted.
|
||||||
match last_attr_type {
|
match last_attr_type {
|
||||||
Some(AttrType::Unquoted) | Some(AttrType::NoValue) | None => proc.write(b' '),
|
Some(AttrType::Unquoted) | Some(AttrType::NoValue) | None => proc.write(b' '),
|
||||||
_ => {}
|
_ => {}
|
||||||
|
|
Loading…
Reference in New Issue