Remove boolean attribute values

This commit is contained in:
Wilson Lin 2020-01-08 00:38:42 +11:00
parent 94071a8302
commit 9a9b543b26
8 changed files with 223 additions and 11 deletions

View File

@ -300,10 +300,12 @@ Some attributes have their whitespace (after any decoding) trimmed and collapsed
- `class`
`type` attributes on `script` tags with an empty value or value equaling a [JavaScript MIME type](https://mimesniff.spec.whatwg.org/#javascript-mime-type) are removed.
[Boolean attributes](./gen/boolean_attrs.json) will have their values removed.
`type` attributes on `script` tags with a value equaling a [JavaScript MIME type](https://mimesniff.spec.whatwg.org/#javascript-mime-type) are removed.
`type` attributes on `style` tags are removed.
If an attribute value is empty after any processing, it is completely removed (i.e. no `=`).
If an attribute value is empty after any processing, it is completely removed (i.e. no `=`), as an empty attribute is implicitly [the same](https://html.spec.whatwg.org/multipage/syntax.html#attributes-2) as an attribute with an empty string value.
Spaces are removed between attributes if possible.

View File

@ -263,6 +263,24 @@ fn build_pattern(pattern: String) -> String {
table.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(", "))
}
/// Generates Rust source describing which attributes are boolean on which elements.
///
/// Loads a map of attribute name -> element names via `read_json("boolean_attrs")` and emits:
/// - one `static <ATTR>_BOOLEAN_ATTR` `phf::Set` per attribute, containing its element names;
/// - a public `BOOLEAN_ATTRS` `phf::Map` from attribute name to the corresponding set.
///
/// The resulting code is handed to `write_rs("boolean_attrs", ..)`.
/// NOTE(review): presumably this produces the `gen_boolean_attrs.rs` file that the crate
/// `include!`s from `OUT_DIR` -- confirm against `write_rs`.
fn generate_boolean_attrs() {
    let attrs: HashMap<String, Vec<String>> = read_json("boolean_attrs");

    // `HashMap` iteration order is randomised per process, so emitting entries in map order would
    // make the generated file differ between otherwise identical builds. Sort by attribute name
    // to keep the output reproducible.
    let mut sorted_attrs: Vec<(&String, &Vec<String>)> = attrs.iter().collect();
    sorted_attrs.sort_unstable_by(|a, b| a.0.cmp(b.0));

    let mut code = String::new();

    // One set of element names per boolean attribute.
    for (name, elems) in &sorted_attrs {
        let members = elems
            .iter()
            .map(|e| format!("b\"{}\"", e))
            .collect::<Vec<String>>()
            .join(", ");
        code.push_str(&format!(
            "static {}_BOOLEAN_ATTR: &phf::Set<&'static [u8]> = &phf::phf_set!({});\n\n",
            name.to_uppercase(),
            members,
        ));
    }

    // Lookup table from attribute name to the set generated above.
    code.push_str("pub static BOOLEAN_ATTRS: phf::Map<&'static [u8], &'static phf::Set<&'static [u8]>> = phf::phf_map!{\n");
    for (name, _) in &sorted_attrs {
        code.push_str(&format!("\tb\"{}\" => {}_BOOLEAN_ATTR,\n", name, name.to_uppercase()));
    }
    code.push_str("};\n\n");

    write_rs("boolean_attrs", code);
}
#[derive(Serialize, Deserialize)]
struct Entity {
codepoints: Vec<u32>,
@ -320,6 +338,7 @@ fn generate_tries() {
}
fn main() {
generate_boolean_attrs();
generate_entities();
generate_patterns();
generate_tries();

2
gen/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
node_modules/
build/*.d.ts

95
gen/boolean_attrs.json Normal file
View File

@ -0,0 +1,95 @@
{
"autofocus": [
"button",
"input",
"keygen",
"select",
"textarea"
],
"disabled": [
"button",
"fieldset",
"input",
"keygen",
"optgroup",
"option",
"select",
"textarea"
],
"formnovalidate": [
"button",
"input"
],
"open": [
"details",
"dialog"
],
"novalidate": [
"form"
],
"allowfullscreen": [
"iframe"
],
"allowtransparency": [
"iframe"
],
"seamless": [
"iframe"
],
"checked": [
"input"
],
"multiple": [
"input",
"select"
],
"readonly": [
"input",
"textarea"
],
"required": [
"input",
"select",
"textarea"
],
"autoplay": [
"media"
],
"controls": [
"media"
],
"loop": [
"media"
],
"muted": [
"media"
],
"playsinline": [
"media",
"video"
],
"reversed": [
"ol"
],
"selected": [
"option"
],
"async": [
"script"
],
"defer": [
"script"
],
"nomodule": [
"script"
],
"scoped": [
"style"
],
"default": [
"track"
],
"disablepictureinpicture": [
"video"
]
}

76
gen/build/dom.js Normal file
View File

@ -0,0 +1,76 @@
const request = require('request-promise-native');
const {promises: fs} = require('fs');
const ts = require('typescript');
const path = require('path');
// Splits a camelCase identifier into its lowercase words, e.g. `allowFullScreen` -> ['allow', 'full', 'screen'].
const fromCamelCase = camelCase => {
  // Break immediately before every uppercase letter (the `^` alternative matches only at the start).
  const words = camelCase.split(/(?=^|[A-Z])/);
  return words.map(word => word.toLowerCase());
};
// Output file for the generated attribute -> tag names JSON map; lives one directory above this script.
const BOOLEAN_ATTRS_PATH = path.join(__dirname, '..', 'boolean_attrs.json');
// Raw React type declarations from DefinitelyTyped; boolean-typed props are extracted from these.
const REACT_TYPINGS_URL = 'https://raw.githubusercontent.com/DefinitelyTyped/DefinitelyTyped/master/types/react/index.d.ts';
// Local copy of the downloaded typings, stored next to this script so later runs can skip the download.
const REACT_TYPINGS_FILE = path.join(__dirname, 'react.d.ts');
// Resolves to the text of the React typings.
//
// The copy at REACT_TYPINGS_FILE is used when it exists. If it is missing (ENOENT), the typings are
// downloaded from REACT_TYPINGS_URL, saved to REACT_TYPINGS_FILE, and returned. Any other read
// error is rethrown unchanged.
const get_react_typings_source = async () => {
  let cached;
  try {
    cached = await fs.readFile(REACT_TYPINGS_FILE, "utf8");
  } catch (readError) {
    if (readError.code === "ENOENT") {
      // No local copy yet: fetch it and store it for subsequent runs.
      const downloaded = await request(REACT_TYPINGS_URL);
      await fs.writeFile(REACT_TYPINGS_FILE, downloaded);
      return downloaded;
    }
    throw readError;
  }
  return cached;
};
// Walks the parsed React typings and writes BOOLEAN_ATTRS_PATH: a JSON object mapping each
// lowercased boolean-typed property name to the list of lowercased `<Name>HTMLAttributes`
// interface prefixes (treated as tag names) that declare it.
//
// `source` is the root node of the parsed declarations; every node reachable through
// `forEachChild` is visited in breadth-first order.
const processReactTypeDeclarations = async (source) => {
  // NOTE(review): assigned from the `ReactHTML` interface below but never read in this function.
  let tagNameToInterfaceMap;
  const booleanAttributes = new Map();

  // Appends `tagName` to the list kept for `attrName`, creating the list on first use.
  const recordBooleanAttribute = (attrName, tagName) => {
    const key = attrName.toLowerCase();
    if (!booleanAttributes.has(key)) {
      booleanAttributes.set(key, []);
    }
    booleanAttributes.get(key).push(tagName);
  };

  // FIFO work list; `cursor` marks the next node to process, so nodes are handled in insertion order.
  const queue = [source];
  for (let cursor = 0; cursor < queue.length; cursor++) {
    const node = queue[cursor];

    if (node.kind === ts.SyntaxKind.InterfaceDeclaration) {
      const interfaceName = node.name.escapedText;
      if (interfaceName === "ReactHTML") {
        // Each member of ReactHTML looks something like:
        //
        //   area: DetailedHTMLFactory<AreaHTMLAttributes<HTMLAreaElement>, HTMLAreaElement>;
        //   ^^^^ [1]                                                       ^^^^^^^^^^^^^^^ [2]
        //
        // Get mapping from tag name [1] to interface name [2].
        tagNameToInterfaceMap = Object.fromEntries(node.members.map(member => [
          member.name.escapedText,
          member.type.typeArguments[1].typeName.escapedText,
        ]));
      } else {
        const attrsInterface = /^([A-Za-z]+)HTMLAttributes/.exec(interfaceName);
        if (attrsInterface) {
          const tagName = attrsInterface[1].toLowerCase();
          // `AllHTMLAttributes` and `WebViewHTMLAttributes` are deliberately excluded.
          if (tagName !== 'all' && tagName !== 'webview') {
            for (const member of node.members) {
              if (member.kind !== ts.SyntaxKind.PropertySignature) {
                continue;
              }
              if (member.type.kind !== ts.SyntaxKind.BooleanKeyword) {
                continue;
              }
              recordBooleanAttribute(member.name.escapedText, tagName);
            }
          }
        }
      }
    }

    // The callback must return nothing: returning the new length from `push` (a truthy number)
    // makes forEachChild stop before visiting the remaining children.
    node.forEachChild(child => {
      queue.push(child);
    });
  }

  const json = JSON.stringify(Object.fromEntries(booleanAttributes), null, 2);
  await fs.writeFile(BOOLEAN_ATTRS_PATH, json);
};
// Entry point: obtain the React typings, parse them with the TypeScript compiler API, and
// regenerate the boolean attributes JSON file.
(async () => {
  const source = ts.createSourceFile(`react.d.ts`, await get_react_typings_source(), ts.ScriptTarget.ES2019);
  await processReactTypeDeclarations(source);
})().catch(err => {
  // Without an explicit handler a failure only surfaces as an unhandled promise rejection, which
  // older Node.js releases report as a warning while still exiting with status 0. Log the error
  // and make the process exit non-zero so callers and CI notice the failed generation.
  console.error(err);
  process.exitCode = 1;
});

8
gen/build/package.json Normal file
View File

@ -0,0 +1,8 @@
{
"private": true,
"dependencies": {
"request": "^2.88.0",
"request-promise-native": "^1.0.8",
"typescript": "^3.7.4"
}
}

View File

@ -7,6 +7,8 @@ use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrV
mod value;
include!(concat!(env!("OUT_DIR"), "/gen_boolean_attrs.rs"));
/// Names of attributes whose value whitespace should be collapsed and trimmed.
///
/// `process_attr` checks the attribute name against this set and forwards the result to
/// `process_attr_value`.
static COLLAPSIBLE_AND_TRIMMABLE_ATTRS: Set<&'static [u8]> = phf_set! {
b"class",
};
@ -34,10 +36,11 @@ fn is_name_char(c: u8) -> bool {
}
}
pub fn process_attr(proc: &mut Processor) -> ProcessingResult<ProcessedAttr> {
pub fn process_attr(proc: &mut Processor, element: ProcessorRange) -> ProcessingResult<ProcessedAttr> {
// It's possible to expect attribute name but not be called at an attribute, e.g. due to whitespace between name and
// value, which causes name to be considered boolean attribute and `=` to be start of new (invalid) attribute name.
let name = chain!(proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().range());
let is_boolean = BOOLEAN_ATTRS.get(&proc[name]).filter(|elems| elems.contains(&proc[element])).is_some();
let after_name = proc.checkpoint();
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
@ -46,14 +49,21 @@ pub fn process_attr(proc: &mut Processor) -> ProcessingResult<ProcessedAttr> {
let (typ, value) = if !has_value {
(AttrType::NoValue, None)
} else {
match process_attr_value(proc, should_collapse_and_trim_value_ws)? {
ProcessedAttrValue { value: None, .. } => {
// Value is empty, which is equivalent to no value, so discard `=` and any quotes.
proc.erase_written(after_name);
(AttrType::NoValue, None)
// TODO Don't process if going to erase anyway.
let val = process_attr_value(proc, should_collapse_and_trim_value_ws)?;
if is_boolean {
proc.erase_written(after_name);
(AttrType::NoValue, None)
} else {
match val {
ProcessedAttrValue { value: None, .. } => {
// Value is empty, which is equivalent to no value, so discard `=` and any quotes.
proc.erase_written(after_name);
(AttrType::NoValue, None)
}
ProcessedAttrValue { delimiter: DelimiterType::Unquoted, value } => (AttrType::Unquoted, value),
ProcessedAttrValue { delimiter: DelimiterType::Double, value } | ProcessedAttrValue { delimiter: DelimiterType::Single, value } => (AttrType::Quoted, value),
}
ProcessedAttrValue { delimiter: DelimiterType::Unquoted, value } => (AttrType::Unquoted, value),
ProcessedAttrValue { delimiter: DelimiterType::Double, value } | ProcessedAttrValue { delimiter: DelimiterType::Single, value } => (AttrType::Quoted, value),
}
};

View File

@ -125,7 +125,7 @@ pub fn process_tag(proc: &mut Processor, prev_sibling_closing_tag: Option<Proces
_ => {}
};
let ProcessedAttr { name, typ, value } = process_attr(proc)?;
let ProcessedAttr { name, typ, value } = process_attr(proc, tag_name)?;
match (tag_type, &proc[name]) {
(TagType::Script, b"type") => {
// It's JS if the value is empty or one of `JAVASCRIPT_MIME_TYPES`.