Minify SVG attributes; minor QoL improvements

This commit is contained in:
Wilson 2020-01-24 03:53:09 +13:00
parent 24951cb7ee
commit 54b65bbd0c
10 changed files with 4080 additions and 1434 deletions

View File

@ -8,6 +8,8 @@ const colours = {
'html-minifier': '#2ca02c',
};
// Formats a fractional ratio (e.g. 0.5) as a percentage label with exactly one decimal place (e.g. "50.0%").
const percentageTick = ratio => {
  // Round to the nearest tenth of a percent before formatting.
  const tenthsOfPercent = Math.round(ratio * 1000);
  return `${(tenthsOfPercent / 10).toFixed(1)}%`;
};
const chartOptions = (title, displayLegend, yTick = t => t) => ({
options: {
title: {
@ -65,7 +67,7 @@ const renderChart = async (cfg) => {
data: averageSpeeds.map(([_, v]) => v),
}],
},
...chartOptions('Average operations per second (higher is better)', false, tick => `${tick * 100}%`),
...chartOptions('Average operations per second (higher is better)', false, percentageTick),
}));
const speeds = results.getSpeedResults().getRelativeFileSpeedsPerMinifier('hyperbuild-nodejs');
@ -79,7 +81,7 @@ const renderChart = async (cfg) => {
data: fileSpeeds.map(([_, speed]) => speed),
})),
},
...chartOptions('Operations per second (higher is better)', true, tick => `${tick * 100}%`),
...chartOptions('Operations per second (higher is better)', true, percentageTick),
}));
const averageSizes = results.getSizeResults().getAverageRelativeSizePerMinifier();
@ -94,7 +96,7 @@ const renderChart = async (cfg) => {
data: averageSizes.map(([_, v]) => v),
}],
},
...chartOptions('Average minified size (lower is better)', false, tick => `${tick * 100}%`),
...chartOptions('Average minified size (lower is better)', false, percentageTick),
}));
const sizes = results.getSizeResults().getRelativeFileSizesPerMinifier();
@ -108,6 +110,6 @@ const renderChart = async (cfg) => {
data: fileSizes.map(([_, size]) => size),
})),
},
...chartOptions('Minified size (lower is better)', true, tick => `${tick * 100}%`),
...chartOptions('Minified size (lower is better)', true, percentageTick),
}));
})();

View File

@ -2,7 +2,7 @@ const fs = require('fs');
const path = require('path');
const testsDir = path.join(__dirname, 'tests');
module.exports = fs.readdirSync(testsDir).map(name => ({
module.exports = fs.readdirSync(testsDir).filter(f => !/^\./.test(f)).map(name => ({
name,
contentAsString: fs.readFileSync(path.join(testsDir, name), 'utf8'),
contentAsBuffer: fs.readFileSync(path.join(testsDir, name)),

View File

@ -118,33 +118,50 @@ impl TagAttr {
}
fn generate_attr_map() {
let attrs: HashMap<String, HashMap<String, TagAttr>> = read_json("attrs");
let attrs: HashMap<String, HashMap<String, HashMap<String, TagAttr>>> = read_json("attrs");
let mut code = String::new();
for (attr_name, tags_map) in attrs.iter() {
if let Some(global_attr) = tags_map.get("") {
code.push_str(format!(
"static {}_ATTR: &AttrMapEntry = &AttrMapEntry::AllHtmlElements({});\n\n",
attr_name.to_uppercase(),
global_attr.code(),
).as_str());
} else {
code.push_str(format!(
"static {}_ATTR: &AttrMapEntry = &AttrMapEntry::DistinctHtmlElements(phf::phf_map! {{\n{}\n}});\n\n",
attr_name.to_uppercase(),
tags_map
.iter()
.map(|(tag_name, tag_attr)| format!(
"b\"{}\" => {}",
tag_name,
tag_attr.code(),
))
.collect::<Vec<String>>()
.join(",\n"),
).as_str());
for (attr_name, namespaces) in attrs.iter() {
let mut by_namespace_code = String::new();
by_namespace_code.push_str(format!("static {}_ATTR: ByNamespace = ByNamespace {{\n", attr_name.to_uppercase()).as_str());
for namespace in ["html".to_string(), "svg".to_string()].iter() {
by_namespace_code.push_str(format!("\t{}: ", namespace).as_str());
match namespaces.get(namespace) {
None => by_namespace_code.push_str("None"),
Some(tags_map) => {
if let Some(global_attr) = tags_map.get("*") {
code.push_str(format!(
"static {}_{}_ATTR: &AttrMapEntry = &AttrMapEntry::AllNamespaceElements({});\n\n",
namespace.to_uppercase(),
attr_name.to_uppercase(),
global_attr.code(),
).as_str());
} else {
code.push_str(format!(
"static {}_{}_ATTR: &AttrMapEntry = &AttrMapEntry::SpecificNamespaceElements(phf::phf_map! {{\n{}\n}});\n\n",
namespace.to_uppercase(),
attr_name.to_uppercase(),
tags_map
.iter()
.map(|(tag_name, tag_attr)| format!(
"b\"{}\" => {}",
tag_name,
tag_attr.code(),
))
.collect::<Vec<String>>()
.join(",\n"),
).as_str());
};
by_namespace_code.push_str(format!("Some({}_{}_ATTR)", namespace.to_uppercase(), attr_name.to_uppercase()).as_str());
}
};
by_namespace_code.push_str(",\n");
};
by_namespace_code.push_str("};\n\n");
code.push_str(&by_namespace_code);
};
code.push_str("pub static ATTRS: AttrMap = AttrMap::new(phf::phf_map! {\n");
for attr_name in attrs.keys() {
for (attr_name, namespaces) in attrs.iter() {
for (namespace, tags_map) in namespaces.iter() {}
code.push_str(format!("\tb\"{}\" => {}_ATTR,\n", attr_name, attr_name.to_uppercase()).as_str());
};
code.push_str("});\n\n");

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,14 @@ const {promises: fs} = require('fs');
const ts = require('typescript');
const path = require('path');
// Comparator for [name, value] entries: orders two entries by their names using localeCompare.
const compareEntryNames = ([leftName], [rightName]) => leftName.localeCompare(rightName);
// Recursively converts a Map into a plain object. Values that are themselves Maps are converted
// the same way; all other values are kept as-is. Entries are sorted by key with compareEntryNames
// before the object is built.
const deepObjectifyMap = map => {
  const entries = Array.from(
    map.entries(),
    ([key, value]) => [key, value instanceof Map ? deepObjectifyMap(value) : value]
  );
  entries.sort(compareEntryNames);
  return Object.fromEntries(entries);
};
// Splits a camelCase identifier into its words (breaking before each uppercase ASCII letter)
// and returns the words lowercased.
const fromCamelCase = camelCase => {
  const words = camelCase.split(/(?=^|[A-Z])/);
  return words.map(word => word.toLowerCase());
};
// Serialises a value to JSON, pretty-printed with two-space indentation.
const prettyjson = value => {
  const indentSpaces = 2;
  return JSON.stringify(value, null, indentSpaces);
};
const ATTRS_PATH = path.join(__dirname, '..', 'attrs.json');
@ -38,83 +45,84 @@ const reactSpecificAttributes = [
// TODO This is currently manually sourced and written. Try to get machine-readable spec and automate.
const defaultAttributeValues = {
'align': [{
tags: ['img'],
tags: ['html:img'],
defaultValue: 'bottom',
}],
'decoding': [{
tags: ['img'],
tags: ['html:img'],
defaultValue: 'auto',
}],
'enctype': [{
tags: ['form'],
tags: ['html:form'],
defaultValue: 'application/x-www-form-urlencoded',
}],
'frameborder': [{
tags: ['iframe'],
tags: ['html:iframe'],
defaultValue: '1',
isPositiveInteger: true,
}],
'formenctype': [{
tags: ['button', 'input'],
tags: ['html:button', 'html:input'],
defaultValue: 'application/x-www-form-urlencoded',
}],
'height': [{
tags: ['iframe'],
tags: ['html:iframe'],
defaultValue: '150',
isPositiveInteger: true,
}],
'importance': [{
tags: ['iframe'],
tags: ['html:iframe'],
defaultValue: 'auto',
}],
'loading': [{
tags: ['iframe', 'img'],
tags: ['html:iframe', 'html:img'],
defaultValue: 'eager',
}],
'media': [{
tags: ['style'],
tags: ['html:style'],
defaultValue: 'all',
}],
'method': [{
tags: ['form'],
tags: ['html:form'],
defaultValue: 'get',
}],
'referrerpolicy': [{
tags: ['iframe', 'img'],
tags: ['html:iframe', 'html:img'],
defaultValue: 'no-referrer-when-downgrade',
}],
'rules': [{
tags: ['table'],
tags: ['html:table'],
defaultValue: 'none',
}],
'span': [{
tags: ['col', 'colgroup'],
tags: ['html:col', 'html:colgroup'],
defaultValue: '1',
isPositiveInteger: true,
}],
'target': [{
tags: ['a', 'form'],
tags: ['html:a', 'html:form'],
defaultValue: '_self',
}],
'type': [{
tags: ['button'],
tags: ['html:button'],
defaultValue: 'submit',
}, {
tags: ['input'],
tags: ['html:input'],
defaultValue: 'text',
}, {
tags: ['link', 'style'],
tags: ['html:link', 'html:style'],
defaultValue: 'text/css',
}],
'width': [{
tags: ['iframe'],
tags: ['html:iframe'],
defaultValue: '300',
isPositiveInteger: true,
}],
}]
};
const collapsibleAndTrimmable = {
'class': [''],
'class': ['html:*'],
'd': ['svg:*'],
};
// TODO Is escapedText the API for getting name?
@ -130,20 +138,23 @@ const processReactTypeDeclarations = async (source) => {
}
const attributeNodes = nodes
.filter(n => n.kind === ts.SyntaxKind.InterfaceDeclaration)
.map(n => [/^([A-Za-z]*)HTMLAttributes/.exec(getNameOfNode(n)), n])
.map(n => [/^([A-Za-z]*)(HTML|SVG)Attributes/.exec(getNameOfNode(n)), n])
.filter(([matches]) => matches)
.map(([matches, node]) => [normaliseName(matches[1], tagNameNormalised), node])
.filter(([tagName]) => !['all', 'webview'].includes(tagName))
.sort((a, b) => a[0].localeCompare(b[0]));
.map(([matches, node]) => [matches[2].toLowerCase(), normaliseName(matches[1], tagNameNormalised), node])
.filter(([namespace, tagName]) => namespace !== 'html' || !['all', 'webview'].includes(tagName))
.map(([namespace, tag, node]) => ({namespace, tag, node}))
.sort((a, b) => a.namespace.localeCompare(b.namespace) || a.tag.localeCompare(b.tag));
// Process global attributes first as they also appear on some specific tags but we don't want to keep the specific ones if they're global.
if (attributeNodes[0][0] !== '') {
throw new Error(`Global attributes is not first to be processed`);
// Process global HTML attributes first as they also appear on some specific HTML tags but we don't want to keep the specific ones if they're global.
if (attributeNodes[0].namespace !== 'html' || attributeNodes[0].tag !== '') {
throw new Error(`Global HTML attributes is not first to be processed`);
}
// Map structure: attr => namespace => tag => config.
const attributes = new Map();
for (const [tagName, node] of attributeNodes) {
for (const {namespace, tag, node} of attributeNodes) {
const fullyQualifiedTagName = [namespace, tag || '*'].join(':');
for (const n of node.members.filter(n => n.kind === ts.SyntaxKind.PropertySignature)) {
const attrName = normaliseName(getNameOfNode(n), attrNameNormalised);
if (reactSpecificAttributes.includes(attrName)) continue;
@ -157,11 +168,11 @@ const processReactTypeDeclarations = async (source) => {
const redundantIfEmpty = !boolean &&
(types.includes(ts.SyntaxKind.StringKeyword) || types.includes(ts.SyntaxKind.NumberKeyword));
const defaultValue = (defaultAttributeValues[attrName] || [])
.filter(a => a.tags.includes(tagName))
.filter(a => a.tags.includes(fullyQualifiedTagName))
.map(a => a.defaultValue);
const collapseAndTrim = (collapsibleAndTrimmable[attrName] || []).includes(tagName);
const collapseAndTrim = (collapsibleAndTrimmable[attrName] || []).includes(fullyQualifiedTagName);
if (defaultValue.length > 1) {
throw new Error(`Tag-attribute combination has multiple default values: ${defaultValue}`);
throw new Error(`Tag-attribute combination <${fullyQualifiedTagName} ${attrName}> has multiple default values: ${defaultValue}`);
}
const attr = {
boolean,
@ -171,36 +182,27 @@ const processReactTypeDeclarations = async (source) => {
};
if (!attributes.has(attrName)) attributes.set(attrName, new Map());
const tagsForAttribute = attributes.get(attrName);
if (tagsForAttribute.has(tagName)) throw new Error(`Duplicate tag-attribute combination: <${tagName} ${attrName}>`);
const namespacesForAttribute = attributes.get(attrName);
if (!namespacesForAttribute.has(namespace)) namespacesForAttribute.set(namespace, new Map());
const tagsForNSAttribute = namespacesForAttribute.get(namespace);
if (tagsForNSAttribute.has(tag)) throw new Error(`Duplicate tag-attribute combination: <${fullyQualifiedTagName} ${attrName}>`);
const globalAttr = tagsForAttribute.get('');
const globalAttr = tagsForNSAttribute.get('*');
if (globalAttr) {
if (globalAttr.boolean !== attr.boolean
|| globalAttr.redundant_if_empty !== attr.redundant_if_empty
|| globalAttr.collapse_and_trim !== attr.collapse_and_trim
|| globalAttr.default_value !== attr.default_value) {
throw new Error(`Global and tag-specific attributes conflict: ${JSON.stringify(globalAttr, null, 2)} ${JSON.stringify(attr, null, 2)}`);
throw new Error(`Global and tag-specific attributes conflict: ${prettyjson(globalAttr)} ${prettyjson(attr)}`);
}
} else {
tagsForAttribute.set(tagName, attr);
tagsForNSAttribute.set(tag || '*', attr);
}
}
}
// Sort output JSON object by property so diffs are clearer.
await fs.writeFile(ATTRS_PATH, JSON.stringify(
Object.fromEntries(
[...attributes.entries()]
.map(([attrName, tagsMap]) => [attrName, Object.fromEntries(
[...tagsMap.entries()]
.sort((a, b) => a[0].localeCompare(b[0]))
)])
.sort((a, b) => a[0].localeCompare(b[0]))
),
null,
2,
));
await fs.writeFile(ATTRS_PATH, prettyjson(deepObjectifyMap(attributes)));
};
(async () => {

View File

@ -16,7 +16,7 @@ if [ -f native/Cargo.toml.orig ]; then
echo 'Not altering Node.js Cargo.toml file'
else
cp native/Cargo.toml native/Cargo.toml.orig
sed -i 's%^hyperbuild = .*$%hyperbuild = { path = "../.." }%' native/Cargo.toml
sed -i '' 's%^hyperbuild = .*$%hyperbuild = { path = "../.." }%' native/Cargo.toml
fi
npx neon build --release
mv native/Cargo.toml.orig native/Cargo.toml

View File

@ -1,6 +1,7 @@
pub use crate::err::ErrorType as ErrorType;
use crate::proc::Processor;
use crate::unit::content::process_content;
use crate::unit::tag::Namespace;
mod err;
mod pattern;
@ -11,7 +12,7 @@ mod unit;
pub fn hyperbuild(code: &mut [u8]) -> Result<usize, (ErrorType, usize)> {
let mut proc = Processor::new(code);
match process_content(&mut proc, None) {
match process_content(&mut proc, Namespace::Html, None) {
Ok(()) => Ok(proc.written_len()),
Err(e) => Err((e, proc.read_len())),
}

View File

@ -4,6 +4,7 @@ use crate::err::ProcessingResult;
use crate::proc::{Processor, ProcessorRange};
use crate::spec::codepoint::{is_control, is_whitespace};
use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue, skip_attr_value};
use crate::unit::tag::Namespace;
mod value;
@ -15,21 +16,36 @@ pub struct AttributeMinification {
}
pub enum AttrMapEntry {
AllHtmlElements(AttributeMinification),
DistinctHtmlElements(Map<&'static [u8], AttributeMinification>),
AllNamespaceElements(AttributeMinification),
SpecificNamespaceElements(Map<&'static [u8], AttributeMinification>),
}
pub struct AttrMap(Map<&'static [u8], &'static AttrMapEntry>);
/// Attribute map entries for a single attribute name, with one optional slot per namespace.
#[derive(Clone, Copy)]
pub struct ByNamespace {
    /// Entry used for elements in the HTML namespace, if there is one.
    html: Option<&'static AttrMapEntry>,
    /// Entry used for elements in the SVG namespace, if there is one.
    svg: Option<&'static AttrMapEntry>,
}

impl ByNamespace {
    /// Returns the entry stored for `ns`, or `None` when no entry exists for that namespace.
    fn get(&self, ns: Namespace) -> Option<&'static AttrMapEntry> {
        // `ByNamespace` is `Copy`, so both slots can be taken out by value.
        let ByNamespace { html, svg } = *self;
        match ns {
            Namespace::Html => html,
            Namespace::Svg => svg,
        }
    }
}
pub struct AttrMap(Map<&'static [u8], ByNamespace>);
impl AttrMap {
pub const fn new(map: Map<&'static [u8], &'static AttrMapEntry>) -> AttrMap {
pub const fn new(map: Map<&'static [u8], ByNamespace>) -> AttrMap {
AttrMap(map)
}
pub fn get(&self, tag: &[u8], attr: &[u8]) -> Option<&AttributeMinification> {
self.0.get(attr).and_then(|entry| match entry {
AttrMapEntry::AllHtmlElements(min) => Some(min),
AttrMapEntry::DistinctHtmlElements(map) => map.get(tag),
pub fn get(&self, ns: Namespace, tag: &[u8], attr: &[u8]) -> Option<&AttributeMinification> {
self.0.get(attr).and_then(|namespaces| namespaces.get(ns)).and_then(|entry| match entry {
AttrMapEntry::AllNamespaceElements(min) => Some(min),
AttrMapEntry::SpecificNamespaceElements(map) => map.get(tag),
})
}
}
@ -59,11 +75,11 @@ fn is_name_char(c: u8) -> bool {
}
}
pub fn process_attr(proc: &mut Processor, element: ProcessorRange) -> ProcessingResult<ProcessedAttr> {
pub fn process_attr(proc: &mut Processor, ns: Namespace, element: ProcessorRange) -> ProcessingResult<ProcessedAttr> {
// It's possible to expect attribute name but not be called at an attribute, e.g. due to whitespace between name and
// value, which causes name to be considered boolean attribute and `=` to be start of new (invalid) attribute name.
let name = chain!(proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().out_range());
let attr_cfg = ATTRS.get(&proc[element], &proc[name]);
let attr_cfg = ATTRS.get(ns, &proc[element], &proc[name]);
let is_boolean = attr_cfg.filter(|attr| attr.boolean).is_some();
let after_name = proc.checkpoint();

View File

@ -7,7 +7,7 @@ use crate::unit::bang::process_bang;
use crate::unit::comment::process_comment;
use crate::unit::entity::{EntityType, parse_entity};
use crate::unit::instruction::process_instruction;
use crate::unit::tag::{MaybeClosingTag, process_tag};
use crate::unit::tag::{MaybeClosingTag, Namespace, process_tag};
#[derive(Copy, Clone, PartialEq, Eq)]
enum ContentType {
@ -50,7 +50,7 @@ impl ContentType {
}
}
pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) -> ProcessingResult<()> {
pub fn process_content(proc: &mut Processor, ns: Namespace, parent: Option<ProcessorRange>) -> ProcessingResult<()> {
let &WhitespaceMinification { collapse, destroy_whole, trim } = get_whitespace_minification_for_tag(parent.map(|r| &proc[r]));
let handle_ws = collapse || destroy_whole || trim;
@ -115,10 +115,7 @@ pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
match next_content_type {
ContentType::Tag => {
proc.suspend(uep);
let new_closing_tag = process_tag(
proc,
prev_sibling_closing_tag,
)?;
let new_closing_tag = process_tag(proc, ns, prev_sibling_closing_tag)?;
prev_sibling_closing_tag.replace(new_closing_tag);
// Always resume as closing tag might not exist or be omitted.
proc.resume(uep);

View File

@ -10,6 +10,12 @@ use crate::unit::content::process_content;
use crate::unit::script::process_script;
use crate::unit::style::process_style;
/// Markup namespace that an element is processed under.
///
/// `Debug` is derived so that values can be printed in diagnostics and used with `assert_eq!`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Namespace {
    /// The HTML namespace.
    Html,
    /// The SVG namespace.
    Svg,
}
pub static JAVASCRIPT_MIME_TYPES: Set<&'static [u8]> = phf_set! {
b"application/ecmascript",
b"application/javascript",
@ -82,7 +88,7 @@ impl MaybeClosingTag {
}
// TODO Comment param `prev_sibling_closing_tag`.
pub fn process_tag(proc: &mut Processor, mut prev_sibling_closing_tag: MaybeClosingTag) -> ProcessingResult<MaybeClosingTag> {
pub fn process_tag(proc: &mut Processor, ns: Namespace, mut prev_sibling_closing_tag: MaybeClosingTag) -> ProcessingResult<MaybeClosingTag> {
// TODO Minify opening and closing tag whitespace after name and last attr.
// TODO DOC No checking if opening and closing names match.
// Expect to be currently at an opening tag.
@ -147,7 +153,7 @@ pub fn process_tag(proc: &mut Processor, mut prev_sibling_closing_tag: MaybeClos
_ => {}
};
let ProcessedAttr { name, typ, value } = process_attr(proc, tag_name)?;
let ProcessedAttr { name, typ, value } = process_attr(proc, ns, tag_name)?;
match (tag_type, &proc[name]) {
(TagType::Script, b"type") => {
// It's JS if the value is empty or one of `JAVASCRIPT_MIME_TYPES`.
@ -160,7 +166,7 @@ pub fn process_tag(proc: &mut Processor, mut prev_sibling_closing_tag: MaybeClos
}
(_, name) => {
// TODO Check if HTML tag before checking if attribute removal applies to all elements.
erase_attr = match (value, ATTRS.get(&proc[tag_name], name)) {
erase_attr = match (value, ATTRS.get(ns, &proc[tag_name], name)) {
(None, Some(AttributeMinification { redundant_if_empty: true, .. })) => true,
(Some(val), Some(AttributeMinification { default_value: Some(defval), .. })) => proc[val].eq(*defval),
_ => false,
@ -188,10 +194,16 @@ pub fn process_tag(proc: &mut Processor, mut prev_sibling_closing_tag: MaybeClos
return Ok(MaybeClosingTag(None));
};
let child_ns = if ns != Namespace::Svg && proc[tag_name].eq(b"svg") {
Namespace::Svg
} else {
ns
};
match tag_type {
TagType::Script => process_script(proc)?,
TagType::Style => process_style(proc)?,
_ => process_content(proc, Some(tag_name))?,
_ => process_content(proc, child_ns, Some(tag_name))?,
};
// Require closing tag for non-void.