Clear out archive and unused tag names sets; update README with whitespace minification modes
This commit is contained in:
parent
1db8f4aa13
commit
5f8da411b3
13
Cargo.toml
13
Cargo.toml
|
@ -1,10 +1,23 @@
|
|||
[package]
|
||||
name = "hyperbuild"
|
||||
description = "One-pass in-place HTML minifier written in Rust with advanced whitespace handling"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/wilsonzlin/hyperbuild"
|
||||
readme = "README.md"
|
||||
keywords = ["html", "compress", "minifier", "minify", "minification"]
|
||||
categories = ["compression", "command-line-utilities", "development-tools::build-utils", "web-programming"]
|
||||
repository = "https://github.com/wilsonzlin/hyperbuild.git"
|
||||
version = "0.0.1"
|
||||
authors = ["Wilson Lin <code@wilsonl.in>"]
|
||||
edition = "2018"
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "actively-developed" }
|
||||
|
||||
[dependencies]
|
||||
phf = { version = "0.8.0", features = ["macros"] }
|
||||
cascade = "0.1.4"
|
||||
structopt = "0.3.5"
|
||||
|
||||
[profile.release]
|
||||
panic = 'abort'
|
||||
|
|
78
README.md
78
README.md
|
@ -2,8 +2,6 @@
|
|||
|
||||
A fast one-pass in-place HTML minifier written in Rust with advanced whitespace handling.
|
||||
|
||||
Currently in beta, working on documentation and tests. Issues and pull requests welcome! Guide below is currently WIP.
|
||||
|
||||
## Features
|
||||
|
||||
- Minification is done in one pass with no backtracking or DOM/AST building.
|
||||
|
@ -26,6 +24,82 @@ hyperbuild has advanced whitespace minification that can allow strategies such a
|
|||
- Trim and collapse whitespace in content tags, as whitespace is collapsed anyway when rendered.
|
||||
- Remove whitespace in layout tags, which allows the use of inline layouts while keeping formatted code.
|
||||
|
||||
#### Collapsing whitespace
|
||||
|
||||
Reduce a sequence of whitespace characters in text nodes to a single space (U+0020).
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
··The·quick·brown·fox↵
|
||||
··jumps·over·the·lazy↵
|
||||
··dog.↵
|
||||
</p>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<p>·The·quick·brown·fox·jumps·over·the·lazy·dog.·</p>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### Destroying whole whitespace
|
||||
|
||||
Remove any text nodes that only consist of whitespace characters.
|
||||
|
||||
Especially useful when using `display: inline-block` so that whitespace between elements (e.g. indentation) does not alter layout and styling.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<ul>↵
|
||||
··<li>A</li>↵
|
||||
··<li>B</li>↵
|
||||
··<li>C</li>↵
|
||||
</ul>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<ul><li>A</li><li>B</li><li>C</li></ul>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### Trimming whitespace
|
||||
|
||||
Remove any whitespace from the start and end of a tag, if the first and/or last node is a text node.
|
||||
|
||||
Useful when combined with whitespace collapsing.
|
||||
|
||||
Other whitespace between text nodes and tags is not removed, as it is not recommended to mix non-formatting tags with raw text.
|
||||
|
||||
Basically, a tag should contain either only text and [formatting tags](#formatting-tags), or only non-formatting tags.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
··Hey,·I·<em>just</em>·found↵
|
||||
··out·about·this·<strong>cool</strong>·website!↵
|
||||
··<div></div>↵
|
||||
</p>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<p>Hey,·I·<em>just</em>·found↵
|
||||
··out·about·this·<strong>cool</strong>·website!↵
|
||||
··<div></div></p>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
### Attributes
|
||||
|
||||
Any entities in attribute values are decoded, and then the most optimal representation is calculated and used:
|
||||
|
|
|
@ -1,92 +0,0 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_map_tag_relations* hb_rule_tag_child_blacklist_map;
|
||||
|
||||
/*
 * Fill `map` with the child blacklist: for each parent tag name registered
 * below, the associated set holds the child tag names that the parent must
 * not contain.
 */
void hb_rule_tag_child_blacklist_map_add_entries(hb_map_tag_relations* map)
{
    /* Reused for every entry; each set is handed to `map` once populated. */
    hb_set_tag_names* banned;

    /* <address> */
    banned = hb_set_tag_names_create();
    hb_rule_tag_heading_add_elems(banned);
    hb_rule_tag_sectioning_add_elems(banned);
    hb_set_tag_names_add_whole_literal(banned, "address");
    hb_set_tag_names_add_whole_literal(banned, "header");
    hb_set_tag_names_add_whole_literal(banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "address", banned);

    /* <audio> */
    banned = hb_set_tag_names_create();
    hb_rule_tag_media_add_elems(banned);
    hb_map_tag_relations_set_whole_literal(map, "audio", banned);

    /* <dfn> */
    banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(banned, "dfn");
    hb_map_tag_relations_set_whole_literal(map, "dfn", banned);

    /* <dt> */
    banned = hb_set_tag_names_create();
    hb_rule_tag_heading_add_elems(banned);
    hb_rule_tag_sectioning_add_elems(banned);
    hb_set_tag_names_add_whole_literal(banned, "header");
    hb_set_tag_names_add_whole_literal(banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "dt", banned);

    /* <footer> */
    banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(banned, "header");
    hb_set_tag_names_add_whole_literal(banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "footer", banned);

    /* <form> */
    banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(banned, "form");
    hb_map_tag_relations_set_whole_literal(map, "form", banned);

    /* <header> */
    banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(banned, "header");
    hb_set_tag_names_add_whole_literal(banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "header", banned);

    /* <label> */
    banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(banned, "label");
    hb_map_tag_relations_set_whole_literal(map, "label", banned);

    /* <progress> */
    banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(banned, "progress");
    hb_map_tag_relations_set_whole_literal(map, "progress", banned);

    /* <th> */
    banned = hb_set_tag_names_create();
    hb_rule_tag_heading_add_elems(banned);
    hb_rule_tag_sectioning_add_elems(banned);
    hb_set_tag_names_add_whole_literal(banned, "header");
    hb_set_tag_names_add_whole_literal(banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "th", banned);

    /* <video> */
    banned = hb_set_tag_names_create();
    hb_rule_tag_media_add_elems(banned);
    hb_map_tag_relations_set_whole_literal(map, "video", banned);
}
|
||||
|
||||
void hb_rule_tag_child_blacklist_init(void)
|
||||
{
|
||||
hb_rule_tag_child_blacklist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_child_blacklist_map_add_entries(
|
||||
hb_rule_tag_child_blacklist_map);
|
||||
}
|
||||
|
||||
// Check if a parent is allowed to have a specific child, based on the
|
||||
// blacklist.
|
||||
bool hb_rule_tag_child_blacklist_allowed(nh_view_str* parent,
|
||||
nh_view_str* child)
|
||||
{
|
||||
hb_set_tag_names* set = hb_map_tag_relations_get(
|
||||
hb_rule_tag_child_blacklist_map, parent);
|
||||
return set == NULL || !hb_set_tag_names_has(set, child);
|
||||
}
|
|
@ -1,106 +0,0 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_map_tag_relations* hb_rule_tag_child_whitelist_map;
|
||||
|
||||
/*
 * Fill `map` with the child whitelist: for each parent tag name registered
 * below, the associated set holds the only child tag names the parent is
 * allowed to contain.
 */
void hb_rule_tag_child_whitelist_map_add_entries(hb_map_tag_relations* map)
{
    /* Reused for every entry; each set is handed to `map` once populated. */
    hb_set_tag_names* permitted;

    /* <colgroup> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "col");
    hb_map_tag_relations_set_whole_literal(map, "colgroup", permitted);

    /* <datalist> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "option");
    hb_map_tag_relations_set_whole_literal(map, "datalist", permitted);

    /* <dl> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "dt");
    hb_set_tag_names_add_whole_literal(permitted, "dd");
    hb_map_tag_relations_set_whole_literal(map, "dl", permitted);

    /* <hgroup> */
    permitted = hb_set_tag_names_create();
    hb_rule_tag_heading_add_elems(permitted);
    hb_map_tag_relations_set_whole_literal(map, "hgroup", permitted);

    /* <ol> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "li");
    hb_map_tag_relations_set_whole_literal(map, "ol", permitted);

    /* <optgroup> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "option");
    hb_map_tag_relations_set_whole_literal(map, "optgroup", permitted);

    /* <picture> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "source");
    hb_set_tag_names_add_whole_literal(permitted, "img");
    hb_map_tag_relations_set_whole_literal(map, "picture", permitted);

    /* <select> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "optgroup");
    hb_set_tag_names_add_whole_literal(permitted, "option");
    hb_map_tag_relations_set_whole_literal(map, "select", permitted);

    /* <table> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "caption");
    hb_set_tag_names_add_whole_literal(permitted, "colgroup");
    hb_set_tag_names_add_whole_literal(permitted, "col");
    hb_set_tag_names_add_whole_literal(permitted, "thead");
    hb_set_tag_names_add_whole_literal(permitted, "tbody");
    hb_set_tag_names_add_whole_literal(permitted, "tfoot");
    hb_set_tag_names_add_whole_literal(permitted, "tr");
    hb_map_tag_relations_set_whole_literal(map, "table", permitted);

    /* <tbody> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "tr");
    hb_map_tag_relations_set_whole_literal(map, "tbody", permitted);

    /* <tfoot> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "tr");
    hb_map_tag_relations_set_whole_literal(map, "tfoot", permitted);

    /* <thead> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "tr");
    hb_map_tag_relations_set_whole_literal(map, "thead", permitted);

    /* <tr> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "td");
    hb_set_tag_names_add_whole_literal(permitted, "th");
    hb_set_tag_names_add_whole_literal(permitted, "template");
    hb_set_tag_names_add_whole_literal(permitted, "script");
    hb_map_tag_relations_set_whole_literal(map, "tr", permitted);

    /* <ul> */
    permitted = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(permitted, "li");
    hb_map_tag_relations_set_whole_literal(map, "ul", permitted);
}
|
||||
|
||||
void hb_rule_tag_child_whitelist_init(void)
|
||||
{
|
||||
hb_rule_tag_child_whitelist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_child_whitelist_map_add_entries(
|
||||
hb_rule_tag_child_whitelist_map);
|
||||
}
|
||||
|
||||
// Check if a parent is allowed to have a specific child.
|
||||
bool hb_rule_tag_child_whitelist_allowed(nh_view_str* parent,
|
||||
nh_view_str* child)
|
||||
{
|
||||
hb_set_tag_names* set = hb_map_tag_relations_get(
|
||||
hb_rule_tag_child_whitelist_map, parent);
|
||||
return set == NULL || hb_set_tag_names_has(set, child);
|
||||
}
|
|
@ -1,215 +0,0 @@
|
|||
#### Beginning and end
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
··The·quick·brown·fox↵
|
||||
</p>
|
||||
```
|
||||
|
||||
#### Between text and tags
|
||||
|
||||
```html
|
||||
<p>The·quick·brown·fox·<strong>jumps</strong>·over·the·lazy·dog.</p>
|
||||
```
|
||||
|
||||
#### Contiguous
|
||||
|
||||
```html
|
||||
<select>↵
|
||||
··<option>Jan:·········1</option>↵
|
||||
··<option>Feb:········10</option>↵
|
||||
··<option>Mar:·······100</option>↵
|
||||
··<option>Apr:······1000</option>↵
|
||||
··<option>May:·····10000</option>↵
|
||||
··<option>Jun:····100000</option>↵
|
||||
</select>
|
||||
```
|
||||
|
||||
#### Whole text
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
···↵
|
||||
</p>
|
||||
```
|
||||
|
||||
### Tag classification
|
||||
|
||||
|Type|Content|
|
||||
|---|---|
|
||||
|Formatting tags|Text nodes|
|
||||
|Content tags|Formatting tags, text nodes|
|
||||
|Layout tags|Layout tags, content tags|
|
||||
|Content-first tags|Content of content tags or layout tags (but not both)|
|
||||
|
||||
#### Specific tags
|
||||
|
||||
Tags not in one of the categories below are **specific tags**.
|
||||
|
||||
#### Formatting tags
|
||||
|
||||
```html
|
||||
<strong> moat </strong>
|
||||
```
|
||||
|
||||
#### Content tags
|
||||
|
||||
```html
|
||||
<p>Some <strong>content</strong></p>
|
||||
```
|
||||
|
||||
#### Content-first tags
|
||||
|
||||
```html
|
||||
<li>Anthony</li>
|
||||
```
|
||||
|
||||
```html
|
||||
<li>
|
||||
<div>
|
||||
</div>
|
||||
</li>
|
||||
```
|
||||
|
||||
#### Layout tags
|
||||
|
||||
```html
|
||||
<div>
|
||||
<div></div>
|
||||
</div>
|
||||
```
|
||||
|
||||
### Options
|
||||
|
||||
For options that have a list of tags as their value, the tags should be separated by a comma.
|
||||
|
||||
An `*` (asterisk, U+002A) can be used to represent the complete set of possible tags. Providing no value represents the empty set.
|
||||
These two values essentially fully enable or disable the option.
|
||||
|
||||
For brevity, hyperbuild has built-in sets of tags that can be used in place of declaring all their members; they begin with a `$` sign:
|
||||
|
||||
|Name|Tags|Source|
|
||||
|---|---|---|
|
||||
|`$content`|`address`, `audio`, `button`, `canvas`, `caption`, `figcaption`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `legend`, `meter`, `object`, `option`, `p`, `summary`, `textarea`, `video`|[contenttags.c](src/main/c/rule/tag/contenttags.c)|
|
||||
|`$contentfirst`|`dd`, `details`, `dt`, `iframe`, `label`, `li`, `noscript`, `output`, `progress`, `slot`, `td`, `template`, `th`|[contentfirsttags.c](src/main/c/rule/tag/contentfirsttags.c)|
|
||||
|`$formatting`|`a`, `abbr`, `b`, `bdi`, `bdo`, `cite`, `data`, `del`, `dfn`, `em`, `i`, `ins`, `kbd`, `mark`, `q`, `rp`, `rt`, `rtc`, `ruby`, `s`, `samp`, `small`, `span`, `strong`, `sub`, `sup`, `time`, `u`, `var`, `wbr`|[formattingtags.c](src/main/c/rule/tag/formattingtags.c)|
|
||||
|`$layout`|`blockquote`, `body`, `colgroup`, `datalist`, `dialog`, `div`, `dl`, `fieldset`, `figure`, `footer`, `form`, `head`, `header`, `hgroup`, `html`, `main`, `map`, `menu`, `nav`, `ol`, `optgroup`, `picture`, `section`, `select`, `table`, `tbody`, `tfoot`, `thead`, `tr`, `ul`|[layouttags.c](src/main/c/rule/tag/layouttags.c)|
|
||||
|`$specific`|All [SVG tags](src/main/c/rule/tag/svgtags.c), `area`, `base`, `br`, `code`, `col`, `embed`, `hr`, `img`, `input`, `param`, `pre`, `script`, `source`, `track`|[specifictags.c](src/main/c/rule/tag/specifictags.c)|
|
||||
|`$heading`|`hgroup`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`|[headingtags.c](src/main/c/rule/tag/headingtags.c)|
|
||||
|`$media`|`audio`, `video`|[mediatags.c](src/main/c/rule/tag/mediatags.c)|
|
||||
|`$sectioning`|`article`, `aside`, `nav`, `section`|[sectioningtags.c](src/main/c/rule/tag/sectioningtags.c)|
|
||||
|`$void`|`area`, `base`, `br`, `col`, `embed`, `hr`, `img`, `input`, `keygen`, `link`, `meta`, `param`, `source`, `track`, `wbr`|[voidtags.c](src/main/c/rule/tag/voidtags.c)|
|
||||
|`$wss`|`pre`, `code`|[wsstags.c](src/main/c/rule/tag/wsstags.c)|
|
||||
|
||||
As an example, for `--MXcollapseWhitespace`, here are some possible values:
|
||||
|
||||
|Arguments|Description|
|
||||
|---|---|
|
||||
|`--MXcollapseWhitespace $wss`|Collapse whitespace in all tags except `$wss` ones|
|
||||
|`--MXcollapseWhitespace $content,$wss`|Collapse whitespace in all tags except `$content` and `$wss` ones|
|
||||
|`--MXcollapseWhitespace $content,$wss,dd`|Collapse whitespace in all tags except `$content` and `$wss` ones, as well as the `dd` tag|
|
||||
|`--MXcollapseWhitespace sup,dd`|Collapse whitespace in all tags except `sup` and `dd`|
|
||||
|`--MXcollapseWhitespace`|Collapse whitespace in all tags|
|
||||
|`--MXcollapseWhitespace *`|Don't collapse whitespace in any tag|
|
||||
|
||||
#### `--MXcollapseWhitespace $wss`
|
||||
|
||||
Reduce a sequence of whitespace characters in text nodes to a single space (U+0020), unless they are a child of the tags specified by this option.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
··The·quick·brown·fox↵
|
||||
··jumps·over·the·lazy↵
|
||||
··dog.↵
|
||||
</p>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<p>·The·quick·brown·fox·jumps·over·the·lazy·dog.·</p>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### `--MXdestroyWholeWhitespace $wss,$content,$formatting`
|
||||
|
||||
Remove any text nodes that only consist of whitespace characters, unless they are a child of the tags specified by this option.
|
||||
|
||||
Especially useful when using `display: inline-block` so that whitespace between elements (e.g. indentation) does not alter layout and styling.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<div>↵
|
||||
··<h1></h1>↵
|
||||
··<ul></ul>↵
|
||||
··A·quick·<strong>brown</strong>·<em>fox</em>.↵
|
||||
</div>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<div><h1></h1><ul></ul>↵
|
||||
··A·quick·<strong>brown</strong><em>fox</em>.↵
|
||||
</div>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### `--MXtrimWhitespace $wss,$formatting`
|
||||
|
||||
Remove any whitespace from the start and end of a tag, if the first and/or last node is a text node, unless the tag is one of the tags specified by this option.
|
||||
|
||||
Useful when combined with whitespace collapsing.
|
||||
|
||||
Other whitespace between text nodes and tags is not removed, as it is not recommended to mix non-formatting tags with raw text.
|
||||
|
||||
Basically, a tag should contain either only text and [formatting tags](#formatting-tags), or only non-formatting tags.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
··Hey,·I·<em>just</em>·found↵
|
||||
··out·about·this·<strong>cool</strong>·website!↵
|
||||
··<div></div>↵
|
||||
</p>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<p>Hey,·I·<em>just</em>·found↵
|
||||
··out·about·this·<strong>cool</strong>·website!↵
|
||||
··<div></div></p>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### `--MXtrimClassAttribute`
|
||||
|
||||
Don't trim and collapse whitespace in `class` attribute values.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<div class="
|
||||
hi
|
||||
lo
|
||||
a b c
|
||||
d e
|
||||
f g
|
||||
"></div>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<div class="hi lo a b c d e f g"></div>
|
||||
```
|
||||
|
||||
</table>
|
|
@ -1,20 +0,0 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
/*
 * Initialiser for the parent blacklist rule.
 * The rule is currently unused, so there is no state to set up.
 */
void hb_rule_tag_parent_blacklist_init(void)
{
    /* Intentionally empty. */
}
|
||||
|
||||
// Check if a child is allowed to have a specific parent, based on the
|
||||
// blacklist.
|
||||
bool hb_rule_tag_parent_blacklist_allowed(nh_view_str* child,
|
||||
nh_view_str* parent)
|
||||
{
|
||||
// Since this rule is currently not being used, directly allow without
|
||||
// any checks.
|
||||
(void) child;
|
||||
(void) parent;
|
||||
return true;
|
||||
}
|
|
@ -1,151 +0,0 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_map_tag_relations* hb_rule_tag_parent_whitelist_map;
|
||||
|
||||
/*
 * Fill `map` with the parent whitelist: for each child tag name registered
 * below, the associated set holds the only parent tag names the child is
 * allowed to appear under.
 */
void hb_rule_tag_parent_whitelist_map_add_entries(hb_map_tag_relations* map)
{
    /* Reused for every entry; each set is handed to `map` once populated. */
    hb_set_tag_names* parents;

    /* <caption> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "caption", parents);

    /* <col> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "table");
    hb_set_tag_names_add_whole_literal(parents, "colgroup");
    hb_map_tag_relations_set_whole_literal(map, "col", parents);

    /* <colgroup> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "colgroup", parents);

    /* <dd> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "dl");
    hb_map_tag_relations_set_whole_literal(map, "dd", parents);

    /* <dt> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "dl");
    hb_map_tag_relations_set_whole_literal(map, "dt", parents);

    /* <figcaption> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "figure");
    hb_map_tag_relations_set_whole_literal(map, "figcaption", parents);

    /* <legend> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "fieldset");
    hb_map_tag_relations_set_whole_literal(map, "legend", parents);

    /* <li> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "ul");
    hb_set_tag_names_add_whole_literal(parents, "ol");
    hb_set_tag_names_add_whole_literal(parents, "menu");
    hb_map_tag_relations_set_whole_literal(map, "li", parents);

    /* <optgroup> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "select");
    hb_map_tag_relations_set_whole_literal(map, "optgroup", parents);

    /* <option> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "select");
    hb_set_tag_names_add_whole_literal(parents, "optgroup");
    hb_set_tag_names_add_whole_literal(parents, "datalist");
    hb_map_tag_relations_set_whole_literal(map, "option", parents);

    /* <param> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "object");
    hb_map_tag_relations_set_whole_literal(map, "param", parents);

    /* <rp> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "ruby");
    hb_map_tag_relations_set_whole_literal(map, "rp", parents);

    /* <rt> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "ruby");
    hb_map_tag_relations_set_whole_literal(map, "rt", parents);

    /* <rtc> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "ruby");
    hb_map_tag_relations_set_whole_literal(map, "rtc", parents);

    /* <summary> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "details");
    hb_map_tag_relations_set_whole_literal(map, "summary", parents);

    /* <source> */
    parents = hb_set_tag_names_create();
    hb_rule_tag_media_add_elems(parents);
    hb_set_tag_names_add_whole_literal(parents, "picture");
    hb_map_tag_relations_set_whole_literal(map, "source", parents);

    /* <track> */
    parents = hb_set_tag_names_create();
    hb_rule_tag_media_add_elems(parents);
    hb_map_tag_relations_set_whole_literal(map, "track", parents);

    /* <tbody> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "tbody", parents);

    /* <td> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "tr");
    hb_map_tag_relations_set_whole_literal(map, "td", parents);

    /* <tfoot> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "tfoot", parents);

    /* <th> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "tr");
    hb_map_tag_relations_set_whole_literal(map, "th", parents);

    /* <thead> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "thead", parents);

    /* <tr> */
    parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(parents, "table");
    hb_set_tag_names_add_whole_literal(parents, "thead");
    hb_set_tag_names_add_whole_literal(parents, "tbody");
    hb_set_tag_names_add_whole_literal(parents, "tfoot");
    hb_map_tag_relations_set_whole_literal(map, "tr", parents);

    /*
     * <template> is deliberately not registered. Its parents should be
     * <body>, <frameset>, <head>, <dl>, <colgroup>, but that is ignored here.
     */
}
|
||||
|
||||
void hb_rule_tag_parent_whitelist_init(void)
|
||||
{
|
||||
hb_rule_tag_parent_whitelist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_parent_whitelist_map_add_entries(
|
||||
hb_rule_tag_parent_whitelist_map);
|
||||
}
|
||||
|
||||
// Check if a child is allowed to have a specific parent.
|
||||
bool hb_rule_tag_parent_whitelist_allowed(nh_view_str* child,
|
||||
nh_view_str* parent)
|
||||
{
|
||||
hb_set_tag_names* set = hb_map_tag_relations_get(
|
||||
hb_rule_tag_parent_whitelist_map, child);
|
||||
return set == NULL || hb_set_tag_names_has(set, parent);
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
# Tag types
|
||||
|
||||
|Type|Expected content|
|
||||
|---|---|
|
||||
|Formatting tags|Text nodes.|
|
||||
|Content tags|Formatting tags, text nodes.|
|
||||
|Layout tags|Layout tags, content tags.|
|
||||
|Content-first tags|Content of content tags or layout tags (but not both).|
|
||||
|
||||
## Formatting tags
|
||||
|
||||
```html
|
||||
<strong> moat </strong>
|
||||
```
|
||||
|
||||
## Content tags
|
||||
|
||||
```html
|
||||
<p>Some <strong>content</strong></p>
|
||||
```
|
||||
|
||||
## Content-first tags
|
||||
|
||||
```html
|
||||
<li>Anthony</li>
|
||||
```
|
||||
|
||||
```html
|
||||
<li>
|
||||
<div>
|
||||
</div>
|
||||
</li>
|
||||
```
|
||||
|
||||
## Layout tags
|
||||
|
||||
```html
|
||||
<div>
|
||||
<div></div>
|
||||
</div>
|
||||
```
|
|
@ -1,6 +1,7 @@
|
|||
use ::phf::{phf_set, Set};
|
||||
|
||||
// Difference to MDN's inline text semantics list: -br, +del, +ins.
|
||||
// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element#Inline_text_semantics.
|
||||
// Differences to tags listed in table at above URL: -br, +del, +ins.
|
||||
pub static FORMATTING_TAGS: Set<&'static [u8]> = phf_set! {
|
||||
b"a",
|
||||
b"abbr",
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
use ::phf::{phf_set, Set};
|
||||
|
||||
// Compile-time set of heading tag names, stored as byte strings.
pub static HEADING_TAGS: Set<&'static [u8]> = phf_set! {
    b"hgroup",
    b"h1", b"h2", b"h3", b"h4", b"h5", b"h6",
};
|
|
@ -1,148 +0,0 @@
|
|||
use ::phf::{phf_set, Set};
|
||||
|
||||
// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element at 2018-07-01T05:55:00Z.
|
||||
pub static HTML_TAGS: Set<&'static [u8]> = phf_set! {
|
||||
b"a",
|
||||
b"abbr",
|
||||
b"acronym",
|
||||
b"address",
|
||||
b"applet",
|
||||
b"area",
|
||||
b"article",
|
||||
b"aside",
|
||||
b"audio",
|
||||
b"b",
|
||||
b"basefont",
|
||||
b"bdi",
|
||||
b"bdo",
|
||||
b"bgsound",
|
||||
b"big",
|
||||
b"blink",
|
||||
b"blockquote",
|
||||
b"body",
|
||||
b"br",
|
||||
b"button",
|
||||
b"canvas",
|
||||
b"caption",
|
||||
b"center",
|
||||
b"cite",
|
||||
b"code",
|
||||
b"col",
|
||||
b"colgroup",
|
||||
b"command",
|
||||
b"content",
|
||||
b"data",
|
||||
b"datalist",
|
||||
b"dd",
|
||||
b"del",
|
||||
b"details",
|
||||
b"dfn",
|
||||
b"dialog",
|
||||
b"dir",
|
||||
b"div",
|
||||
b"dl",
|
||||
b"dt",
|
||||
b"element",
|
||||
b"em",
|
||||
b"embed",
|
||||
b"fieldset",
|
||||
b"figcaption",
|
||||
b"figure",
|
||||
b"font",
|
||||
b"footer",
|
||||
b"form",
|
||||
b"frame",
|
||||
b"frameset",
|
||||
b"h1",
|
||||
b"h2",
|
||||
b"h3",
|
||||
b"h4",
|
||||
b"h5",
|
||||
b"h6",
|
||||
b"head",
|
||||
b"header",
|
||||
b"hgroup",
|
||||
b"hr",
|
||||
b"html",
|
||||
b"i",
|
||||
b"iframe",
|
||||
b"image",
|
||||
b"img",
|
||||
b"input",
|
||||
b"ins",
|
||||
b"isindex",
|
||||
b"kbd",
|
||||
b"keygen",
|
||||
b"label",
|
||||
b"legend",
|
||||
b"li",
|
||||
b"link",
|
||||
b"listing",
|
||||
b"main",
|
||||
b"map",
|
||||
b"mark",
|
||||
b"marquee",
|
||||
b"menu",
|
||||
b"menuitem",
|
||||
b"meta",
|
||||
b"meter",
|
||||
b"multicol",
|
||||
b"nav",
|
||||
b"nextid",
|
||||
b"nobr",
|
||||
b"noembed",
|
||||
b"noframes",
|
||||
b"noscript",
|
||||
b"object",
|
||||
b"ol",
|
||||
b"optgroup",
|
||||
b"option",
|
||||
b"output",
|
||||
b"p",
|
||||
b"param",
|
||||
b"picture",
|
||||
b"plaintext",
|
||||
b"pre",
|
||||
b"progress",
|
||||
b"q",
|
||||
b"rp",
|
||||
b"rt",
|
||||
b"rtc",
|
||||
b"ruby",
|
||||
b"s",
|
||||
b"samp",
|
||||
b"script",
|
||||
b"section",
|
||||
b"select",
|
||||
b"shadow",
|
||||
b"slot",
|
||||
b"small",
|
||||
b"source",
|
||||
b"spacer",
|
||||
b"span",
|
||||
b"strike",
|
||||
b"strong",
|
||||
b"style",
|
||||
b"sub",
|
||||
b"summary",
|
||||
b"sup",
|
||||
b"table",
|
||||
b"tbody",
|
||||
b"td",
|
||||
b"template",
|
||||
b"textarea",
|
||||
b"tfoot",
|
||||
b"th",
|
||||
b"thead",
|
||||
b"time",
|
||||
b"title",
|
||||
b"tr",
|
||||
b"track",
|
||||
b"tt",
|
||||
b"u",
|
||||
b"ul",
|
||||
b"var",
|
||||
b"video",
|
||||
b"wbr",
|
||||
b"xmp",
|
||||
};
|
|
@ -1,12 +1,8 @@
|
|||
use ::phf::{phf_set, Set};
|
||||
|
||||
pub static LAYOUT_TAGS: Set<&'static [u8]> = phf_set! {
|
||||
// Sectioning tags.
|
||||
b"article",
|
||||
b"aside",
|
||||
b"nav",
|
||||
b"section",
|
||||
// Other tags.
|
||||
b"blockquote",
|
||||
b"body",
|
||||
b"colgroup",
|
||||
|
@ -25,9 +21,11 @@ pub static LAYOUT_TAGS: Set<&'static [u8]> = phf_set! {
|
|||
b"main",
|
||||
b"map",
|
||||
b"menu",
|
||||
b"nav",
|
||||
b"ol",
|
||||
b"optgroup",
|
||||
b"picture",
|
||||
b"section",
|
||||
b"select",
|
||||
b"table",
|
||||
b"tbody",
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
use ::phf::{phf_set, Set};
|
||||
|
||||
// Compile-time set of media tag names, stored as byte strings.
pub static MEDIA_TAGS: Set<&'static [u8]> = phf_set! {
    b"audio", b"video",
};
|
|
@ -1,9 +0,0 @@
|
|||
use ::phf::{phf_set, Set};
|
||||
|
||||
// Compile-time set of sectioning tag names, stored as byte strings.
// These names are also used by layout tags.
pub static SECTIONING_TAGS: Set<&'static [u8]> = phf_set! {
    b"article", b"aside", b"nav", b"section",
};
|
|
@ -1,19 +0,0 @@
|
|||
use ::phf::{phf_set, Set};
|
||||
|
||||
// Does not include SVG tags.
|
||||
pub static SPECIFIC_HTML_TAGS: Set<&'static [u8]> = phf_set! {
|
||||
b"area",
|
||||
b"base",
|
||||
b"br",
|
||||
b"code", // Reason: unlikely to want to minify.
|
||||
b"col",
|
||||
b"embed",
|
||||
b"hr",
|
||||
b"img",
|
||||
b"input",
|
||||
b"param",
|
||||
b"pre", // Reason: unlikely to want to minify.
|
||||
b"script",
|
||||
b"source",
|
||||
b"track",
|
||||
};
|
|
@ -1,95 +0,0 @@
|
|||
use ::phf::{phf_set, Set};
|
||||
|
||||
// Sourced from https://developer.mozilla.org/en-US/docs/Web/SVG/Element at 2018-08-04T03:50:00Z.
|
||||
pub static SVG_TAGS: Set<&'static [u8]> = phf_set! {
|
||||
b"a",
|
||||
b"altGlyph",
|
||||
b"altGlyphDef",
|
||||
b"altGlyphItem",
|
||||
b"animate",
|
||||
b"animateColor",
|
||||
b"animateMotion",
|
||||
b"animateTransform",
|
||||
b"circle",
|
||||
b"clipPath",
|
||||
b"color-profile",
|
||||
b"cursor",
|
||||
b"defs",
|
||||
b"desc",
|
||||
b"discard",
|
||||
b"ellipse",
|
||||
b"feBlend",
|
||||
b"feColorMatrix",
|
||||
b"feComponentTransfer",
|
||||
b"feComposite",
|
||||
b"feConvolveMatrix",
|
||||
b"feDiffuseLighting",
|
||||
b"feDisplacementMap",
|
||||
b"feDistantLight",
|
||||
b"feDropShadow",
|
||||
b"feFlood",
|
||||
b"feFuncA",
|
||||
b"feFuncB",
|
||||
b"feFuncG",
|
||||
b"feFuncR",
|
||||
b"feGaussianBlur",
|
||||
b"feImage",
|
||||
b"feMerge",
|
||||
b"feMergeNode",
|
||||
b"feMorphology",
|
||||
b"feOffset",
|
||||
b"fePointLight",
|
||||
b"feSpecularLighting",
|
||||
b"feSpotLight",
|
||||
b"feTile",
|
||||
b"feTurbulence",
|
||||
b"filter",
|
||||
b"font-face-format",
|
||||
b"font-face-name",
|
||||
b"font-face-src",
|
||||
b"font-face-uri",
|
||||
b"font-face",
|
||||
b"font",
|
||||
b"foreignObject",
|
||||
b"g",
|
||||
b"glyph",
|
||||
b"glyphRef",
|
||||
b"hatch",
|
||||
b"hatchpath",
|
||||
b"hkern",
|
||||
b"image",
|
||||
b"line",
|
||||
b"linearGradient",
|
||||
b"marker",
|
||||
b"mask",
|
||||
b"mesh",
|
||||
b"meshgradient",
|
||||
b"meshpatch",
|
||||
b"meshrow",
|
||||
b"metadata",
|
||||
b"missing-glyph",
|
||||
b"mpath",
|
||||
b"path",
|
||||
b"pattern",
|
||||
b"polygon",
|
||||
b"polyline",
|
||||
b"radialGradient",
|
||||
b"rect",
|
||||
b"script",
|
||||
b"set",
|
||||
b"solidcolor",
|
||||
b"stop",
|
||||
b"style",
|
||||
b"svg",
|
||||
b"switch",
|
||||
b"symbol",
|
||||
b"text",
|
||||
b"textPath",
|
||||
b"title",
|
||||
b"tref",
|
||||
b"tspan",
|
||||
b"unknown",
|
||||
b"use",
|
||||
b"view",
|
||||
b"vkern",
|
||||
};
|
|
@ -1,6 +1,6 @@
|
|||
// "WSS" stands for whitespace-sensitive.
|
||||
use ::phf::{phf_set, Set};
|
||||
|
||||
// "WSS" stands for whitespace-sensitive.
|
||||
pub static WSS_TAGS: Set<&'static [u8]> = phf_set! {
|
||||
b"code",
|
||||
b"pre",
|
||||
|
|
Loading…
Reference in New Issue