Clear out archive and unused tag names sets; update README with whitespace minification modes

This commit is contained in:
Wilson Lin 2019-12-28 11:58:01 +11:00
parent 1db8f4aa13
commit 5f8da411b3
17 changed files with 135 additions and 880 deletions

View File

@ -1,10 +1,23 @@
[package]
name = "hyperbuild"
description = "One-pass in-place HTML minifier written in Rust with advanced whitespace handling"
license = "MIT"
homepage = "https://github.com/wilsonzlin/hyperbuild"
readme = "README.md"
keywords = ["html", "compress", "minifier", "minify", "minification"]
categories = ["compression", "command-line-utilities", "development-tools::build-utils", "web-programming"]
repository = "https://github.com/wilsonzlin/hyperbuild.git"
version = "0.0.1"
authors = ["Wilson Lin <code@wilsonl.in>"]
edition = "2018"
[badges]
maintenance = { status = "actively-developed" }
[dependencies]
phf = { version = "0.8.0", features = ["macros"] }
cascade = "0.1.4"
structopt = "0.3.5"
[profile.release]
panic = 'abort'

View File

@ -2,8 +2,6 @@
A fast one-pass in-place HTML minifier written in Rust with advanced whitespace handling.
Currently in beta, working on documentation and tests. Issues and pull requests welcome! Guide below is currently WIP.
## Features
- Minification is done in one pass with no backtracking or DOM/AST building.
@ -26,6 +24,82 @@ hyperbuild has advanced whitespace minification that can allow strategies such a
- Trim and collapse whitespace in content tags, as whitespace is collapsed anyway when rendered.
- Remove whitespace in layout tags, which allows the use of inline layouts while keeping formatted code.
#### Collapsing whitespace
Reduce a sequence of whitespace characters in text nodes to a single space (U+0020).
<table><thead><tr><th>Before<th>After<tbody><tr><td>
```html
<p>
··The·quick·brown·fox↵
··jumps·over·the·lazy↵
··dog.↵
</p>
```
<td>
```html
<p>·The·quick·brown·fox·jumps·over·the·lazy·dog.·</p>
```
</table>
#### Destroying whole whitespace
Remove any text nodes that only consist of whitespace characters.
Especially useful when using `display: inline-block` so that whitespace between elements (e.g. indentation) does not alter layout and styling.
<table><thead><tr><th>Before<th>After<tbody><tr><td>
```html
<ul>
··<li>A</li>
··<li>B</li>
··<li>C</li>
</ul>
```
<td>
```html
<ul><li>A</li><li>B</li><li>C</li></ul>
```
</table>
#### Trimming whitespace
Remove any whitespace from the start and end of a tag, if the first and/or last node is a text node.
Useful when combined with whitespace collapsing.
Other whitespace between text nodes and tags is not removed, as it is not recommended to mix non-formatting tags with raw text.
Basically, a tag should contain either text and [formatting tags](#formatting-tags), or only non-formatting tags.
<table><thead><tr><th>Before<th>After<tbody><tr><td>
```html
<p>
··Hey,·I·<em>just</em>·found↵
··out·about·this·<strong>cool</strong>·website!↵
··<div></div>
</p>
```
<td>
```html
<p>Hey,·I·<em>just</em>·found↵
··out·about·this·<strong>cool</strong>·website!↵
··<div></div></p>
```
</table>
### Attributes
Any entities in attribute values are decoded, and then the most optimal representation is calculated and used:

View File

@ -1,92 +0,0 @@
#include <hb/collection.h>
#include <hb/rule.h>
#include <hb/rune.h>
static hb_map_tag_relations* hb_rule_tag_child_blacklist_map;
/*
 * Fill `map` with the child blacklist.
 *
 * Each entry is keyed by a parent tag name and holds the set of tag names
 * registered against that parent. One set is created per parent and handed to
 * the map; nothing is freed here.
 */
void hb_rule_tag_child_blacklist_map_add_entries(hb_map_tag_relations* map)
{
    // <address>: heading and sectioning elements, plus itself, <header> and
    // <footer>.
    hb_set_tag_names* address_banned = hb_set_tag_names_create();
    hb_rule_tag_heading_add_elems(address_banned);
    hb_rule_tag_sectioning_add_elems(address_banned);
    hb_set_tag_names_add_whole_literal(address_banned, "address");
    hb_set_tag_names_add_whole_literal(address_banned, "header");
    hb_set_tag_names_add_whole_literal(address_banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "address", address_banned);

    // <audio>: media elements.
    hb_set_tag_names* audio_banned = hb_set_tag_names_create();
    hb_rule_tag_media_add_elems(audio_banned);
    hb_map_tag_relations_set_whole_literal(map, "audio", audio_banned);

    // <dfn>: itself.
    hb_set_tag_names* dfn_banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(dfn_banned, "dfn");
    hb_map_tag_relations_set_whole_literal(map, "dfn", dfn_banned);

    // <dt>: heading and sectioning elements, plus <header> and <footer>.
    hb_set_tag_names* dt_banned = hb_set_tag_names_create();
    hb_rule_tag_heading_add_elems(dt_banned);
    hb_rule_tag_sectioning_add_elems(dt_banned);
    hb_set_tag_names_add_whole_literal(dt_banned, "header");
    hb_set_tag_names_add_whole_literal(dt_banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "dt", dt_banned);

    // <footer>: <header> and itself.
    hb_set_tag_names* footer_banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(footer_banned, "header");
    hb_set_tag_names_add_whole_literal(footer_banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "footer", footer_banned);

    // <form>: itself.
    hb_set_tag_names* form_banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(form_banned, "form");
    hb_map_tag_relations_set_whole_literal(map, "form", form_banned);

    // <header>: itself and <footer>.
    hb_set_tag_names* header_banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(header_banned, "header");
    hb_set_tag_names_add_whole_literal(header_banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "header", header_banned);

    // <label>: itself.
    hb_set_tag_names* label_banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(label_banned, "label");
    hb_map_tag_relations_set_whole_literal(map, "label", label_banned);

    // <progress>: itself.
    hb_set_tag_names* progress_banned = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(progress_banned, "progress");
    hb_map_tag_relations_set_whole_literal(map, "progress", progress_banned);

    // <th>: heading and sectioning elements, plus <header> and <footer>.
    hb_set_tag_names* th_banned = hb_set_tag_names_create();
    hb_rule_tag_heading_add_elems(th_banned);
    hb_rule_tag_sectioning_add_elems(th_banned);
    hb_set_tag_names_add_whole_literal(th_banned, "header");
    hb_set_tag_names_add_whole_literal(th_banned, "footer");
    hb_map_tag_relations_set_whole_literal(map, "th", th_banned);

    // <video>: media elements.
    hb_set_tag_names* video_banned = hb_set_tag_names_create();
    hb_rule_tag_media_add_elems(video_banned);
    hb_map_tag_relations_set_whole_literal(map, "video", video_banned);
}
void hb_rule_tag_child_blacklist_init(void)
{
hb_rule_tag_child_blacklist_map = hb_map_tag_relations_create();
hb_rule_tag_child_blacklist_map_add_entries(
hb_rule_tag_child_blacklist_map);
}
/*
 * Decide whether `parent` may contain `child` according to the blacklist.
 *
 * Returns true when `parent` has no blacklist entry, or when its entry does
 * not contain `child`; returns false otherwise.
 */
bool hb_rule_tag_child_blacklist_allowed(nh_view_str* parent,
                                         nh_view_str* child)
{
    hb_set_tag_names* banned_children = hb_map_tag_relations_get(
        hb_rule_tag_child_blacklist_map, parent);

    // A parent without an entry places no restriction on its children.
    if (banned_children == NULL) {
        return true;
    }

    return !hb_set_tag_names_has(banned_children, child);
}

View File

@ -1,106 +0,0 @@
#include <hb/collection.h>
#include <hb/rule.h>
#include <hb/rune.h>
static hb_map_tag_relations* hb_rule_tag_child_whitelist_map;
/*
 * Fill `map` with the child whitelist.
 *
 * Each entry is keyed by a parent tag name and holds the set of tag names
 * registered for that parent. One set is created per parent and handed to the
 * map; nothing is freed here.
 */
void hb_rule_tag_child_whitelist_map_add_entries(hb_map_tag_relations* map)
{
    // <colgroup>: <col>.
    hb_set_tag_names* colgroup_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(colgroup_children, "col");
    hb_map_tag_relations_set_whole_literal(map, "colgroup", colgroup_children);

    // <datalist>: <option>.
    hb_set_tag_names* datalist_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(datalist_children, "option");
    hb_map_tag_relations_set_whole_literal(map, "datalist", datalist_children);

    // <dl>: <dt> and <dd>.
    hb_set_tag_names* dl_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(dl_children, "dt");
    hb_set_tag_names_add_whole_literal(dl_children, "dd");
    hb_map_tag_relations_set_whole_literal(map, "dl", dl_children);

    // <hgroup>: heading elements.
    hb_set_tag_names* hgroup_children = hb_set_tag_names_create();
    hb_rule_tag_heading_add_elems(hgroup_children);
    hb_map_tag_relations_set_whole_literal(map, "hgroup", hgroup_children);

    // <ol>: <li>.
    hb_set_tag_names* ol_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(ol_children, "li");
    hb_map_tag_relations_set_whole_literal(map, "ol", ol_children);

    // <optgroup>: <option>.
    hb_set_tag_names* optgroup_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(optgroup_children, "option");
    hb_map_tag_relations_set_whole_literal(map, "optgroup", optgroup_children);

    // <picture>: <source> and <img>.
    hb_set_tag_names* picture_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(picture_children, "source");
    hb_set_tag_names_add_whole_literal(picture_children, "img");
    hb_map_tag_relations_set_whole_literal(map, "picture", picture_children);

    // <select>: <optgroup> and <option>.
    hb_set_tag_names* select_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(select_children, "optgroup");
    hb_set_tag_names_add_whole_literal(select_children, "option");
    hb_map_tag_relations_set_whole_literal(map, "select", select_children);

    // <table>: caption, column and row-group elements, plus <tr>.
    hb_set_tag_names* table_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(table_children, "caption");
    hb_set_tag_names_add_whole_literal(table_children, "colgroup");
    hb_set_tag_names_add_whole_literal(table_children, "col");
    hb_set_tag_names_add_whole_literal(table_children, "thead");
    hb_set_tag_names_add_whole_literal(table_children, "tbody");
    hb_set_tag_names_add_whole_literal(table_children, "tfoot");
    hb_set_tag_names_add_whole_literal(table_children, "tr");
    hb_map_tag_relations_set_whole_literal(map, "table", table_children);

    // <tbody>: <tr>.
    hb_set_tag_names* tbody_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(tbody_children, "tr");
    hb_map_tag_relations_set_whole_literal(map, "tbody", tbody_children);

    // <tfoot>: <tr>.
    hb_set_tag_names* tfoot_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(tfoot_children, "tr");
    hb_map_tag_relations_set_whole_literal(map, "tfoot", tfoot_children);

    // <thead>: <tr>.
    hb_set_tag_names* thead_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(thead_children, "tr");
    hb_map_tag_relations_set_whole_literal(map, "thead", thead_children);

    // <tr>: cells, plus <template> and <script>.
    hb_set_tag_names* tr_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(tr_children, "td");
    hb_set_tag_names_add_whole_literal(tr_children, "th");
    hb_set_tag_names_add_whole_literal(tr_children, "template");
    hb_set_tag_names_add_whole_literal(tr_children, "script");
    hb_map_tag_relations_set_whole_literal(map, "tr", tr_children);

    // <ul>: <li>.
    hb_set_tag_names* ul_children = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(ul_children, "li");
    hb_map_tag_relations_set_whole_literal(map, "ul", ul_children);
}
void hb_rule_tag_child_whitelist_init(void)
{
hb_rule_tag_child_whitelist_map = hb_map_tag_relations_create();
hb_rule_tag_child_whitelist_map_add_entries(
hb_rule_tag_child_whitelist_map);
}
/*
 * Decide whether `parent` may contain `child` according to the whitelist.
 *
 * Returns true when `parent` has no whitelist entry, or when its entry
 * contains `child`; returns false otherwise.
 */
bool hb_rule_tag_child_whitelist_allowed(nh_view_str* parent,
                                         nh_view_str* child)
{
    hb_set_tag_names* permitted_children = hb_map_tag_relations_get(
        hb_rule_tag_child_whitelist_map, parent);

    // A parent without an entry accepts any child.
    if (permitted_children == NULL) {
        return true;
    }

    return hb_set_tag_names_has(permitted_children, child);
}

View File

@ -1,215 +0,0 @@
#### Beginning and end
```html
<p>
··The·quick·brown·fox↵
</p>
```
#### Between text and tags
```html
<p>The·quick·brown·fox·<strong>jumps</strong>·over·the·lazy·dog.</p>
```
#### Contiguous
```html
<select>
··<option>Jan:·········1</option>
··<option>Feb:········10</option>
··<option>Mar:·······100</option>
··<option>Apr:······1000</option>
··<option>May:·····10000</option>
··<option>Jun:····100000</option>
</select>
```
#### Whole text
```html
<p>
···↵
</p>
```
### Tag classification
|Type|Content|
|---|---|
|Formatting tags|Text nodes|
|Content tags|Formatting tags, text nodes|
|Layout tags|Layout tags, content tags|
|Content-first tags|Content of content tags or layout tags (but not both)|
#### Specific tags
Tags not in one of the categories below are **specific tags**.
#### Formatting tags
```html
<strong> moat </strong>
```
#### Content tags
```html
<p>Some <strong>content</strong></p>
```
#### Content-first tags
```html
<li>Anthony</li>
```
```html
<li>
<div>
</div>
</li>
```
#### Layout tags
```html
<div>
<div></div>
</div>
```
### Options
For options that have a list of tags as their value, the tags should be separated by a comma.
An `*` (asterisk, U+002A) can be used to represent the complete set of possible tags. Providing no value represents the empty set.
These two values essentially fully enable or fully disable the option.
For brevity, hyperbuild has built-in sets of tags that can be used in place of declaring all their members; they begin with a `$` sign:
|Name|Tags|Source|
|---|---|---|
|`$content`|`address`, `audio`, `button`, `canvas`, `caption`, `figcaption`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `legend`, `meter`, `object`, `option`, `p`, `summary`, `textarea`, `video`|[contenttags.c](src/main/c/rule/tag/contenttags.c)|
|`$contentfirst`|`dd`, `details`, `dt`, `iframe`, `label`, `li`, `noscript`, `output`, `progress`, `slot`, `td`, `template`, `th`|[contentfirsttags.c](src/main/c/rule/tag/contentfirsttags.c)|
|`$formatting`|`a`, `abbr`, `b`, `bdi`, `bdo`, `cite`, `data`, `del`, `dfn`, `em`, `i`, `ins`, `kbd`, `mark`, `q`, `rp`, `rt`, `rtc`, `ruby`, `s`, `samp`, `small`, `span`, `strong`, `sub`, `sup`, `time`, `u`, `var`, `wbr`|[formattingtags.c](src/main/c/rule/tag/formattingtags.c)|
|`$layout`|`blockquote`, `body`, `colgroup`, `datalist`, `dialog`, `div`, `dl`, `fieldset`, `figure`, `footer`, `form`, `head`, `header`, `hgroup`, `html`, `main`, `map`, `menu`, `nav`, `ol`, `optgroup`, `picture`, `section`, `select`, `table`, `tbody`, `tfoot`, `thead`, `tr`, `ul`|[layouttags.c](src/main/c/rule/tag/layouttags.c)|
|`$specific`|All [SVG tags](src/main/c/rule/tag/svgtags.c), `area`, `base`, `br`, `code`, `col`, `embed`, `hr`, `img`, `input`, `param`, `pre`, `script`, `source`, `track`|[specifictags.c](src/main/c/rule/tag/specifictags.c)|
|`$heading`|`hgroup`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`|[headingtags.c](src/main/c/rule/tag/headingtags.c)|
|`$media`|`audio`, `video`|[mediatags.c](src/main/c/rule/tag/mediatags.c)|
|`$sectioning`|`article`, `aside`, `nav`, `section`|[sectioningtags.c](src/main/c/rule/tag/sectioningtags.c)|
|`$void`|`area`, `base`, `br`, `col`, `embed`, `hr`, `img`, `input`, `keygen`, `link`, `meta`, `param`, `source`, `track`, `wbr`|[voidtags.c](src/main/c/rule/tag/voidtags.c)|
|`$wss`|`pre`, `code`|[wsstags.c](src/main/c/rule/tag/wsstags.c)|
As an example, for `--MXcollapseWhitespace`, here are some possible values:
|Arguments|Description|
|---|---|
|`--MXcollapseWhitespace $wss`|Collapse whitespace in all tags except `$wss` ones|
|`--MXcollapseWhitespace $content,$wss`|Collapse whitespace in all tags except `$content` and `$wss` ones|
|`--MXcollapseWhitespace $content,$wss,dd`|Collapse whitespace in all tags except `$content` and `$wss` ones, as well as the `dd` tag|
|`--MXcollapseWhitespace sup,dd`|Collapse whitespace in all tags except `sup` and `dd`|
|`--MXcollapseWhitespace`|Collapse whitespace in all tags|
|`--MXcollapseWhitespace *`|Don't collapse whitespace in any tag|
#### `--MXcollapseWhitespace $wss`
Reduce a sequence of whitespace characters in text nodes to a single space (U+0020), unless they are a child of the tags specified by this option.
<table><thead><tr><th>Before<th>After<tbody><tr><td>
```html
<p>
··The·quick·brown·fox↵
··jumps·over·the·lazy↵
··dog.↵
</p>
```
<td>
```html
<p>·The·quick·brown·fox·jumps·over·the·lazy·dog.·</p>
```
</table>
#### `--MXdestroyWholeWhitespace $wss,$content,$formatting`
Remove any text nodes that only consist of whitespace characters, unless they are a child of the tags specified by this option.
Especially useful when using `display: inline-block` so that whitespace between elements (e.g. indentation) does not alter layout and styling.
<table><thead><tr><th>Before<th>After<tbody><tr><td>
```html
<div>
··<h1></h1>
··<ul></ul>
··A·quick·<strong>brown</strong>·<em>fox</em>.↵
</div>
```
<td>
```html
<div><h1></h1><ul></ul>
··A·quick·<strong>brown</strong><em>fox</em>.↵
</div>
```
</table>
#### `--MXtrimWhitespace $wss,$formatting`
Remove any whitespace from the start and end of a tag, if the first and/or last node is a text node, unless the tag is one of the tags specified by this option.
Useful when combined with whitespace collapsing.
Other whitespace between text nodes and tags is not removed, as it is not recommended to mix non-formatting tags with raw text.
Basically, a tag should contain either text and [formatting tags](#formatting-tags), or only non-formatting tags.
<table><thead><tr><th>Before<th>After<tbody><tr><td>
```html
<p>
··Hey,·I·<em>just</em>·found↵
··out·about·this·<strong>cool</strong>·website!↵
··<div></div>
</p>
```
<td>
```html
<p>Hey,·I·<em>just</em>·found↵
··out·about·this·<strong>cool</strong>·website!↵
··<div></div></p>
```
</table>
#### `--MXtrimClassAttribute`
Don't trim and collapse whitespace in `class` attribute values.
<table><thead><tr><th>Before<th>After<tbody><tr><td>
```html
<div class="
hi
lo
a b c
d e
f g
"></div>
```
<td>
```html
<div class="hi lo a b c d e f g"></div>
```
</table>

View File

@ -1,20 +0,0 @@
#include <hb/collection.h>
#include <hb/rule.h>
#include <hb/rune.h>
// Initialiser for the parent blacklist rule. Intentionally empty: no map is
// created because the rule has no entries.
void hb_rule_tag_parent_blacklist_init(void)
{
// Don't do anything. This rule is currently unused.
}
/*
 * Decide whether `child` may appear under `parent` according to the parent
 * blacklist.
 *
 * The rule is currently unused, so every combination is allowed and neither
 * argument is inspected.
 */
bool hb_rule_tag_parent_blacklist_allowed(nh_view_str* child,
                                          nh_view_str* parent)
{
    // Mark both parameters as deliberately unused.
    (void) parent;
    (void) child;

    return true;
}

View File

@ -1,151 +0,0 @@
#include <hb/collection.h>
#include <hb/rule.h>
#include <hb/rune.h>
static hb_map_tag_relations* hb_rule_tag_parent_whitelist_map;
/*
 * Fill `map` with the parent whitelist.
 *
 * Each entry is keyed by a child tag name and holds the set of parent tag
 * names registered for that child. One set is created per child and handed to
 * the map; nothing is freed here.
 */
void hb_rule_tag_parent_whitelist_map_add_entries(hb_map_tag_relations* map)
{
    // <caption>: <table>.
    hb_set_tag_names* caption_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(caption_parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "caption", caption_parents);

    // <col>: <table> and <colgroup>.
    hb_set_tag_names* col_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(col_parents, "table");
    hb_set_tag_names_add_whole_literal(col_parents, "colgroup");
    hb_map_tag_relations_set_whole_literal(map, "col", col_parents);

    // <colgroup>: <table>.
    hb_set_tag_names* colgroup_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(colgroup_parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "colgroup", colgroup_parents);

    // <dd>: <dl>.
    hb_set_tag_names* dd_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(dd_parents, "dl");
    hb_map_tag_relations_set_whole_literal(map, "dd", dd_parents);

    // <dt>: <dl>.
    hb_set_tag_names* dt_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(dt_parents, "dl");
    hb_map_tag_relations_set_whole_literal(map, "dt", dt_parents);

    // <figcaption>: <figure>.
    hb_set_tag_names* figcaption_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(figcaption_parents, "figure");
    hb_map_tag_relations_set_whole_literal(map, "figcaption",
                                           figcaption_parents);

    // <legend>: <fieldset>.
    hb_set_tag_names* legend_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(legend_parents, "fieldset");
    hb_map_tag_relations_set_whole_literal(map, "legend", legend_parents);

    // <li>: <ul>, <ol> and <menu>.
    hb_set_tag_names* li_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(li_parents, "ul");
    hb_set_tag_names_add_whole_literal(li_parents, "ol");
    hb_set_tag_names_add_whole_literal(li_parents, "menu");
    hb_map_tag_relations_set_whole_literal(map, "li", li_parents);

    // <optgroup>: <select>.
    hb_set_tag_names* optgroup_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(optgroup_parents, "select");
    hb_map_tag_relations_set_whole_literal(map, "optgroup", optgroup_parents);

    // <option>: <select>, <optgroup> and <datalist>.
    hb_set_tag_names* option_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(option_parents, "select");
    hb_set_tag_names_add_whole_literal(option_parents, "optgroup");
    hb_set_tag_names_add_whole_literal(option_parents, "datalist");
    hb_map_tag_relations_set_whole_literal(map, "option", option_parents);

    // <param>: <object>.
    hb_set_tag_names* param_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(param_parents, "object");
    hb_map_tag_relations_set_whole_literal(map, "param", param_parents);

    // <rp>: <ruby>.
    hb_set_tag_names* rp_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(rp_parents, "ruby");
    hb_map_tag_relations_set_whole_literal(map, "rp", rp_parents);

    // <rt>: <ruby>.
    hb_set_tag_names* rt_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(rt_parents, "ruby");
    hb_map_tag_relations_set_whole_literal(map, "rt", rt_parents);

    // <rtc>: <ruby>.
    hb_set_tag_names* rtc_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(rtc_parents, "ruby");
    hb_map_tag_relations_set_whole_literal(map, "rtc", rtc_parents);

    // <summary>: <details>.
    hb_set_tag_names* summary_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(summary_parents, "details");
    hb_map_tag_relations_set_whole_literal(map, "summary", summary_parents);

    // <source>: media elements and <picture>.
    hb_set_tag_names* source_parents = hb_set_tag_names_create();
    hb_rule_tag_media_add_elems(source_parents);
    hb_set_tag_names_add_whole_literal(source_parents, "picture");
    hb_map_tag_relations_set_whole_literal(map, "source", source_parents);

    // <track>: media elements.
    hb_set_tag_names* track_parents = hb_set_tag_names_create();
    hb_rule_tag_media_add_elems(track_parents);
    hb_map_tag_relations_set_whole_literal(map, "track", track_parents);

    // <tbody>: <table>.
    hb_set_tag_names* tbody_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(tbody_parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "tbody", tbody_parents);

    // <td>: <tr>.
    hb_set_tag_names* td_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(td_parents, "tr");
    hb_map_tag_relations_set_whole_literal(map, "td", td_parents);

    // <tfoot>: <table>.
    hb_set_tag_names* tfoot_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(tfoot_parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "tfoot", tfoot_parents);

    // <th>: <tr>.
    hb_set_tag_names* th_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(th_parents, "tr");
    hb_map_tag_relations_set_whole_literal(map, "th", th_parents);

    // <thead>: <table>.
    hb_set_tag_names* thead_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(thead_parents, "table");
    hb_map_tag_relations_set_whole_literal(map, "thead", thead_parents);

    // <tr>: <table> and its row groups.
    hb_set_tag_names* tr_parents = hb_set_tag_names_create();
    hb_set_tag_names_add_whole_literal(tr_parents, "table");
    hb_set_tag_names_add_whole_literal(tr_parents, "thead");
    hb_set_tag_names_add_whole_literal(tr_parents, "tbody");
    hb_set_tag_names_add_whole_literal(tr_parents, "tfoot");
    hb_map_tag_relations_set_whole_literal(map, "tr", tr_parents);

    // <template>: deliberately has no entry.
    // Should be <body>, <frameset>, <head>, <dl>, <colgroup>, but ignoring.
}
void hb_rule_tag_parent_whitelist_init(void)
{
hb_rule_tag_parent_whitelist_map = hb_map_tag_relations_create();
hb_rule_tag_parent_whitelist_map_add_entries(
hb_rule_tag_parent_whitelist_map);
}
/*
 * Decide whether `child` may appear under `parent` according to the parent
 * whitelist.
 *
 * Returns true when `child` has no whitelist entry, or when its entry
 * contains `parent`; returns false otherwise.
 */
bool hb_rule_tag_parent_whitelist_allowed(nh_view_str* child,
                                          nh_view_str* parent)
{
    hb_set_tag_names* permitted_parents = hb_map_tag_relations_get(
        hb_rule_tag_parent_whitelist_map, child);

    // A child without an entry may appear under any parent.
    if (permitted_parents == NULL) {
        return true;
    }

    return hb_set_tag_names_has(permitted_parents, parent);
}

41
notes/Tag types.md Normal file
View File

@ -0,0 +1,41 @@
# Tag types
|Type|Expected content|
|---|---|
|Formatting tags|Text nodes.|
|Content tags|Formatting tags, text nodes.|
|Layout tags|Layout tags, content tags.|
|Content-first tags|Content of content tags or layout tags (but not both).|
## Formatting tags
```html
<strong> moat </strong>
```
## Content tags
```html
<p>Some <strong>content</strong></p>
```
## Content-first tags
```html
<li>Anthony</li>
```
```html
<li>
<div>
</div>
</li>
```
## Layout tags
```html
<div>
<div></div>
</div>
```

View File

@ -1,6 +1,7 @@
use ::phf::{phf_set, Set};
// Difference to MDN's inline text semantics list: -br, +del, +ins.
// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element#Inline_text_semantics.
// Differences to tags listed in table at above URL: -br, +del, +ins.
pub static FORMATTING_TAGS: Set<&'static [u8]> = phf_set! {
b"a",
b"abbr",

View File

@ -1,11 +0,0 @@
use ::phf::{phf_set, Set};
// Static set of heading tag names: `hgroup` plus `h1` through `h6`.
// Keys are byte strings (`&'static [u8]`), built with `phf_set!`.
pub static HEADING_TAGS: Set<&'static [u8]> = phf_set! {
b"hgroup",
b"h1",
b"h2",
b"h3",
b"h4",
b"h5",
b"h6",
};

View File

@ -1,148 +0,0 @@
use ::phf::{phf_set, Set};
// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element at 2018-07-01T05:55:00Z.
// Static set of HTML tag names, stored as lowercase byte strings
// (`&'static [u8]`) and listed in alphabetical order.
pub static HTML_TAGS: Set<&'static [u8]> = phf_set! {
b"a",
b"abbr",
b"acronym",
b"address",
b"applet",
b"area",
b"article",
b"aside",
b"audio",
b"b",
b"basefont",
b"bdi",
b"bdo",
b"bgsound",
b"big",
b"blink",
b"blockquote",
b"body",
b"br",
b"button",
b"canvas",
b"caption",
b"center",
b"cite",
b"code",
b"col",
b"colgroup",
b"command",
b"content",
b"data",
b"datalist",
b"dd",
b"del",
b"details",
b"dfn",
b"dialog",
b"dir",
b"div",
b"dl",
b"dt",
b"element",
b"em",
b"embed",
b"fieldset",
b"figcaption",
b"figure",
b"font",
b"footer",
b"form",
b"frame",
b"frameset",
b"h1",
b"h2",
b"h3",
b"h4",
b"h5",
b"h6",
b"head",
b"header",
b"hgroup",
b"hr",
b"html",
b"i",
b"iframe",
b"image",
b"img",
b"input",
b"ins",
b"isindex",
b"kbd",
b"keygen",
b"label",
b"legend",
b"li",
b"link",
b"listing",
b"main",
b"map",
b"mark",
b"marquee",
b"menu",
b"menuitem",
b"meta",
b"meter",
b"multicol",
b"nav",
b"nextid",
b"nobr",
b"noembed",
b"noframes",
b"noscript",
b"object",
b"ol",
b"optgroup",
b"option",
b"output",
b"p",
b"param",
b"picture",
b"plaintext",
b"pre",
b"progress",
b"q",
b"rp",
b"rt",
b"rtc",
b"ruby",
b"s",
b"samp",
b"script",
b"section",
b"select",
b"shadow",
b"slot",
b"small",
b"source",
b"spacer",
b"span",
b"strike",
b"strong",
b"style",
b"sub",
b"summary",
b"sup",
b"table",
b"tbody",
b"td",
b"template",
b"textarea",
b"tfoot",
b"th",
b"thead",
b"time",
b"title",
b"tr",
b"track",
b"tt",
b"u",
b"ul",
b"var",
b"video",
b"wbr",
b"xmp",
};

View File

@ -1,12 +1,8 @@
use ::phf::{phf_set, Set};
pub static LAYOUT_TAGS: Set<&'static [u8]> = phf_set! {
// Sectioning tags.
b"article",
b"aside",
b"nav",
b"section",
// Other tags.
b"blockquote",
b"body",
b"colgroup",
@ -25,9 +21,11 @@ pub static LAYOUT_TAGS: Set<&'static [u8]> = phf_set! {
b"main",
b"map",
b"menu",
b"nav",
b"ol",
b"optgroup",
b"picture",
b"section",
b"select",
b"table",
b"tbody",

View File

@ -1,6 +0,0 @@
use ::phf::{phf_set, Set};
// Static set of media tag names: `audio` and `video`.
// Keys are byte strings (`&'static [u8]`), built with `phf_set!`.
pub static MEDIA_TAGS: Set<&'static [u8]> = phf_set! {
b"audio",
b"video",
};

View File

@ -1,9 +0,0 @@
use ::phf::{phf_set, Set};
// Static set of sectioning tag names: `article`, `aside`, `nav` and `section`.
// Keys are byte strings (`&'static [u8]`), built with `phf_set!`.
pub static SECTIONING_TAGS: Set<&'static [u8]> = phf_set! {
// Also used by layout tags.
b"article",
b"aside",
b"nav",
b"section",
};

View File

@ -1,19 +0,0 @@
use ::phf::{phf_set, Set};
// Does not include SVG tags.
// Static set of HTML tag names classed as "specific" tags.
// Keys are byte strings (`&'static [u8]`), built with `phf_set!`.
// `code` and `pre` carry inline notes giving the reason they are listed.
pub static SPECIFIC_HTML_TAGS: Set<&'static [u8]> = phf_set! {
b"area",
b"base",
b"br",
b"code", // Reason: unlikely to want to minify.
b"col",
b"embed",
b"hr",
b"img",
b"input",
b"param",
b"pre", // Reason: unlikely to want to minify.
b"script",
b"source",
b"track",
};

View File

@ -1,95 +0,0 @@
use ::phf::{phf_set, Set};
// Sourced from https://developer.mozilla.org/en-US/docs/Web/SVG/Element at 2018-08-04T03:50:00Z.
// Static set of SVG tag names, stored as byte strings (`&'static [u8]`).
// Entries keep their mixed-case spelling (e.g. `clipPath`, `feBlend`).
// Some names here (`a`, `font`, `image`, `script`, `style`, `title`) are also
// valid HTML tag names.
pub static SVG_TAGS: Set<&'static [u8]> = phf_set! {
b"a",
b"altGlyph",
b"altGlyphDef",
b"altGlyphItem",
b"animate",
b"animateColor",
b"animateMotion",
b"animateTransform",
b"circle",
b"clipPath",
b"color-profile",
b"cursor",
b"defs",
b"desc",
b"discard",
b"ellipse",
b"feBlend",
b"feColorMatrix",
b"feComponentTransfer",
b"feComposite",
b"feConvolveMatrix",
b"feDiffuseLighting",
b"feDisplacementMap",
b"feDistantLight",
b"feDropShadow",
b"feFlood",
b"feFuncA",
b"feFuncB",
b"feFuncG",
b"feFuncR",
b"feGaussianBlur",
b"feImage",
b"feMerge",
b"feMergeNode",
b"feMorphology",
b"feOffset",
b"fePointLight",
b"feSpecularLighting",
b"feSpotLight",
b"feTile",
b"feTurbulence",
b"filter",
b"font-face-format",
b"font-face-name",
b"font-face-src",
b"font-face-uri",
b"font-face",
b"font",
b"foreignObject",
b"g",
b"glyph",
b"glyphRef",
b"hatch",
b"hatchpath",
b"hkern",
b"image",
b"line",
b"linearGradient",
b"marker",
b"mask",
b"mesh",
b"meshgradient",
b"meshpatch",
b"meshrow",
b"metadata",
b"missing-glyph",
b"mpath",
b"path",
b"pattern",
b"polygon",
b"polyline",
b"radialGradient",
b"rect",
b"script",
b"set",
b"solidcolor",
b"stop",
b"style",
b"svg",
b"switch",
b"symbol",
b"text",
b"textPath",
b"title",
b"tref",
b"tspan",
b"unknown",
b"use",
b"view",
b"vkern",
};

View File

@ -1,6 +1,6 @@
// "WSS" stands for whitespace-sensitive.
use ::phf::{phf_set, Set};
// "WSS" stands for whitespace-sensitive.
pub static WSS_TAGS: Set<&'static [u8]> = phf_set! {
b"code",
b"pre",