Complete refactoring; move CLI to separate project
This commit is contained in:
parent
104e55c778
commit
ba03841945
|
@ -5,10 +5,22 @@ set(CMAKE_C_STANDARD 11)
|
|||
|
||||
include_directories(src lib)
|
||||
|
||||
add_executable(hyperbuild
|
||||
add_library(nicehash
|
||||
lib/nicehash/bitfield-ascii.c
|
||||
lib/nicehash/list-char.c
|
||||
lib/nicehash/list-list-char.c
|
||||
lib/nicehash/list-uchar.c
|
||||
lib/nicehash/set-int32.c
|
||||
lib/nicehash/set-int64.c
|
||||
lib/nicehash/set-str.c
|
||||
lib/nicehash/set-view-str.c
|
||||
lib/nicehash/view-str.c)
|
||||
|
||||
add_library(hyperbuild
|
||||
src/hb/cfg.c
|
||||
src/hb/collection.c
|
||||
src/hb/err.c
|
||||
src/hb/hyperbuild.c
|
||||
src/hb/proc/accept.c
|
||||
src/hb/proc/bounds.c
|
||||
src/hb/proc/error.c
|
||||
|
@ -59,3 +71,9 @@ add_executable(hyperbuild
|
|||
src/hb/unit/entity.c
|
||||
src/hb/unit/tag.c
|
||||
src/hb/unit/tag/name.c)
|
||||
target_link_libraries(hyperbuild nicehash)
|
||||
|
||||
add_executable(hyperbuild-test
|
||||
test/hb/unit/attr/val/quoted.test.c)
|
||||
target_link_libraries(hyperbuild-test hyperbuild)
|
||||
target_include_directories(hyperbuild-test PRIVATE test)
|
||||
|
|
397
README.md
397
README.md
|
@ -1,183 +1,40 @@
|
|||
# hyperbuild
|
||||
|
||||
A fast HTML minifier written in C, heavily influenced by [kangax's html-minifier](https://github.com/kangax/html-minifier).
|
||||
A fast one-pass in-place HTML minifier written in C with advanced whitespace handling.
|
||||
|
||||
Available in different flavours:
|
||||
|
||||
- Standalone 64-bit Linux executable (this)
|
||||
- [Node.js](https://github.com/wilsonzlin/hyperbuild-nodejs)
|
||||
- [Express](https://github.com/wilsonzlin/hyperbuild-express)
|
||||
- [Webpack](https://github.com/wilsonzlin/hyperbuild-webpack)
|
||||
- [Apache](https://github.com/wilsonzlin/hyperbuild-apache)
|
||||
- [Nginx](https://github.com/wilsonzlin/hyperbuild-nginx)
|
||||
Currently in beta, working on documentation and tests. Issues and pull requests welcome!
|
||||
|
||||
## Features
|
||||
|
||||
### Streaming minification
|
||||
- Minification is done in one pass with no backtracking or DOM/AST building.
|
||||
- No dynamic memory is allocated during processing, which increases safety and performance.
|
||||
- Advanced whitespace handling options allow maximum minification while retaining valid spaces.
|
||||
|
||||
hyperbuild minifies as it parses, directly streaming processed HTML to the output without having to build a DOM/AST or iterate/traverse around in multiple passes, allowing for super-fast compilation times and near-constant memory usage.
|
||||
## Usage
|
||||
|
||||
### Super fast
|
||||
This is the library. To use hyperbuild, you'll probably need one of these:
|
||||
|
||||
hyperbuild is written in C, and uses technologies like Emscripten and Cython to preserve performance in higher-level languages.
|
||||
- [hyperbuild CLI](https://github.com/wilsonzlin/hyperbuild-cli)
|
||||
|
||||
### Smart whitespace handling
|
||||
Documentation for the library itself is currently WIP.
|
||||
|
||||
hyperbuild has advanced whitespace minification with smart defaults that leaves whitespace untouched in `pre` and `code`, trims and collapses them in content tags, and removes them in layout tags allowing the use of `inline-block` without ugly syntax or CSS hacks.
|
||||
hyperbuild uses the following dependencies, which are included as submodules:
|
||||
|
||||
## Parsing
|
||||
|
||||
Current limitations:
|
||||
|
||||
- UTF-8 in, UTF-8 out, no BOM.
|
||||
- Not aware of exotic Unicode whitespace characters.
|
||||
- Tested and designed for Linux only.
|
||||
- Follows HTML5 only.
|
||||
|
||||
### Errors
|
||||
|
||||
Errors marked with a `⌫` can be suppressed using the [`--suppress`](#--suppress) option.
|
||||
Use the error name without the `HBE_PARSE_` prefix.
|
||||
|
||||
#### `HBE_PARSE_MALFORMED_ENTITY` ⌫
|
||||
|
||||
It's an error if the sequence of characters following an ampersand (`&`) does not form a valid entity.
|
||||
|
||||
Entities must be of one of the following forms:
|
||||
|
||||
- `&name;`, where *name* is a reference to a valid HTML entity
|
||||
- `&#nnnn;`, where *nnnn* is a Unicode code point in base 10
|
||||
- `&#xhhhh;`, where *hhhh* is a Unicode code point in base 16
|
||||
|
||||
A malformed entity is an ampersand not followed by a sequence of characters that matches one of the above forms. This includes when the semicolon is missing.
|
||||
|
||||
Note that this is different from `HBE_PARSE_INVALID_ENTITY`, which is when a well-formed entity references a non-existent entity name or Unicode code point.
|
||||
|
||||
While an ampersand by itself (i.e. followed by whitespace or as the last character) is a malformed entity, it is covered by `HBE_PARSE_BARE_AMPERSAND`.
|
||||
|
||||
#### `HBE_PARSE_BARE_AMPERSAND` ⌫
|
||||
|
||||
It's an error to have an ampersand followed by whitespace or as the last character.
|
||||
|
||||
This is intentionally a different error to `HBE_PARSE_MALFORMED_ENTITY` due to the ubiquity of bare ampersands.
|
||||
|
||||
An ampersand by itself is not *necessarily* an invalid entity. However, HTML parsers and browsers may have different interpretations of bare ampersands, so it's a good idea to always use the encoded form (`&amp;`).
|
||||
|
||||
When this error is suppressed, bare ampersands are outputted untouched.
|
||||
|
||||
#### `HBE_PARSE_INVALID_ENTITY` ⌫
|
||||
|
||||
It's an error if an invalid HTML entity is detected.
|
||||
|
||||
If suppressed, invalid entities are outputted untouched.
|
||||
|
||||
See [entityrefs.c](src/main/c/rule/entity/entityrefs.c) for the list of entity references considered valid by hyperbuild.
|
||||
|
||||
Valid entities that reference a Unicode code point must be between 0x0 and 0x10FFFF (inclusive).
|
||||
|
||||
#### `HBE_PARSE_NONSTANDARD_TAG` ⌫
|
||||
|
||||
It's an error if an unknown (non-standard) tag is reached.
|
||||
See [tags.c](src/main/c/rule/tag/tags.c) for the list of tags considered valid by hyperbuild.
|
||||
|
||||
#### `HBE_PARSE_UCASE_TAG` ⌫
|
||||
|
||||
It's an error if an opening or closing tag's name has any uppercase characters.
|
||||
|
||||
#### `HBE_PARSE_UCASE_ATTR` ⌫
|
||||
|
||||
It's an error if an attribute's name has any uppercase characters.
|
||||
|
||||
#### `HBE_PARSE_UNQUOTED_ATTR` ⌫
|
||||
|
||||
It's an error if an attribute's value is not quoted with `"` (U+0022) or `'` (U+0027).
|
||||
This means that `` ` `` is not a valid quote mark regardless of whether this error is suppressed or not. Backticks are valid attribute value quotes in Internet Explorer.
|
||||
|
||||
#### `HBE_PARSE_ILLEGAL_CHILD`
|
||||
|
||||
It's an error if a tag is declared inside a parent tag that it cannot be a child of.
|
||||
This is a very simple check, and does not cover the comprehensive HTML rules, which involve backtracking, tree traversal, and lots of conditionals.
|
||||
|
||||
This rule is enforced in four parts:
|
||||
[whitelistparents.c](src/main/c/rule/relation/whitelistparents.c),
|
||||
[blacklistparents.c](src/main/c/rule/relation/blacklistparents.c),
|
||||
[whitelistchildren.c](src/main/c/rule/relation/whitelistchildren.c), and
|
||||
[blacklistchildren.c](src/main/c/rule/relation/blacklistchildren.c).
|
||||
|
||||
#### `HBE_PARSE_UNCLOSED_TAG`
|
||||
|
||||
It's an error if a non-void tag is not closed.
|
||||
See [voidtags.c](src/main/c/rule/tag/voidtags.c) for the list of tags considered void by hyperbuild.
|
||||
|
||||
This includes tags that close automatically because of siblings (e.g. `<li><li>`), as it greatly simplifies the complexity of the minifier due to guarantees about the structure.
|
||||
|
||||
#### `HBE_PARSE_SELF_CLOSING_TAG` ⌫
|
||||
|
||||
It's an error if a tag is self-closed. Valid in XML, not in HTML.
|
||||
|
||||
#### `HBE_PARSE_NO_SPACE_BEFORE_ATTR`
|
||||
|
||||
It's an error if there is no whitespace before an attribute.
|
||||
|
||||
Most likely, the cause of this error is either invalid syntax or something like:
|
||||
|
||||
```html
|
||||
<div class="a"name="1"></div>
|
||||
```
|
||||
|
||||
(Note the lack of space between the end of the `class` attribute and the beginning of the `name` attribute.)
|
||||
|
||||
#### `HBE_PARSE_UNEXPECTED_END` and `HBE_PARSE_EXPECTED_NOT_FOUND`
|
||||
|
||||
General syntax errors.
|
||||
|
||||
#### Additional errors
|
||||
|
||||
There are additional implicit errors that are considered as general syntax errors due to the way the parser works:
|
||||
|
||||
- Closing void tags; see [voidtags.c](src/main/c/rule/tag/voidtags.c) for the list of tags considered void by hyperbuild.
|
||||
- Placing whitespace between `=` and attribute names/values.
|
||||
- Placing whitespace before the tag name in an opening tag.
|
||||
- Placing whitespace around the tag name in a closing tag.
|
||||
- Not closing a tag before the end of the file/input.
|
||||
|
||||
#### Notes
|
||||
|
||||
- Closing `</script>` tags end single-line and multi-line JavaScript comments in `script` tags.
|
||||
For this to be detected by hyperbuild, the closing tag must not contain any whitespace (e.g. `</script >` would not be detected).
|
||||
|
||||
### Options
|
||||
|
||||
#### `--in`
|
||||
|
||||
Path to a file to process. If omitted, hyperbuild will read from `stdin`.
|
||||
|
||||
#### `--out`
|
||||
|
||||
Path to a file to write to; it will be created if it doesn't exist already. If omitted, the output will be streamed to `stdout`.
|
||||
|
||||
#### `--keep`
|
||||
|
||||
Don't automatically delete the output file if an error occurred. If the output is `stdout`, or the output is a file but `--buffer` is provided, this option does nothing.
|
||||
|
||||
#### `--buffer`
|
||||
|
||||
Buffer all output until the process is complete and successful. This won't truncate or write anything to the output until the build process is done, but will use a non-constant amount of memory.
|
||||
This applies even when the output is `stdout`.
|
||||
|
||||
#### `--suppress`
|
||||
|
||||
Suppress errors specified by this option. hyperbuild will quietly ignore and continue processing when otherwise one of the provided errors would occur.
|
||||
|
||||
Suppressible errors are marked with a `⌫` in the [Errors](#errors) section. Omit the `HBE_PARSE_` prefix. Separate the error names with commas.
|
||||
- [nicehash](https://github.com/wilsonzlin/nicehash)
|
||||
|
||||
## Minification
|
||||
|
||||
### Theory
|
||||
Guide below is currently WIP.
|
||||
|
||||
#### Whitespace
|
||||
### Whitespace
|
||||
|
||||
##### Beginning and end
|
||||
hyperbuild has advanced whitespace minification that can allow strategies such as:
|
||||
|
||||
- leave whitespace untouched in `pre` and `code`, which are whitespace sensitive
|
||||
- trim and collapse whitespace in content tags, as whitespace is collapsed anyway when rendered
|
||||
- remove whitespace in layout tags, which allows the use of inline layouts while keeping formatted code
|
||||
|
||||
#### Beginning and end
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
|
@ -185,13 +42,13 @@ Suppressible errors are marked with a `⌫` in the [Errors](#errors) section. Om
|
|||
</p>
|
||||
```
|
||||
|
||||
##### Between text and tags
|
||||
#### Between text and tags
|
||||
|
||||
```html
|
||||
<p>The·quick·brown·fox·<strong>jumps</strong>·over·the·lazy·dog.</p>
|
||||
```
|
||||
|
||||
##### Contiguous
|
||||
#### Contiguous
|
||||
|
||||
```html
|
||||
<select>↵
|
||||
|
@ -204,7 +61,7 @@ Suppressible errors are marked with a `⌫` in the [Errors](#errors) section. Om
|
|||
</select>
|
||||
```
|
||||
|
||||
##### Whole text
|
||||
#### Whole text
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
|
@ -212,25 +69,32 @@ Suppressible errors are marked with a `⌫` in the [Errors](#errors) section. Om
|
|||
</p>
|
||||
```
|
||||
|
||||
#### Content
|
||||
### Tag classification
|
||||
|
||||
##### Specific tags
|
||||
|Type|Content|
|
||||
|---|---|
|
||||
|Formatting tags|Text nodes|
|
||||
|Content tags|Formatting tags, text nodes|
|
||||
|Layout tags|Layout tags, content tags|
|
||||
|Content-first tags|Content of content tags or layout tags (but not both)|
|
||||
|
||||
#### Specific tags
|
||||
|
||||
Tags not in one of the categories below are **specific tags**.
|
||||
|
||||
##### Formatting tags
|
||||
#### Formatting tags
|
||||
|
||||
```html
|
||||
<strong> moat </strong>
|
||||
```
|
||||
|
||||
##### Content tags
|
||||
#### Content tags
|
||||
|
||||
```html
|
||||
<p>Some <strong>content</strong></p>
|
||||
```
|
||||
|
||||
##### Content-first tags
|
||||
#### Content-first tags
|
||||
|
||||
```html
|
||||
<li>Anthony</li>
|
||||
|
@ -243,194 +107,21 @@ Tags not in one of the categories below are **specific tags**.
|
|||
</li>
|
||||
```
|
||||
|
||||
##### Layout tags
|
||||
#### Layout tags
|
||||
|
||||
```html
|
||||
<div>
|
||||
<div></div>
|
||||
</div>
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
##### Overview
|
||||
Currently, hyperbuild has a few limitations:
|
||||
|
||||
|Type|Content|
|
||||
|---|---|
|
||||
|Formatting tags|Text nodes|
|
||||
|Content tags|Formatting tags, text nodes|
|
||||
|Layout tags|Layout tags, content tags|
|
||||
|Content-first tags|Content of content tags or layout tags (but not both)|
|
||||
- Only UTF-8 is supported.
|
||||
- Not aware of exotic Unicode whitespace characters.
|
||||
- Follows HTML5 only.
|
||||
- Only works on Linux.
|
||||
|
||||
### Options
|
||||
|
||||
Note that only existing whitespace will be up for removal via minification. Entities that represent whitespace will not be decoded and then removed.
|
||||
|
||||
For options that have a list of tags as their value, the tags should be separated by a comma.
|
||||
|
||||
An `*` (asterisk, U+002A) can be used to represent the complete set of possible tags. Providing no value represents the empty set.
|
||||
Both values essentially fully enable or disable the option.
|
||||
|
||||
For brevity, hyperbuild has built-in sets of tags that can be used in place of declaring all their members; they begin with a `$` sign:
|
||||
|
||||
|Name|Tags|Source|
|
||||
|---|---|---|
|
||||
|`$content`|`address`, `audio`, `button`, `canvas`, `caption`, `figcaption`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `legend`, `meter`, `object`, `option`, `p`, `summary`, `textarea`, `video`|[contenttags.c](src/main/c/rule/tag/contenttags.c)|
|
||||
|`$contentfirst`|`dd`, `details`, `dt`, `iframe`, `label`, `li`, `noscript`, `output`, `progress`, `slot`, `td`, `template`, `th`|[contentfirsttags.c](src/main/c/rule/tag/contentfirsttags.c)|
|
||||
|`$formatting`|`a`, `abbr`, `b`, `bdi`, `bdo`, `cite`, `data`, `del`, `dfn`, `em`, `i`, `ins`, `kbd`, `mark`, `q`, `rp`, `rt`, `rtc`, `ruby`, `s`, `samp`, `small`, `span`, `strong`, `sub`, `sup`, `time`, `u`, `var`, `wbr`|[formattingtags.c](src/main/c/rule/tag/formattingtags.c)|
|
||||
|`$layout`|`blockquote`, `body`, `colgroup`, `datalist`, `dialog`, `div`, `dl`, `fieldset`, `figure`, `footer`, `form`, `head`, `header`, `hgroup`, `html`, `main`, `map`, `menu`, `nav`, `ol`, `optgroup`, `picture`, `section`, `select`, `table`, `tbody`, `tfoot`, `thead`, `tr`, `ul`|[layouttags.c](src/main/c/rule/tag/layouttags.c)|
|
||||
|`$specific`|All [SVG tags](src/main/c/rule/tag/svgtags.c), `area`, `base`, `br`, `code`, `col`, `embed`, `hr`, `img`, `input`, `param`, `pre`, `script`, `source`, `track`|[specifictags.c](src/main/c/rule/tag/specifictags.c)|
|
||||
|`$heading`|`hgroup`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`|[headingtags.c](src/main/c/rule/tag/headingtags.c)|
|
||||
|`$media`|`audio`, `video`|[mediatags.c](src/main/c/rule/tag/mediatags.c)|
|
||||
|`$sectioning`|`article`, `aside`, `nav`, `section`|[sectioningtags.c](src/main/c/rule/tag/sectioningtags.c)|
|
||||
|`$void`|`area`, `base`, `br`, `col`, `embed`, `hr`, `img`, `input`, `keygen`, `link`, `meta`, `param`, `source`, `track`, `wbr`|[voidtags.c](src/main/c/rule/tag/voidtags.c)|
|
||||
|`$wss`|`pre`, `code`|[wsstags.c](src/main/c/rule/tag/wsstags.c)|
|
||||
|
||||
As an example, for `--MXcollapseWhitespace`, here are some possible values:
|
||||
|
||||
|Arguments|Description|
|
||||
|---|---|
|
||||
|`--MXcollapseWhitespace $wss`|Collapse whitespace in all tags except `$wss` ones|
|
||||
|`--MXcollapseWhitespace $content,$wss`|Collapse whitespace in all tags except `$content` and `$wss` ones|
|
||||
|`--MXcollapseWhitespace $content,$wss,dd`|Collapse whitespace in all tags except `$content` and `$wss` ones, as well as the `dd` tag|
|
||||
|`--MXcollapseWhitespace sup,dd`|Collapse whitespace in all tags except `sup` and `dd`|
|
||||
|`--MXcollapseWhitespace`|Collapse whitespace in all tags|
|
||||
|`--MXcollapseWhitespace *`|Don't collapse whitespace in any tag|
|
||||
|
||||
#### `--MXcollapseWhitespace $wss`
|
||||
|
||||
Reduce a sequence of whitespace characters in text nodes to a single space (U+0020), unless they are a child of the tags specified by this option.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
··The·quick·brown·fox↵
|
||||
··jumps·over·the·lazy↵
|
||||
··dog.↵
|
||||
</p>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<p>·The·quick·brown·fox·jumps·over·the·lazy·dog.·</p>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### `--MXdestroyWholeWhitespace $wss,$content,$formatting`
|
||||
|
||||
Remove any text nodes that only consist of whitespace characters, unless they are a child of the tags specified by this option.
|
||||
|
||||
Especially useful when using `display: inline-block` so that whitespace between elements (e.g. indentation) does not alter layout and styling.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<div>↵
|
||||
··<h1></h1>↵
|
||||
··<ul></ul>↵
|
||||
··A·quick·<strong>brown</strong>·<em>fox</em>.↵
|
||||
</div>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<div><h1></h1><ul></ul>↵
|
||||
··A·quick·<strong>brown</strong><em>fox</em>.↵
|
||||
</div>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### `--MXtrimWhitespace $wss,$formatting`
|
||||
|
||||
Remove any whitespace from the start and end of a tag, if the first and/or last node is a text node, unless the tag is one of the tags specified by this option.
|
||||
|
||||
Useful when combined with whitespace collapsing.
|
||||
|
||||
Other whitespace between text nodes and tags is not removed, as it is not recommended to mix non-formatting tags with raw text.
|
||||
|
||||
Basically, a tag should only either contain text and [formatting tags](#formatting-tags), or only non-formatting tags.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<p>↵
|
||||
··Hey,·I·<em>just</em>·found↵
|
||||
··out·about·this·<strong>cool</strong>·website!↵
|
||||
··<div></div>↵
|
||||
</p>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<p>Hey,·I·<em>just</em>·found↵
|
||||
··out·about·this·<strong>cool</strong>·website!↵
|
||||
··<div></div></p>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### `--MXtrimClassAttribute`
|
||||
|
||||
Don't trim and collapse whitespace in `class` attribute values.
|
||||
|
||||
<table><thead><tr><th>Before<th>After<tbody><tr><td>
|
||||
|
||||
```html
|
||||
<div class="
|
||||
hi
|
||||
lo
|
||||
a b c
|
||||
d e
|
||||
f g
|
||||
"></div>
|
||||
```
|
||||
|
||||
<td>
|
||||
|
||||
```html
|
||||
<div class="hi lo a b c d e f g"></div>
|
||||
```
|
||||
|
||||
</table>
|
||||
|
||||
#### `--MXdecEnt`
|
||||
|
||||
Don't decode any valid entities into their UTF-8 values.
|
||||
|
||||
#### `--MXcondComments`
|
||||
|
||||
Don't minify the contents of conditional comments, including downlevel-revealed conditional comments.
|
||||
|
||||
#### `--MXattrQuotes`
|
||||
|
||||
Don't remove quotes around attribute values when possible.
|
||||
|
||||
#### `--MXcomments`
|
||||
|
||||
Don't remove any comments. Conditional comments are never removed regardless of this setting.
|
||||
|
||||
#### `--MXoptTags`
|
||||
|
||||
Don't remove optional starting or ending tags.
|
||||
|
||||
#### `--MXtagWS`
|
||||
|
||||
Don't remove spaces between attributes when possible.
|
||||
|
||||
### Non-options
|
||||
|
||||
#### Explicitly important
|
||||
|
||||
The following minification strategies, which remove attributes and tags, are not available in hyperbuild, as the affected attributes and tags should not have been declared in the first place.
|
||||
|
||||
If they exist, it is assumed there is a special reason for being so.
|
||||
|
||||
- Remove empty attributes (including ones that would be empty after minification e.g. `class=" "`)
|
||||
- Remove empty elements
|
||||
- Remove redundant attributes
|
||||
- Remove `type` attribute on `<script>` tags
|
||||
- Remove `type` attribute on `<style>` and `<link>` tags
|
||||
Patches to change any of these are welcome!
|
||||
|
|
36
compile.sh
36
compile.sh
|
@ -1,36 +0,0 @@
|
|||
#!/usr/bin/env bash

# Builds hyperbuild into out/: a native executable via gcc and an Emscripten
# module via emcc, with the compiled TypeScript wrapper appended as post-JS.
#
# Usage: compile.sh [--debug]
#   --debug  compile with -O0 -g and Emscripten ASSERTIONS=2 (default: -O3, ASSERTIONS=0)

# Abort on the first failing command. This is enabled before changing directory so
# that a failed `pushd` cannot let the build run from the wrong working directory.
set -e

pushd "$(dirname "$0")" > /dev/null

# NOTE(review): presumably generates the tmp/ sources compiled below -- confirm.
./preprocess.sh

npx tsc

# Make sure the output directory exists before anything is written into it.
mkdir -p out

# Prefix the compiled TypeScript with a CommonJS `exports` shim so that it can be
# appended to the Emscripten output via --post-js.
main_js_path="out/main/ts/main.js"
ts_gen="$(cat "$main_js_path")"
echo "module.exports = {}; var exports = module.exports; $ts_gen" > "$main_js_path";

cmp_std="--std=c11"
cmp_errors="-Wall -Wextra -Werror"
cmp_inc="-I tmp/ -I lib/"
if [[ "$1" == "--debug" ]]; then
  cmp_optimisation="-O0 -g"
  cmp_em_debug="-s ASSERTIONS=2"
else
  cmp_optimisation="-O3"
  cmp_em_debug="-s ASSERTIONS=0"
fi

# The flag variables are intentionally unquoted so that each one splits into
# separate compiler arguments.
gcc $cmp_std $cmp_errors $cmp_optimisation $cmp_inc -o out/hyperbuild "tmp/cli.c"
emcc $cmp_std $cmp_errors $cmp_optimisation $cmp_em_debug $cmp_inc -o out/hyperbuild.em.js "tmp/em.c" \
  -s ALLOW_MEMORY_GROWTH=1 \
  -s "EXPORTED_FUNCTIONS=['_hbe_err_code', '_hbe_err_message', '_hbe_err_destroy', '_em_init', '_em_entry']" \
  -s "BINARYEN_TRAP_MODE='clamp'" \
  --post-js "$main_js_path"

# Keep only the type declarations from the TypeScript build; discard the rest.
rm "$main_js_path"
mv out/main/ts/main.d.ts out/main.d.ts
rm -rf out/main

popd > /dev/null

exit 0
|
270
src/hb-cli.c
270
src/hb-cli.c
|
@ -1,270 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <getopt.h>
|
||||
#include "./stream/content/html.c"
|
||||
#include "./__main__.c"
|
||||
|
||||
/**
 * Parses the value of a tag-list option into a set of tag names.
 *
 * @param hbe_err  receives the error if parsing fails
 * @param argv     option value: NULL when no value was given, "*" for the wildcard,
 *                 otherwise a comma-separated list whose parts are either a tag name
 *                 or a `$`-prefixed name of a built-in tag set
 * @return NULL when `argv` is "*" (NOTE(review): presumably how "all tags" is
 *         represented to the consumer -- confirm) or when an error occurred
 *         (check `*hbe_err` to tell the two apart); an empty set when `argv` is NULL;
 *         otherwise a newly created set holding every listed tag
 */
nh_set_str_t hbu_streamoptions_parse_list_of_tags(hbe_err_t *hbe_err, char *argv) {
  nh_set_str_t set = NULL;
  hb_list_charlist_t list = NULL;

  // Wildcard: no set is built. The comparison must be `== 0`; without it every
  // value *except* "*" took this early exit and "*" itself was parsed as a tag name.
  if (argv != NULL && strcmp(argv, "*") == 0) {
    return NULL;
  }

  set = nh_set_str_create();

  // No value provided: the empty set.
  if (argv == NULL) {
    return set;
  }

  list = hb_list_charlist_create_from_split((hb_proc_char_t *) argv, ',');

  for (size_t i = 0; i < list->length; i++) {
    hb_list_char_t part = hb_list_charlist_get(list, i);

    if (hb_list_char_get(part, 0) == '$') {
      // Set of tags: drop the leading `$`, then look the set up by name.
      hb_list_char_shift(part);
      // Fetch the underlying characters only after the shift, so the pointer is
      // valid regardless of how the shift is implemented.
      hb_proc_char_t *set_name = hb_list_char_underlying(part);
      HBE_CATCH_F(hbu_streamoptions_parse_and_add_tag_set, (char *) set_name, set);

    } else {
      // Single tag
      hb_proc_char_t *tag_name = hb_list_char_underlying(part);
      if (!hb_rule_tags_check(tag_name)) {
        HBE_THROW_F(HBE_CLI_INVALID_TAG, "%s is not a standard tag and was provided as part of an argument's value", tag_name);
      }
      // The set receives its own copy, as `list` is destroyed below.
      nh_set_str_add(set, (char *) hb_list_char_underlying_copy(part));
    }
  }

finally:
  if (list != NULL) {
    hb_list_charlist_destroy_from_split(list);
    list = NULL;
  }
  // On error, do not hand a partially filled set back to the caller.
  if (*hbe_err != NULL) {
    if (set != NULL) {
      nh_set_str_destroy(set);
      set = NULL;
    }
  }
  return set;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Adds every error named in a comma-separated list to a set of suppressed errors.
 *
 * Names are given without the `HBE_PARSE_` prefix. Parsing stops at the first
 * unrecognised name, which is reported through `hbe_err`; names before it have
 * already been added by then.
 *
 * @param hbe_err            receives the error if a name is not recognised
 * @param suppressed_errors  set that the matching error codes are added to
 * @param argv               comma-separated error names; NULL is a no-op
 */
void hbu_streamoptions_parse_and_add_errors_to_suppress(hbe_err_t *hbe_err, nh_set_int32_t suppressed_errors, char *argv) {
  hb_list_charlist_t names = NULL;

  if (argv != NULL) {
    names = hb_list_charlist_create_from_split((hb_proc_char_t *) argv, ',');

    for (size_t idx = 0; idx < names->length; idx++) {
      hb_list_char_t name = hb_list_charlist_get(names, idx);
      int code = 0;

      // Map the name to its error code first, then add it in one place.
      if (hb_list_char_compare_lit(name, "MALFORMED_ENTITY") == 0) {
        code = HBE_PARSE_MALFORMED_ENTITY;
      } else if (hb_list_char_compare_lit(name, "BARE_AMPERSAND") == 0) {
        code = HBE_PARSE_BARE_AMPERSAND;
      } else if (hb_list_char_compare_lit(name, "INVALID_ENTITY") == 0) {
        code = HBE_PARSE_INVALID_ENTITY;
      } else if (hb_list_char_compare_lit(name, "NONSTANDARD_TAG") == 0) {
        code = HBE_PARSE_NONSTANDARD_TAG;
      } else if (hb_list_char_compare_lit(name, "UCASE_ATTR") == 0) {
        code = HBE_PARSE_UCASE_ATTR;
      } else if (hb_list_char_compare_lit(name, "UCASE_TAG") == 0) {
        code = HBE_PARSE_UCASE_TAG;
      } else if (hb_list_char_compare_lit(name, "UNQUOTED_ATTR") == 0) {
        code = HBE_PARSE_UNQUOTED_ATTR;
      } else if (hb_list_char_compare_lit(name, "SELF_CLOSING_TAG") == 0) {
        code = HBE_PARSE_SELF_CLOSING_TAG;
      } else {
        HBE_THROW_F(HBE_CLI_INVALID_SUPPRESSABLE_ERROR, "Unrecognised suppressable error `%s`", hb_list_char_underlying(name));
      }

      nh_set_int32_add(suppressed_errors, code);
    }
  }

finally:
  // Reached on success, on error, and when there was nothing to parse.
  if (names != NULL) {
    hb_list_charlist_destroy_from_split(names);
    names = NULL;
  }
}
|
||||
|
||||
/**
 * Logs each boolean minification setting of `opt` as a labelled key/value pair,
 * one `hbl_info_kv_boolean` call per setting, in a fixed order.
 *
 * @param opt  stream options whose settings are logged
 */
void hbu_streamoptions_log(hbu_streamoptions_t opt) {
  struct {
    char *label;
    int enabled;
  } settings[] = {
    {"Trim `class` attributes", opt->trim_class_attr},
    {"Decode entities", opt->decode_entities},
    {"Minify conditional comments", opt->min_conditional_comments},
    {"Remove attribute quotes", opt->remove_attr_quotes},
    {"Remove comments", opt->remove_comments},
    {"Remove optional tags", opt->remove_optional_tags},
    {"Remove tag whitespace", opt->remove_tag_whitespace},
  };

  for (size_t i = 0; i < sizeof(settings) / sizeof(settings[0]); i++) {
    hbl_info_kv_boolean(settings[i].label, settings[i].enabled);
  }
}
|
||||
|
||||
/**
 * Adds the members of a built-in tag set to `set`.
 *
 * @param hbe_err   receives the error if `set_name` is not a known tag set
 * @param set_name  name of the tag set, without the leading `$`
 * @param set       set that the tags are added to
 */
void hbu_streamoptions_parse_and_add_tag_set(hbe_err_t *hbe_err, char *set_name, nh_set_str_t set) {
  if (strcmp(set_name, "content") == 0) {
    hb_rule_contenttags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "contentfirst") == 0) {
    hb_rule_contentfirsttags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "formatting") == 0) {
    hb_rule_formattingtags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "layout") == 0) {
    hb_rule_layouttags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "specific") == 0) {
    hb_rule_specifictags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "heading") == 0) {
    hb_rule_headingtags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "media") == 0) {
    hb_rule_mediatags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "sectioning") == 0) {
    hb_rule_sectioningtags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "void") == 0) {
    hb_rule_voidtags_add_elems(set);
    return;
  }
  if (strcmp(set_name, "wss") == 0) {
    hb_rule_wsstags_add_elems(set);
    return;
  }

  // No built-in set has this name.
  HBE_THROW_V(HBE_CLI_INVALID_TAG_SET, "Unrecognised tag set `%s`", set_name);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
hb_init();
|
||||
|
||||
hbe_err_t err = NULL;
|
||||
hbe_err_t *hbe_err = &err;
|
||||
|
||||
hbu_fstreamout_t output = NULL;
|
||||
hb_list_char_t output_buffer = NULL;
|
||||
|
||||
// Prepare config
|
||||
char *input_path = NULL;
|
||||
char *output_path = NULL;
|
||||
int logging = 0;
|
||||
int config_keep = 0;
|
||||
int config_buffer = 0;
|
||||
hbu_streamoptions_t config_stream = hbu_streamoptions_create();
|
||||
|
||||
int nondefault_ex_collapse_whitespace = 0;
|
||||
int nondefault_ex_destroy_whole_whitespace = 0;
|
||||
int nondefault_ex_trim_whitespace = 0;
|
||||
|
||||
// Parse arguments
|
||||
while (1) {
|
||||
struct option long_options[] = {
|
||||
{"keep", no_argument, &config_keep, 1},
|
||||
{"buffer", no_argument, &config_buffer, 1},
|
||||
{"verbose", no_argument, &logging, 1},
|
||||
{"input", required_argument, NULL, 'i'},
|
||||
{"output", required_argument, NULL, 'o'},
|
||||
{"suppress", required_argument, NULL, 's'},
|
||||
|
||||
{"MXcollapseWhitespace", optional_argument, NULL, 40},
|
||||
{"MXdestroyWholeWhitespace", optional_argument, NULL, 41},
|
||||
{"MXtrimWhitespace", optional_argument, NULL, 42},
|
||||
|
||||
{"MXtrimClassAttr", no_argument, &(config_stream->trim_class_attr), 0},
|
||||
{"MXdecEnt", no_argument, &(config_stream->decode_entities), 0},
|
||||
{"MXcondComments", no_argument, &(config_stream->min_conditional_comments), 0},
|
||||
{"MXattrQuotes", no_argument, &(config_stream->decode_entities), 0},
|
||||
{"MXcomments", no_argument, &(config_stream->remove_comments), 0},
|
||||
{"MXoptTags", no_argument, &(config_stream->remove_optional_tags), 0},
|
||||
{"MXtagWS", no_argument, &(config_stream->remove_tag_whitespace), 0},
|
||||
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
int option_index = 0;
|
||||
int c = getopt_long(argc, argv, "kbvi:o:s:", long_options, &option_index);
|
||||
|
||||
if (c == -1) {
|
||||
if (optind != argc) {
|
||||
HBE_THROW_F(HBE_CLI_TOO_MANY_OPTIONS, "Too many arguments provided");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case 'i':
|
||||
input_path = optarg;
|
||||
break;
|
||||
|
||||
case 'o':
|
||||
output_path = optarg;
|
||||
break;
|
||||
|
||||
case 's':
|
||||
HBE_CATCH_F(hbu_streamoptions_parse_and_add_errors_to_suppress, config_stream->suppressed_errors, optarg);
|
||||
break;
|
||||
|
||||
case 40:
|
||||
nondefault_ex_collapse_whitespace = 1;
|
||||
config_stream->ex_collapse_whitespace = HBE_CATCH_F(hbu_streamoptions_parse_list_of_tags, optarg);
|
||||
break;
|
||||
|
||||
case 41:
|
||||
nondefault_ex_destroy_whole_whitespace = 1;
|
||||
config_stream->ex_destroy_whole_whitespace = HBE_CATCH_F(hbu_streamoptions_parse_list_of_tags, optarg);
|
||||
break;
|
||||
|
||||
case 42:
|
||||
nondefault_ex_trim_whitespace = 1;
|
||||
config_stream->ex_trim_whitespace = HBE_CATCH_F(hbu_streamoptions_parse_list_of_tags, optarg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nondefault_ex_collapse_whitespace) config_stream->ex_collapse_whitespace = hbu_streamoptions_default_ex_collapse_whitespace();
|
||||
if (!nondefault_ex_destroy_whole_whitespace) config_stream->ex_destroy_whole_whitespace = hbu_streamoptions_default_ex_destroy_whole_whitespace();
|
||||
if (!nondefault_ex_trim_whitespace) config_stream->ex_trim_whitespace = hbu_streamoptions_default_ex_trim_whitespace();
|
||||
|
||||
if (logging) {
|
||||
hbl_info_kv_string("Input", input_path);
|
||||
hbl_info_kv_string("Output", output_path);
|
||||
hbl_info_kv_boolean("Buffer output until success", config_buffer);
|
||||
hbl_info_kv_boolean("Keep output file on error", config_keep);
|
||||
hbu_streamoptions_log(config_stream);
|
||||
}
|
||||
|
||||
hb_proc_t pipe = hb_proc_create_blank(input_path);
|
||||
|
||||
hbu_fstreamin_t input = HBE_CATCH_F(hbu_fstreamin_create, input_path);
|
||||
hb_proc_blank_set_input_fstreamin(pipe, input);
|
||||
|
||||
if (config_buffer) {
|
||||
output_buffer = hb_list_char_create();
|
||||
hb_proc_blank_set_output_buffer(pipe, output_buffer);
|
||||
} else {
|
||||
output = HBE_CATCH_F(hbu_fstreamout_create, output_path);
|
||||
hb_proc_blank_set_output_fstreamout(pipe, output);
|
||||
}
|
||||
|
||||
HBE_CATCH_F(hbs_content, config_stream, pipe, NULL);
|
||||
|
||||
if (config_buffer) {
|
||||
output = HBE_CATCH_F(hbu_fstreamout_create, output_path);
|
||||
HBE_CATCH_F(hbu_fstreamout_write_buffer, output, output_buffer);
|
||||
}
|
||||
|
||||
finally:
|
||||
if (err != NULL) {
|
||||
hbl_error(err);
|
||||
if (output != NULL && !config_keep && !config_buffer) {
|
||||
// Delete only after opening output stream (don't delete before existing file has not been touched)
|
||||
// Don't need to set if $config_buffer, as it won't write anything anyway
|
||||
if (unlink(output_path)) {
|
||||
hbl_log(HBL_LOG_WARN, "Failed to delete file %s with error %d", output_path, errno);
|
||||
} else {
|
||||
hbl_log(HBL_LOG_INFO, "%s has been deleted", output_path);
|
||||
}
|
||||
}
|
||||
exit(err->code);
|
||||
}
|
||||
|
||||
if (logging) {
|
||||
hbl_log(HBL_LOG_INFO, "All done!");
|
||||
}
|
||||
exit(0);
|
||||
}
|
|
@ -1 +0,0 @@
|
|||
|
|
@ -1,94 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "hb-rule.h"
|
||||
#include "hb-config.h"
|
||||
|
||||
static struct hb_config_ex_s _ex_collapse_whitespace_default;
|
||||
static struct hb_config_ex_s _ex_destroy_whole_whitespace_default;
|
||||
static struct hb_config_ex_s _ex_trim_whitespace_default;
|
||||
|
||||
// WARNING: Rules must be initialised before calling this function
|
||||
void hb_config_init(void)
|
||||
{
|
||||
nh_set_str ex_collapse_whitespace_set = nh_set_str_create();
|
||||
hb_rule_wsstags_add_elems(ex_collapse_whitespace_set);
|
||||
_ex_collapse_whitespace_default = {HB_CONFIG_EX_MODE_DEFAULT,
|
||||
ex_collapse_whitespace_set};
|
||||
|
||||
nh_set_str ex_destroy_whole_whitespace_set = nh_set_str_create();
|
||||
hb_rule_wsstags_add_elems(ex_destroy_whole_whitespace_set);
|
||||
hb_rule_contenttags_add_elems(ex_destroy_whole_whitespace_set);
|
||||
hb_rule_formattingtags_add_elems(ex_destroy_whole_whitespace_set);
|
||||
_ex_destroy_whole_whitespace_default = {
|
||||
HB_CONFIG_EX_MODE_DEFAULT, ex_destroy_whole_whitespace_set};
|
||||
|
||||
nh_set_str ex_trim_whitespace_set = nh_set_str_create();
|
||||
hb_rule_wsstags_add_elems(ex_trim_whitespace_set);
|
||||
hb_rule_formattingtags_add_elems(ex_trim_whitespace_set);
|
||||
_ex_trim_whitespace_default = {HB_CONFIG_EX_MODE_DEFAULT,
|
||||
ex_trim_whitespace_set};
|
||||
}
|
||||
|
||||
hb_config_t* hb_config_create(void)
|
||||
{
|
||||
hb_config_t* config = malloc(sizeof(struct hb_config_s));
|
||||
config->ex_collapse_whitespace = _ex_collapse_whitespace_default;
|
||||
config->ex_destroy_whole_whitespace =
|
||||
_ex_destroy_whole_whitespace_default;
|
||||
config->ex_trim_whitespace = _ex_trim_whitespace_default;
|
||||
config->suppressed_errors = nh_set_int32_create();
|
||||
config->trim_class_attr = true;
|
||||
config->decode_entities = true;
|
||||
config->min_conditional_comments = true;
|
||||
config->remove_attr_quotes = true;
|
||||
config->remove_comments = true;
|
||||
config->remove_optional_tags = true;
|
||||
config->remove_tag_whitespace = true;
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
 * Overwrites an exception setting with mode NONE and no set.
 *
 * @param config_ex setting to overwrite
 */
void hb_config_ex_use_none(hb_config_ex_t* config_ex)
{
	// Assignment (unlike initialisation) needs a compound literal.
	*config_ex = (hb_config_ex_t){HB_CONFIG_EX_MODE_NONE, NULL};
}
|
||||
|
||||
/**
 * Overwrites an exception setting with mode CUSTOM and the supplied set.
 *
 * @param config_ex  setting to overwrite
 * @param custom_set set stored in the setting
 */
void hb_config_ex_use_custom(hb_config_ex_t* config_ex, nh_set_str custom_set)
{
	// Assignment (unlike initialisation) needs a compound literal.
	*config_ex = (hb_config_ex_t){HB_CONFIG_EX_MODE_CUSTOM, custom_set};
}
|
||||
|
||||
/**
 * Overwrites an exception setting with mode ALL; the set member is left
 * zero-initialised because only the mode is given.
 *
 * @param config_ex setting to overwrite
 */
void hb_config_ex_use_all(hb_config_ex_t* config_ex)
{
	// Assignment (unlike initialisation) needs a compound literal.
	*config_ex = (hb_config_ex_t){HB_CONFIG_EX_MODE_ALL};
}
|
||||
|
||||
/**
 * Destroys the suppressed-error set of a config and frees the config itself.
 * Passing NULL is a no-op, mirroring free().
 *
 * NOTE(review): the exception sets are not released here, although the
 * hb_config_ex_mode_t comments say a CUSTOM set "will be destroyed" --
 * confirm who owns those sets.
 *
 * @param opt config to destroy, may be NULL
 */
void hb_config_destroy(hb_config_t* opt)
{
	if (opt == NULL) {
		return;
	}

	nh_set_int32_destroy(opt->suppressed_errors);
	free(opt);
}
|
||||
|
||||
/**
 * Reports whether an error code is in the config's suppressed-error set.
 *
 * @param opt     config (passed by value)
 * @param errcode error code to look up
 * @return result of nh_set_int32_has for that code
 */
bool hb_config_supressed_error_check(hb_config_t opt, hb_error_t errcode)
{
	// opt is a struct, not a pointer, so its member is reached with '.'.
	return nh_set_int32_has(&opt.suppressed_errors, errcode);
}
|
||||
|
||||
bool hb_config_ex_check(hb_config_t* config, hb_proc_char_t* query)
|
||||
{
|
||||
switch (config->mode) {
|
||||
case HB_CONFIG_EX_MODE_ALL:
|
||||
return true;
|
||||
|
||||
case HB_CONFIG_EX_MODE_NONE:
|
||||
return false;
|
||||
|
||||
default:
|
||||
return nh_set_str_has(config->set, query);
|
||||
}
|
||||
if (config->mode == HB_CONFIG_EX_MODE_ALL) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// How an exception setting is interpreted by hb_config_ex_check().
typedef enum {
	HB_CONFIG_EX_MODE_NONE,    // i.e. minify all without exception
	HB_CONFIG_EX_MODE_DEFAULT, // entire struct will not be destroyed
	HB_CONFIG_EX_MODE_CUSTOM,  // set will be destroyed
	HB_CONFIG_EX_MODE_ALL,     // i.e. don't minify
} hb_config_ex_mode_t;
|
||||
|
||||
typedef struct {
|
||||
hb_config_ex_mode_t mode;
|
||||
nh_set_str set;
|
||||
} hb_config_ex_t;
|
||||
|
||||
typedef struct {
|
||||
hb_config_ex_t ex_collapse_whitespace;
|
||||
hb_config_ex_t ex_destroy_whole_whitespace;
|
||||
hb_config_ex_t ex_trim_whitespace;
|
||||
nh_set_int32 suppressed_errors;
|
||||
bool trim_class_attributes;
|
||||
bool decode_entities;
|
||||
bool remove_attr_quotes;
|
||||
bool remove_comments;
|
||||
bool remove_optional_tags;
|
||||
bool remove_tag_whitespace;
|
||||
} hb_config_t;
|
||||
|
||||
// WARNING: Rules must be initialised before calling this function
|
||||
void hb_config_init(void);
|
||||
hb_config_t* hb_config_create(void);
|
||||
void hb_config_ex_use_none(hb_config_ex_t* config_ex);
|
||||
void hb_config_ex_use_custom(hb_config_ex_t* config_ex, nh_set_str custom_set);
|
||||
void hb_config_ex_use_all(hb_config_ex_t* config_ex);
|
||||
void hb_config_destroy(hb_config_t* opt);
|
||||
bool hb_config_supressed_error_check(hb_config_t opt, hb_error_t errcode);
|
||||
bool hb_config_ex_check(hb_config_ex_t* config, hb_proc_char_t* query);
|
|
@ -1,8 +0,0 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include "nicehash/list.h"
|
||||
#include "nicehash/list-ucp.h"
|
||||
#include "nicehash/map-str.h"
|
||||
|
||||
NH_MAP_STR(int32, int32_t);
|
||||
NH_MAP_STR(set_str, nh_set_str*);
|
|
@ -1,37 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// Error codes, grouped by prefix; values are implicit and start at 0.
typedef enum {
	HBE_NO_ERROR,

	// Internal errors
	HBE_INTERR_UNKNOWN_ENTITY_TYPE,
	HBE_INTERR_UNKNOWN_CONTENT_NEXT_STATE,

	// Command-line errors
	HBE_CLI_TOO_MANY_OPTIONS,
	HBE_CLI_INVALID_TAG_SET,
	HBE_CLI_INVALID_TAG,
	HBE_CLI_INVALID_SUPPRESSABLE_ERROR,

	// I/O errors
	HBE_IO_FOPEN_FAIL,
	HBE_IO_FCLOSE_FAIL,
	HBE_IO_FREAD_FAIL,
	HBE_IO_FWRITE_FAIL,

	// Parse errors
	HBE_PARSE_MALFORMED_ENTITY,
	HBE_PARSE_BARE_AMPERSAND,
	HBE_PARSE_INVALID_ENTITY,
	HBE_PARSE_NONSTANDARD_TAG,
	HBE_PARSE_UCASE_TAG,
	HBE_PARSE_UCASE_ATTR,
	HBE_PARSE_UNQUOTED_ATTR,
	HBE_PARSE_ILLEGAL_CHILD,
	HBE_PARSE_UNCLOSED_TAG,
	HBE_PARSE_SELF_CLOSING_TAG,
	HBE_PARSE_NO_SPACE_BEFORE_ATTR,

	HBE_PARSE_UNEXPECTED_END,
	HBE_PARSE_EXPECTED_NOT_FOUND,
} hb_error_t;
|
||||
|
||||
// NOTE(review): this typedef has no declarator, so it introduces no type
// name. The intended name is not visible here -- confirm or remove.
typedef struct {
	int set[32];
} ;
|
|
@ -1,24 +0,0 @@
|
|||
#include <errno.h>
|
||||
#include "../char/char.c"
|
||||
#include "../execution/error.c"
|
||||
#include "./__base__.c"
|
||||
|
||||
HBU_FSTREAM_BUILD_INFRA(in, "r", "read", "reading", stdin)
|
||||
|
||||
/**
 * Reads one character from an input file stream.
 *
 * @param hbe_err   error slot used by HBE_THROW
 * @param fstreamin stream to read from
 * @return the character read, or HB_EOD when nothing was read and the stream
 *         reports no error
 * @throws HBE_IO_FREAD_FAIL when ferror() reports a read error
 */
hb_eod_char_t hbu_fstreamin_read(hbe_err_t* hbe_err, hbu_fstreamin_t fstreamin)
{
	hb_proc_char_t c;

	// fread() returns the number of complete items read (here 0 or 1), not
	// a byte count, so it must be compared with the item count requested.
	if (fread(&c, SIZEOF_CHAR, 1, fstreamin->fd) != 1) {
		if (ferror(fstreamin->fd)) {
			HBE_THROW(HBE_IO_FREAD_FAIL,
				  "Failed to read input file %s",
				  fstreamin->name);
		}

		// Must be EOF
		return HB_EOD;
	}

	return c;
}
|
|
@ -1,32 +0,0 @@
|
|||
#include <errno.h>
|
||||
#include "../execution/error.c"
|
||||
#include "../list/char.c"
|
||||
#include "./__base__.c"
|
||||
|
||||
HBU_FSTREAM_BUILD_INFRA(out, "w", "write", "writing", stdout)
|
||||
|
||||
static void _hbu_fstreamout_fwrite(hbe_err_t* hbe_err,
|
||||
hbu_fstreamout_t fstreamout,
|
||||
hb_proc_char_t* source, size_t length)
|
||||
{
|
||||
if (fwrite(source, SIZEOF_CHAR, length, fstreamout->fd)
|
||||
!= SIZEOF_CHAR * length) {
|
||||
HBE_THROW_V(HBE_IO_FWRITE_FAIL,
|
||||
"Failed to write to output file %s",
|
||||
fstreamout->name);
|
||||
}
|
||||
}
|
||||
|
||||
void hbu_fstreamout_write_buffer(hbe_err_t* hbe_err,
|
||||
hbu_fstreamout_t fstreamout,
|
||||
hb_list_char_t buffer)
|
||||
{
|
||||
HBE_CATCH_V(_hbu_fstreamout_fwrite, fstreamout,
|
||||
hb_list_char_underlying(buffer), buffer->length);
|
||||
}
|
||||
|
||||
void hbu_fstreamout_write(hbe_err_t* hbe_err, hbu_fstreamout_t fstreamout,
|
||||
hb_proc_char_t c)
|
||||
{
|
||||
HBE_CATCH_V(_hbu_fstreamout_fwrite, fstreamout, &c, 1);
|
||||
}
|
|
@ -1,51 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
// Encodings recorded in the `encoding` member of the HB_FILE stream structs.
typedef enum {
	HB_FILE_ENC_UTF_8,
	HB_FILE_ENC_UTF_16,
} hb_file_enc_t;
|
||||
|
||||
/*
 * Generates a file-stream type plus its create/destroy functions.
 *
 *   type  suffix used in the generated names (hb_file_<type>_t, ...)
 *   mode  fopen() mode string
 *   noun  word spliced into the close-failure message
 *   verb  currently unused by the expansion
 *   std   standard stream used (and reported by name) when path is NULL
 *
 * hb_file_<type>_create(path) returns NULL when allocation or fopen() fails;
 * the struct is released on the fopen() failure path instead of leaking.
 * The `encoding` member is not set by create.
 *
 * hb_file_<type>_destroy(hbe_err, fstream) takes the pointer returned by
 * create. The struct is freed before the close failure is reported, so it is
 * released on both paths regardless of how HBE_THROW_V leaves the function.
 */
#define HB_FILE(type, mode, noun, verb, std) \
	typedef struct { \
		char const* name; \
		hb_file_enc_t encoding; \
		FILE* fd; \
	} hb_file_##type##_t; \
	\
	hb_file_##type##_t* hb_file_##type##_create(char* path) \
	{ \
		hb_file_##type##_t* fstream = malloc(sizeof *fstream); \
		\
		if (fstream == NULL) { \
			return NULL; \
		} \
		\
		if (path == NULL) { \
			fstream->name = #std; \
			fstream->fd = std; \
		} else { \
			FILE* fd = fopen(path, mode); \
			\
			if (fd == NULL) { \
				free(fstream); \
				return NULL; \
			} \
			\
			fstream->name = path; \
			fstream->fd = fd; \
		} \
		\
		return fstream; \
	} \
	\
	void hb_file_##type##_destroy(hbe_err_t* hbe_err, \
				      hb_file_##type##_t* fstream) \
	{ \
		char const* name = fstream->name; \
		int close_failed = fclose(fstream->fd) == EOF; \
		int close_errno = errno; \
		\
		free(fstream); \
		\
		if (close_failed) { \
			HBE_THROW_V(HBE_IO_FCLOSE_FAIL, \
				    "Failed to close " noun \
				    " stream for file %s with error %d", \
				    name, close_errno); \
		} \
	}
|
|
@ -1,162 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include "../../rule/char/ucalpha.c"
|
||||
#include "../char/char.c"
|
||||
#include "../execution/error.c"
|
||||
#include "../list/char.c"
|
||||
#include "../fstream/fstreamin.c"
|
||||
#include "../fstream/fstreamout.c"
|
||||
|
||||
// Use macro to prevent having to allocate (and therefore free/manage) memory
// Expands to fn(a, ...) with " at %s [line %d, column %d]" appended to the
// format literal and proc->name, proc->line, proc->column appended to the
// arguments. A variable named `proc` must be in scope at the call site, at
// least one variadic argument is required, and the expansion already ends
// with its own ';'.
|
||||
#define HB_PROC_FORMAT_WITH_POS(fn, a, format, ...) fn(a, format " at %s [line %d, column %d]", __VA_ARGS__, proc->name, proc->line, proc->column);
|
||||
|
||||
/**
|
||||
* Creates an error using a message with the current position appended.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param errcode error code
|
||||
* @param reason message
|
||||
* @return error
|
||||
*/
|
||||
hbe_err_t hb_proc_error(hb_proc_t* proc, hb_error_t errcode, const char *reason, ...) {
|
||||
va_list args;
|
||||
va_start(args, reason);
|
||||
|
||||
char *msg = calloc(HB_PROC_MAX_ERR_MSG_LEN + 1, SIZEOF_CHAR);
|
||||
vsnprintf(msg, HB_PROC_MAX_ERR_MSG_LEN, reason, args);
|
||||
|
||||
va_end(args);
|
||||
|
||||
hbe_err_t err = HBU_FN_FORMAT_WITH_POS(hbe_err_create, errcode, "%s", msg);
|
||||
free(msg);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a character to the redirect, if enabled, otherwise output, of a proc,
|
||||
* unless the output is masked.
|
||||
*
|
||||
* @param hbe_err pointer to hbe_err_t
|
||||
* @param proc proc
|
||||
* @param c character to write
|
||||
* @return a freshly-created proc
|
||||
* @throws on write error
|
||||
*/
|
||||
static void _hb_proc_write_to_output(hbe_err_t *hbe_err, hb_proc_t* proc, hb_proc_char_t c) {
|
||||
if (!proc->mask) {
|
||||
hb_list_char_t redirect = proc->redirect;
|
||||
if (redirect != NULL) {
|
||||
hb_list_char_append(redirect, c);
|
||||
} else {
|
||||
HBE_CATCH_V((*proc->writer), proc->output, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* INSTANCE MANAGEMENT FUNCTIONS
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allocates memory for a proc, and creates one with provided arguments.
|
||||
*
|
||||
* @param input input
|
||||
* @param reader reader
|
||||
* @param name name
|
||||
* @param output output
|
||||
* @param writer writer
|
||||
* @return a freshly-created proc
|
||||
*/
|
||||
hb_proc_t* hb_proc_create_blank(char *name) {
|
||||
hb_proc_t* proc = calloc(1, sizeof(hb_proc_t));
|
||||
|
||||
proc->name = name;
|
||||
|
||||
proc->input = NULL;
|
||||
proc->reader = NULL;
|
||||
proc->EOI = false;
|
||||
|
||||
proc->line = 1;
|
||||
proc->column = 0;
|
||||
proc->CR = false;
|
||||
|
||||
proc->output = NULL;
|
||||
proc->writer = NULL;
|
||||
proc->buffer = nh_list_ucp_create();
|
||||
proc->mask = false;
|
||||
proc->redirect = NULL;
|
||||
|
||||
return proc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Frees all memory associated with a proc.
|
||||
*
|
||||
* @param proc proc
|
||||
*/
|
||||
void hb_proc_destroy(hb_proc_t* proc) {
|
||||
nh_list_ucp_destroy(proc->buffer);
|
||||
free(proc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Enables or disables the output mask.
|
||||
* When the output mask is enabled, all writes are simply discarded and not actually written to output.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param mask 1 to enable, 0 to disable
|
||||
* @return previous state
|
||||
*/
|
||||
int hb_proc_toggle_output_mask(hb_proc_t* proc, int mask) {
|
||||
int current = proc->mask;
|
||||
proc->mask = mask;
|
||||
return current;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enables or disables the output redirect.
|
||||
* When the output redirect is enabled, all writes are written to a buffer instead of the output.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param redirect buffer to redirect writes to, or NULL to disable
|
||||
*/
|
||||
void hb_proc_set_redirect(hb_proc_t* proc, hb_list_char_t redirect) {
|
||||
proc->redirect = redirect;
|
||||
}
|
||||
|
||||
// Attaches a file input stream to a proc: hbu_fstreamin_read becomes the
// reader callback and the stream becomes the reader's input.
void hb_proc_blank_set_input_fstreamin(hb_proc_t* proc, hbu_fstreamin_t fstreamin) {
	proc->reader = (hb_proc_reader_cb_t) &hbu_fstreamin_read;
	proc->input = fstreamin;
}
|
||||
|
||||
// Wrapper function for hb_list_char_shift to make it compatible with hb_proc_reader_cb_t
|
||||
static hb_eod_char_t hb_proc_read_from_list_char_input(hbe_err_t *hbe_err, hb_list_char_t input) {
|
||||
(void) hbe_err;
|
||||
return hb_list_char_shift(input);
|
||||
}
|
||||
|
||||
// Attaches an in-memory character list as a proc's input, read through
// hb_proc_read_from_list_char_input.
void hb_proc_blank_set_input_buffer(hb_proc_t* proc, hb_list_char_t buf) {
	proc->reader = (hb_proc_reader_cb_t) &hb_proc_read_from_list_char_input;
	proc->input = buf;
}
|
||||
|
||||
// Attaches a file output stream to a proc: hbu_fstreamout_write becomes the
// writer callback and the stream becomes the writer's output.
static void hb_proc_blank_set_output_fstreamout(hb_proc_t* proc, hbu_fstreamout_t fstreamout) {
	proc->writer = (hb_proc_writer_cb_t) &hbu_fstreamout_write;
	proc->output = fstreamout;
}
|
||||
|
||||
// Wrapper function for hb_list_char_append to make it compatible with hb_proc_writer_cb_t
|
||||
void hb_proc_write_to_list_char_output(hbe_err_t *hbe_err, hb_list_char_t output, hb_proc_char_t c) {
|
||||
(void) hbe_err;
|
||||
hb_list_char_append(output, c);
|
||||
}
|
||||
|
||||
// Attaches an in-memory character list as a proc's output, written through
// hb_proc_write_to_list_char_output.
void hb_proc_blank_set_output_buffer(hb_proc_t* proc, hb_list_char_t buf) {
	proc->writer = (hb_proc_writer_cb_t) &hb_proc_write_to_list_char_output;
	proc->output = buf;
}
|
|
@ -1,73 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <setjmp.h>
|
||||
|
||||
#include "hb-data.h"
|
||||
#include "hb-config.h"
|
||||
|
||||
typedef int32_t hb_proc_char_t;
|
||||
|
||||
#define HB_PROC_CHAR_EOD -1 // End Of Data
|
||||
#define HB_PROC_CHAR_SIZE sizeof(hb_proc_char_t)
|
||||
|
||||
typedef bool hb_proc_predicate_t(hb_proc_char_t);
|
||||
|
||||
// Reader and writer callbacks. The last parameter is a pointer to an error
|
||||
// message. If the last parameter is not NULL, it is assumed an error occurred.
|
||||
// The error message WILL BE free'd by the callee automatically, so ensure the
|
||||
// message was created using malloc or strdup, and is not free'd by the function
|
||||
// or anything else afterwards.
|
||||
typedef hb_proc_char_t hb_proc_reader_t(void*, char**);
|
||||
typedef void hb_proc_writer_t(void*, hb_proc_char_t, char**);
|
||||
|
||||
// Expands to two statements: the first creates an instance with
// name##_create() and adds it to config->memory_instances; the second adds
// name##_destroy to config->memory_destructors. A variable named `config`
// must be in scope, and both statements already end with ';'.
#define HB_PROC_MEMORY_CREATE(name) \
	hb_proc_list_memory_instance_add_right_and_return( \
		config->memory_instances, name##_create()); \
	hb_proc_list_memory_destructor_add_right( \
		config->memory_destructors, \
		(hb_proc_memory_destructor_t*) &name##_destroy);
|
||||
|
||||
NH_LIST(hb_proc_list_memory_instance, void*, sizeof(void*), void*, NULL);
|
||||
void* hb_proc_list_memory_instance_add_right_and_return(
|
||||
hb_proc_list_memory_instance*, void*);
|
||||
|
||||
typedef void hb_proc_memory_destructor_t(void*);
|
||||
NH_LIST(hb_proc_list_memory_destructor, hb_proc_memory_destructor_t*,
|
||||
sizeof(hb_proc_memory_destructor_t*), hb_proc_memory_destructor_t*,
|
||||
NULL);
|
||||
|
||||
#define HB_PROC_ERROR_MESSAGE_SIZE 1024
|
||||
typedef struct {
|
||||
hb_error_t code;
|
||||
char* message;
|
||||
} hb_proc_result_t;
|
||||
|
||||
typedef struct {
|
||||
char* name;
|
||||
jmp_buf start;
|
||||
|
||||
hb_proc_list_memory_instance* memory_instances;
|
||||
hb_proc_list_memory_destructor* memory_destructors;
|
||||
|
||||
void* input;
|
||||
hb_proc_reader_t* reader;
|
||||
bool EOI;
|
||||
|
||||
int line;
|
||||
int column;
|
||||
bool CR;
|
||||
|
||||
void* output;
|
||||
hb_proc_writer_t* writer;
|
||||
nh_list_ucp* buffer;
|
||||
bool mask;
|
||||
nh_list_ucp* redirect;
|
||||
|
||||
hb_config_t config;
|
||||
} hb_proc_t;
|
||||
|
||||
hb_proc_t* hb_proc_create_blank(char* name);
|
||||
void hb_proc_result_destroy(hb_proc_result_t* result);
|
||||
|
||||
hb_proc_result_t* hb_proc_start(hb_proc_t* proc);
|
||||
void _hb_proc_error(hb_proc_t* proc, hb_error_t code, char const* format, ...);
|
|
@ -1,101 +0,0 @@
|
|||
// Declare first before tag.c, as tag.c depends on it
|
||||
void hbs_content(hbe_err_t *hbe_err, hbu_streamoptions_t so, hb_proc_t pipe, hb_proc_char_t *parent);
|
||||
|
||||
#include "../tag/tag.c"
|
||||
#include "../bang/bang.c"
|
||||
#include "../comment/comment.c"
|
||||
#include "../entity/entity.c"
|
||||
|
||||
// $parent can be NULL for top-level content
|
||||
void hbs_content(hbe_err_t *hbe_err, hbu_streamoptions_t so, hb_proc_t pipe, hb_proc_char_t *parent) {
|
||||
int is_first_char = 1;
|
||||
// Set to 1 when $whitespace is instantiated when $is_first_char is 1
|
||||
int whitespace_buffer_started_at_beginning = 0;
|
||||
|
||||
// Set to one after calling hbs_comment, hbs_bang, or hbs_tag
|
||||
int returned_from_comment_bang_or_tag = 0;
|
||||
int whitespace_buffer_started_after_right_chevron = 0;
|
||||
|
||||
int should_collapse_whitespace = !hbu_streamoptions_in_tags_list(so->ex_collapse_whitespace, parent);
|
||||
int should_destroy_whole_whitespace = !hbu_streamoptions_in_tags_list(so->ex_destroy_whole_whitespace, parent);
|
||||
int should_trim_whitespace = !hbu_streamoptions_in_tags_list(so->ex_trim_whitespace, parent);
|
||||
|
||||
int should_buffer_whitespace = should_collapse_whitespace || should_destroy_whole_whitespace || should_trim_whitespace;
|
||||
hb_list_char_t whitespace = NULL;
|
||||
|
||||
while (1) {
|
||||
hb_eod_char_t c = HBE_CATCH_F(hb_proc_peek_eoi, pipe);
|
||||
int next_state = HBE_CATCH_F(_hbs_content_get_next_state, pipe, c);
|
||||
|
||||
if (next_state == HBS_CONTENT_STATE_TEXT && hb_rule_whitespace_check(c) && should_buffer_whitespace) {
|
||||
// Next character is whitespace and whitespace should be buffered
|
||||
if (whitespace == NULL) {
|
||||
whitespace = hb_list_char_create();
|
||||
whitespace_buffer_started_at_beginning = is_first_char;
|
||||
whitespace_buffer_started_after_right_chevron = returned_from_comment_bang_or_tag;
|
||||
}
|
||||
hb_list_char_append(whitespace, c);
|
||||
HBE_CATCH_F(hb_proc_skip, pipe);
|
||||
|
||||
} else {
|
||||
if (whitespace != NULL) {
|
||||
// Next character is not whitespace, deal with existing buffered whitespace
|
||||
if (should_destroy_whole_whitespace &&
|
||||
whitespace_buffer_started_after_right_chevron &&
|
||||
_hbs_content_state_is_comment_bang_or_opening_tag(next_state)
|
||||
) {
|
||||
// Do nothing
|
||||
|
||||
} else if (should_trim_whitespace &&
|
||||
(whitespace_buffer_started_at_beginning || next_state == HBS_CONTENT_STATE_END)) {
|
||||
// Do nothing
|
||||
|
||||
} else if (should_collapse_whitespace) {
|
||||
HBE_CATCH_F(hb_proc_write, pipe, ' ');
|
||||
}
|
||||
|
||||
hb_list_char_destroy(whitespace);
|
||||
whitespace = NULL;
|
||||
whitespace_buffer_started_at_beginning = 0;
|
||||
}
|
||||
|
||||
switch (next_state) {
|
||||
case HBS_CONTENT_STATE_TEXT:
|
||||
HBE_CATCH_F(hb_proc_accept, pipe);
|
||||
break;
|
||||
|
||||
case HBS_CONTENT_STATE_COMMENT:
|
||||
HBE_CATCH_F(hbs_comment, so, pipe);
|
||||
break;
|
||||
|
||||
case HBS_CONTENT_STATE_BANG:
|
||||
HBE_CATCH_F(hbs_bang, pipe);
|
||||
break;
|
||||
|
||||
case HBS_CONTENT_STATE_OPENING_TAG:
|
||||
HBE_CATCH_F(hbs_tag, so, pipe, parent);
|
||||
break;
|
||||
|
||||
case HBS_CONTENT_STATE_ENTITY:
|
||||
HBE_CATCH_F(hbs_entity, so, pipe);
|
||||
break;
|
||||
|
||||
case HBS_CONTENT_STATE_END:
|
||||
return;
|
||||
|
||||
default:
|
||||
HBE_THROW_F(HBE_INTERR_UNKNOWN_CONTENT_NEXT_STATE, "INTERR $next_state is not a known upcoming content stream state");
|
||||
}
|
||||
|
||||
returned_from_comment_bang_or_tag = _hbs_content_state_is_comment_bang_or_opening_tag(next_state);
|
||||
}
|
||||
|
||||
is_first_char = 0;
|
||||
}
|
||||
|
||||
finally:
|
||||
if (whitespace != NULL) {
|
||||
hb_list_char_destroy(whitespace);
|
||||
whitespace = NULL;
|
||||
}
|
||||
}
|
|
@ -1,126 +0,0 @@
|
|||
// Passes a `//` script comment through. The comment stops after a line
// terminator (which is accepted) or in front of a case-insensitive
// "</script>" (which is left unread).
static void _hbs_script_slcomment(hbe_err_t *hbe_err, hb_proc_t pipe) {
	HBE_CATCH_V(hb_proc_require_match, pipe, "//");

	// Comment can end at closing </script>
	// NOTE: Closing tag must not contain whitespace
	for (;;) {
		int hit_newline = HBE_CATCH_V(hb_proc_accept_if_matches_line_terminator, pipe);
		if (hit_newline) {
			return;
		}

		int at_closing_tag = HBE_CATCH_V(hb_proc_matches_i, pipe, "</script>");
		if (at_closing_tag) {
			return;
		}

		HBE_CATCH_V(hb_proc_accept, pipe);
	}
}
|
||||
|
||||
// Passes a slash-star script comment through. The comment stops after the
// closing star-slash (which is accepted) or in front of a case-insensitive
// "</script>" (which is left unread).
static void _hbs_script_mlcomment(hbe_err_t *hbe_err, hb_proc_t pipe) {
	HBE_CATCH_V(hb_proc_require_match, pipe, "/*");

	// Comment can end at closing </script>
	// NOTE: Closing tag must not contain whitespace
	for (;;) {
		int closed = HBE_CATCH_V(hb_proc_accept_if_matches, pipe, "*/");
		if (closed) {
			return;
		}

		int at_closing_tag = HBE_CATCH_V(hb_proc_matches_i, pipe, "</script>");
		if (at_closing_tag) {
			return;
		}

		HBE_CATCH_V(hb_proc_accept, pipe);
	}
}
|
||||
|
||||
// Passes a quoted JavaScript string through. The first accepted character
// must be a single or double quote; the string ends at the next matching
// quote that is not preceded by an active backslash escape.
static void _hbs_script_string(hbe_err_t *hbe_err, hb_proc_t pipe) {
	hb_proc_char_t quote = HBE_CATCH_V(hb_proc_accept, pipe);

	if (!(quote == '"' || quote == '\'')) {
		hb_proc_THROW_V(pipe, HBE_PARSE_EXPECTED_NOT_FOUND, "Expected JavaScript string delimiter");
	}

	// 1 while the previous character was an unpaired backslash
	int escaped = 0;

	for (;;) {
		hb_proc_char_t ch = HBE_CATCH_V(hb_proc_accept, pipe);

		if (ch == '\\') {
			escaped = !escaped;
			continue;
		}

		if (!escaped && ch == quote) {
			break;
		}

		int newline = HBE_CATCH_V(hb_proc_accept_if_matches_line_terminator, pipe);
		if (newline && !escaped) {
			hb_proc_THROW_V(pipe, HBE_PARSE_EXPECTED_NOT_FOUND, "Unterminated JavaScript string");
		}

		escaped = 0;
	}
}
|
||||
|
||||
// Passes a backtick-delimited JavaScript template literal through, ending at
// the next backtick that is not preceded by an active backslash escape.
static void _hbs_script_template(hbe_err_t *hbe_err, hb_proc_t pipe) {
	HBE_CATCH_V(hb_proc_require_match, pipe, "`");

	// 1 while the previous character was an unpaired backslash
	int escaped = 0;

	for (;;) {
		hb_proc_char_t ch = HBE_CATCH_V(hb_proc_accept, pipe);

		if (ch == '\\') {
			escaped = !escaped;
			continue;
		}

		if (!escaped && ch == '`') {
			break;
		}

		escaped = 0;
	}
}
|
||||
|
||||
// Passes script content through until "</" is next in the input. Comments,
// quoted strings and template literals are handed to their own scanners;
// every other character is accepted one at a time.
void hbs_script(hbe_err_t *hbe_err, hb_proc_t pipe) {
	for (;;) {
		int at_close = HBE_CATCH_V(hb_proc_matches, pipe, "</");
		if (at_close) {
			return;
		}

		int line_comment = HBE_CATCH_V(hb_proc_matches, pipe, "//");
		if (line_comment) {
			HBE_CATCH_V(_hbs_script_slcomment, pipe);
			continue;
		}

		int block_comment = HBE_CATCH_V(hb_proc_matches, pipe, "/*");
		if (block_comment) {
			HBE_CATCH_V(_hbs_script_mlcomment, pipe);
			continue;
		}

		hb_proc_char_t upcoming = HBE_CATCH_V(hb_proc_peek, pipe);
		if (upcoming == '"' || upcoming == '\'') {
			HBE_CATCH_V(_hbs_script_string, pipe);
		} else if (upcoming == '`') {
			HBE_CATCH_V(_hbs_script_template, pipe);
		} else {
			HBE_CATCH_V(hb_proc_accept, pipe);
		}
	}
}
|
|
@ -1,67 +0,0 @@
|
|||
// Passes a CSS comment through, ending only at the closing star-slash.
static void _hbs_style_comment(hbe_err_t *hbe_err, hb_proc_t pipe) {
	HBE_CATCH_V(hb_proc_require_match, pipe, "/*");

	// Unlike script tags, style comments do NOT end at closing tag
	for (;;) {
		int closed = HBE_CATCH_V(hb_proc_accept_if_matches, pipe, "*/");
		if (closed) {
			return;
		}
		HBE_CATCH_V(hb_proc_accept, pipe);
	}
}
|
||||
|
||||
// Passes a quoted CSS string through. The first accepted character must be a
// single or double quote; the string ends at the next matching quote that is
// not preceded by an active backslash escape.
static void _hbs_style_string(hbe_err_t *hbe_err, hb_proc_t pipe) {
	hb_proc_char_t quote = HBE_CATCH_V(hb_proc_accept, pipe);

	if (!(quote == '"' || quote == '\'')) {
		hb_proc_THROW_V(pipe, HBE_PARSE_EXPECTED_NOT_FOUND, "Expected CSS string delimiter");
	}

	// 1 while the previous character was an unpaired backslash
	int escaped = 0;

	for (;;) {
		hb_proc_char_t ch = HBE_CATCH_V(hb_proc_accept, pipe);

		if (ch == '\\') {
			escaped = !escaped;
			continue;
		}

		if (!escaped && ch == quote) {
			break;
		}

		int newline = HBE_CATCH_V(hb_proc_accept_if_matches_line_terminator, pipe);
		if (newline && !escaped) {
			hb_proc_THROW_V(pipe, HBE_PARSE_EXPECTED_NOT_FOUND, "Unterminated CSS string");
		}

		escaped = 0;
	}
}
|
||||
|
||||
// Passes style content through until "</" is next in the input. Comments and
// quoted strings are handed to their own scanners; every other character is
// accepted one at a time.
void hbs_style(hbe_err_t *hbe_err, hb_proc_t pipe) {
	for (;;) {
		int at_close = HBE_CATCH_V(hb_proc_matches, pipe, "</");
		if (at_close) {
			return;
		}

		int comment_next = HBE_CATCH_V(hb_proc_matches, pipe, "/*");
		if (comment_next) {
			HBE_CATCH_V(_hbs_style_comment, pipe);
			continue;
		}

		hb_proc_char_t upcoming = HBE_CATCH_V(hb_proc_peek, pipe);
		if (upcoming == '"' || upcoming == '\'') {
			HBE_CATCH_V(_hbs_style_string, pipe);
		} else {
			HBE_CATCH_V(hb_proc_accept, pipe);
		}
	}
}
|
|
@ -1,170 +0,0 @@
|
|||
#include <inttypes.h>
|
||||
|
||||
// NOTE: Do not use 0
|
||||
#define HBS_ENTITY_TYPE_NAME 1
|
||||
#define HBS_ENTITY_TYPE_DECIMAL 2
|
||||
#define HBS_ENTITY_TYPE_HEXADECIMAL 3
|
||||
|
||||
#define HBS_ENTITY_MAX_ENTITY_LENGTH 10 // Don't try to parse malformed entities forever
|
||||
|
||||
// Raises the internal error reported when an entity type value is not one of
// the HBS_ENTITY_TYPE_* constants.
static void _hbs_entity_interr_unknown_entity(hbe_err_t *hbe_err, int type) {
	HBE_THROW_V(HBE_INTERR_UNKNOWN_ENTITY_TYPE, "INTERR %d is not a known entity type", type);
}
|
||||
|
||||
// Writes an entity back out in source form: '&', then "#" (decimal) or "#x"
// (hexadecimal) for numeric types, then the raw body if one was collected,
// then ';' if the semicolon had been consumed.
static void _hbs_entity_write_literal(hbe_err_t *hbe_err, hb_proc_t pipe, int type, hb_list_char_t entity_raw, int consumed_semicolon) {
	int is_hex = type == HBS_ENTITY_TYPE_HEXADECIMAL;

	HBE_CATCH_V(hb_proc_write, pipe, '&');

	if (is_hex || type == HBS_ENTITY_TYPE_DECIMAL) {
		HBE_CATCH_V(hb_proc_write, pipe, '#');
		if (is_hex) {
			HBE_CATCH_V(hb_proc_write, pipe, 'x');
		}
	}

	if (entity_raw != NULL) {
		HBE_CATCH_V(hb_proc_write_buffer, pipe, entity_raw);
	}

	if (consumed_semicolon) {
		HBE_CATCH_V(hb_proc_write, pipe, ';');
	}
}
|
||||
|
||||
// Handles an entity syntax error: throws errcode with reason unless that code
// is suppressed in the stream options, in which case the entity is written
// back out literally instead.
static void _hbs_entity_syntax_error(hbe_err_t *hbe_err, hbu_streamoptions_t so, hb_proc_t pipe, int type, hb_list_char_t entity_raw, int consumed_semicolon, hb_error_t errcode, const char *reason) {
	if (!hbu_streamoptions_supressed_error(so, errcode)) {
		hb_proc_THROW_V(pipe, errcode, reason);
	}

	HBE_CATCH_V(_hbs_entity_write_literal, pipe, type, entity_raw, consumed_semicolon);
	return;
}
|
||||
|
||||
// NOTE: Return 0 if syntax error but suppressed
|
||||
static int _hbs_entity_process_prefix(hbe_err_t *hbe_err, hb_proc_t pipe, hbu_streamoptions_t so) {
|
||||
hb_proc_char_t c = HBE_CATCH(hb_proc_peek, pipe);
|
||||
|
||||
if (hb_rule_lcalpha_check(c) || hb_rule_ucalpha_check(c)) {
|
||||
// Name-based entity
|
||||
return HBS_ENTITY_TYPE_NAME;
|
||||
}
|
||||
|
||||
hb_eod_char_t c2 = HBE_CATCH(hb_proc_peek_eof_offset, pipe, 2);
|
||||
|
||||
if (c == '#' && c2 == 'x') {
|
||||
// Hexadecimal-based entity
|
||||
// NOTE: Check before decimal-based
|
||||
HBE_CATCH(hb_proc_skip_amount, pipe, 2);
|
||||
return HBS_ENTITY_TYPE_HEXADECIMAL;
|
||||
}
|
||||
|
||||
if (c == '#') {
|
||||
// Decimal-based entity
|
||||
HBE_CATCH(hb_proc_skip, pipe);
|
||||
return HBS_ENTITY_TYPE_DECIMAL;
|
||||
}
|
||||
|
||||
// Malformed entity
|
||||
HBE_PASS(_hbs_entity_syntax_error, so, pipe, -1, NULL, 0, HBE_PARSE_MALFORMED_ENTITY, "Invalid character after ampersand");
|
||||
}
|
||||
|
||||
/**
 * Processes one character entity starting at '&'.
 *
 * Steps: consume '&'; if BARE_AMPERSAND is suppressed and whitespace follows,
 * write '&' and stop; classify the prefix; collect up to
 * HBS_ENTITY_MAX_ENTITY_LENGTH body characters until ';'; validate the body;
 * then write either the decoded character (when so->decode_entities is set)
 * or the entity in literal form.
 *
 * Malformed and invalid entities are routed through _hbs_entity_syntax_error,
 * which throws unless the corresponding error code is suppressed.
 *
 * @param hbe_err error slot used by the HBE_* macros
 * @param so stream options
 * @param pipe proc being read from and written to
 */
void hbs_entity(hbe_err_t *hbe_err, hbu_streamoptions_t so, hb_proc_t pipe) {
	hb_list_char_t entity_raw = NULL;

	HBE_CATCH_F(hb_proc_require_skip, pipe, '&');

	// Quickly check and short circuit if BARE_AMPERSAND is suppressed
	// and next character is whitespace
	if (hbu_streamoptions_supressed_error(so, HBE_PARSE_BARE_AMPERSAND)) {
		hb_eod_char_t next = HBE_CATCH_F(hb_proc_peek_eoi, pipe);
		if (hb_rule_whitespace_check(next)) {
			HBE_CATCH_F(hb_proc_write, pipe, '&');
			return;
		}
	}

	int type = HBE_CATCH_F(_hbs_entity_process_prefix, pipe, so);
	if (!type) {
		// Syntax error that was suppressed
		goto finally;
	}

	entity_raw = hb_list_char_create_size(0, HBS_ENTITY_MAX_ENTITY_LENGTH + 1);
	int under_max = 0;

	for (int i = 0; i < HBS_ENTITY_MAX_ENTITY_LENGTH; i++) {
		hb_proc_char_t e = HBE_CATCH_F(hb_proc_skip, pipe);

		if (e == ';') {
			under_max = 1;
			break;
		}

		int well_formed = 0;

		switch (type) {
		case HBS_ENTITY_TYPE_NAME:
			well_formed = hb_rule_lcalpha_check(e) || hb_rule_ucalpha_check(e);
			break;

		case HBS_ENTITY_TYPE_DECIMAL:
			well_formed = hb_rule_digit_check(e);
			break;

		case HBS_ENTITY_TYPE_HEXADECIMAL:
			well_formed = hb_rule_hex_check(e);
			break;

		default:
			HBE_PASS_F(_hbs_entity_interr_unknown_entity, type);
		}

		if (!well_formed) {
			HBE_PASS_F(_hbs_entity_syntax_error, so, pipe, type, entity_raw, 0, HBE_PARSE_MALFORMED_ENTITY, "Characters after ampersand don't form entity");
		}

		hb_list_char_append(entity_raw, e);
	}

	if (!under_max) {
		// Malformed entity (too long)
		HBE_PASS_F(_hbs_entity_syntax_error, so, pipe, type, entity_raw, 0, HBE_PARSE_MALFORMED_ENTITY, "Malformed entity");
	}

	int valid = 1;
	hb_proc_char_t *entity_raw_u = hb_list_char_underlying(entity_raw);
	uintmax_t code_point;

	switch (type) {
	case HBS_ENTITY_TYPE_NAME:
		valid = hb_rule_entity_references_check(entity_raw_u);
		if (valid && so->decode_entities) {
			HBE_CATCH_F(hb_proc_write, pipe, hb_rule_entity_references_get(entity_raw_u));
		}
		break;

	case HBS_ENTITY_TYPE_DECIMAL:
	case HBS_ENTITY_TYPE_HEXADECIMAL:
		// strtoumax() sets errno on failure but never clears it, so a
		// stale value from an earlier call must not be mistaken for a
		// conversion error.
		errno = 0;
		code_point = strtoumax((char *) entity_raw_u, NULL, (type == HBS_ENTITY_TYPE_DECIMAL) ? 10 : 16);
		valid = errno == 0 && code_point <= 0x10FFFF;
		if (valid && so->decode_entities) {
			valid = HBE_CATCH_F(hb_proc_write_unicode, pipe, code_point);
		}
		break;

	default:
		HBE_PASS_F(_hbs_entity_interr_unknown_entity, type);
	}

	if (!valid) {
		HBE_PASS_F(_hbs_entity_syntax_error, so, pipe, type, entity_raw, 1, HBE_PARSE_INVALID_ENTITY, "Invalid entity");
	}

	if (!so->decode_entities) {
		HBE_CATCH_F(_hbs_entity_write_literal, pipe, type, entity_raw, 1);
	}

	finally:
		if (entity_raw != NULL) {
			hb_list_char_destroy(entity_raw);
			entity_raw = NULL;
		}
}
|
|
@ -1,102 +0,0 @@
|
|||
// Declare first before content.c, as content.c depends on it
|
||||
void hbs_tag(hbe_err_t *hbe_err, hbu_streamoptions_t so, hb_proc_t pipe, hb_proc_char_t *parent);
|
||||
|
||||
#include "./tagname.c"
|
||||
#include "../attr/attr.c"
|
||||
#include "../content/script.c"
|
||||
#include "../content/style.c"
|
||||
#include "../content/html.c"
|
||||
|
||||
// $parent could be NULL
|
||||
/**
 * Process one element: its opening tag with attributes, its content, and
 * (for non-void, non-self-closing tags) the matching closing tag.
 *
 * NOTE(review): HBE_CATCH_F and hb_proc_THROW_F are defined elsewhere; they
 * presumably transfer control to `finally` when an error is raised.
 *
 * @param hbe_err error out-parameter (presumably written by the HBE_* macros)
 * @param so stream options (remove_tag_whitespace, suppressed errors)
 * @param pipe processor to read from and write to
 * @param parent name of the enclosing tag, or NULL if there is none
 */
void hbs_tag(hbe_err_t *hbe_err, hbu_streamoptions_t so, hb_proc_t pipe, hb_proc_char_t *parent) {
  hb_list_char_t opening_name = NULL;
  // Declared up front so that every exit through `finally` can release it.
  hb_list_char_t closing_name = NULL;

  int self_closing = 0;

  HBE_CATCH_F(hb_proc_require, pipe, '<');
  opening_name = HBE_CATCH_F(hbs_tagname, so, pipe);

  int last_attr_type = -1;

  while (1) {
    // At the beginning of this loop, the last parsed unit was either the tag name
    // or an attribute (including its value, if it had one)
    size_t ws_accepted;
    if (so->remove_tag_whitespace) {
      ws_accepted = HBE_CATCH_F(hb_proc_skip_while_predicate, pipe, &hb_rule_whitespace_check);
    } else {
      ws_accepted = HBE_CATCH_F(hb_proc_accept_while_predicate, pipe, &hb_rule_whitespace_check);
    }

    int end_of_tag = HBE_CATCH_F(hb_proc_accept_if, pipe, '>');
    if (end_of_tag) {
      break;
    }

    self_closing = HBE_CATCH_F(hb_proc_accept_if_matches, pipe, "/>");
    if (self_closing) {
      if (!hbu_streamoptions_supressed_error(so, HBE_PARSE_SELF_CLOSING_TAG)) {
        hb_proc_THROW_F(pipe, HBE_PARSE_SELF_CLOSING_TAG, "Self-closing tag");
      }
      break;
    }

    // HBE_PARSE_NO_SPACE_BEFORE_ATTR is not suppressable as then there would be difficulty
    // in determining what is the end of a tag/attribute name/attribute value
    if (!ws_accepted) {
      hb_proc_THROW_F(pipe, HBE_PARSE_NO_SPACE_BEFORE_ATTR, "No whitespace before attribute");
    }

    // Whitespace was skipped rather than copied above, so re-insert a single
    // separator unless the previous attribute ended with a quoted value.
    if (so->remove_tag_whitespace) {
      if (last_attr_type != HBS_ATTR_QUOTED) {
        HBE_CATCH_F(hb_proc_write, pipe, ' ');
      }
    }

    last_attr_type = HBE_CATCH_F(hbs_attr, so, pipe);
  }

  hb_proc_char_t *tag_name = hb_list_char_underlying(opening_name);

  // Non-standard tag checking is done in hbs_tagname
  if (parent != NULL && (
      !hb_rule_whitelistparents_allowed(tag_name, parent) ||
      !hb_rule_whitelistchildren_allowed(parent, tag_name) ||
      !hb_rule_blacklistparents_allowed(tag_name, parent) ||
      !hb_rule_blacklistchildren_allowed(parent, tag_name))) {
    hb_proc_THROW_F(pipe, HBE_PARSE_ILLEGAL_CHILD, "Tag can't be a child there");
  }

  // Self-closing or void tag
  if (self_closing || hb_rule_voidtags_check(tag_name)) {
    goto finally;
  }

  if (hb_list_char_compare_lit(opening_name, "script") == 0) {
    // Script tag
    HBE_CATCH_F(hbs_script, pipe);
  } else if (hb_list_char_compare_lit(opening_name, "style") == 0) {
    // Style tag
    HBE_CATCH_F(hbs_style, pipe);
  } else {
    // Content
    HBE_CATCH_F(hbs_content, so, pipe, tag_name);
  }

  // Closing tag for non-void
  HBE_CATCH_F(hb_proc_require, pipe, '<');
  HBE_CATCH_F(hb_proc_require, pipe, '/');
  closing_name = HBE_CATCH_F(hbs_tagname, so, pipe);
  HBE_CATCH_F(hb_proc_require, pipe, '>');

  if (!hb_list_char_equal(opening_name, closing_name)) {
    hb_proc_THROW_F(pipe, HBE_PARSE_UNCLOSED_TAG, "Tag not closed (expected `%s` closing tag, got `%s`)", tag_name, hb_list_char_underlying(closing_name));
  }

finally:
  if (opening_name) {
    hb_list_char_destroy(opening_name);
    opening_name = NULL;
  }
  // The closing tag name is owned by this function just like the opening
  // one; previously it was never destroyed and leaked for every element.
  if (closing_name) {
    hb_list_char_destroy(closing_name);
    closing_name = NULL;
  }
  return;
}
|
23
src/hb/cfg.c
23
src/hb/cfg.c
|
@ -1,14 +1,15 @@
|
|||
#include <hb/cfg.h>
|
||||
|
||||
bool hb_cfg_should_min(hb_cfg_tags_set* set, nh_view_str* view) {
|
||||
switch (set->mode) {
|
||||
case HB_CFG_SET_MODE_NONE:
|
||||
return false;
|
||||
case HB_CFG_SET_MODE_ALL:
|
||||
return true;
|
||||
case HB_CFG_SET_MODE_ALLOW:
|
||||
return hb_set_tag_names_has(set->set, view);
|
||||
default: /* case HB_CFG_SET_MODE_DENY: */
|
||||
return !hb_set_tag_names_has(set->set, view);
|
||||
}
|
||||
bool hb_cfg_should_min(hb_cfg_tags_set* set, nh_view_str* view)
|
||||
{
|
||||
switch (set->mode) {
|
||||
case HB_CFG_SET_MODE_NONE:
|
||||
return false;
|
||||
case HB_CFG_SET_MODE_ALL:
|
||||
return true;
|
||||
case HB_CFG_SET_MODE_ALLOW:
|
||||
return view != NULL && hb_set_tag_names_has(set->set, view);
|
||||
default: /* case HB_CFG_SET_MODE_DENY: */
|
||||
return view == NULL || !hb_set_tag_names_has(set->set, view);
|
||||
}
|
||||
}
|
||||
|
|
14
src/hb/cfg.h
14
src/hb/cfg.h
|
@ -1,19 +1,19 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/err.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
typedef enum {
|
||||
HB_CFG_SET_MODE_NONE, // i.e. don't minify ever
|
||||
HB_CFG_SET_MODE_ALLOW,
|
||||
HB_CFG_SET_MODE_DENY,
|
||||
HB_CFG_SET_MODE_ALL, // i.e. minify all without exception
|
||||
HB_CFG_SET_MODE_NONE, // i.e. don't minify ever
|
||||
HB_CFG_SET_MODE_ALLOW,
|
||||
HB_CFG_SET_MODE_DENY,
|
||||
HB_CFG_SET_MODE_ALL, // i.e. minify all without exception
|
||||
} hb_cfg_tags_set_mode;
|
||||
|
||||
typedef struct {
|
||||
hb_cfg_tags_set_mode mode;
|
||||
hb_set_tag_names* set;
|
||||
hb_cfg_tags_set_mode mode;
|
||||
hb_set_tag_names* set;
|
||||
} hb_cfg_tags_set;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -5,8 +5,10 @@ NH_MAP_VIEW_STR_IMPL(hb_map_entity_references, int32_t, -1);
|
|||
|
||||
// Data structure for a set of tag names.
|
||||
NH_SET_VIEW_ISTR_IMPL(hb_set_tag_names);
|
||||
#define hb_set_tag_names_add_whole_literal(set, str) hb_set_tag_names_add_whole_array(set, nh_litarr(str))
|
||||
#define hb_set_tag_names_add_whole_literal(set, str) \
|
||||
hb_set_tag_names_add_whole_array(set, nh_litarr(str))
|
||||
|
||||
// Data structure for mapping tag names to sets of tag names.
|
||||
NH_MAP_VIEW_ISTR_IMPL(hb_map_tag_relations, hb_set_tag_names*, NULL);
|
||||
#define hb_map_tag_relations_set_whole_literal(map, str, v) hb_map_tag_relations_set_whole_array(map, nh_litarr(str), v)
|
||||
#define hb_map_tag_relations_set_whole_literal(map, str, v) \
|
||||
hb_map_tag_relations_set_whole_array(map, nh_litarr(str), v)
|
||||
|
|
|
@ -16,8 +16,10 @@ NH_MAP_VIEW_STR_PROTO(hb_map_entity_references, int32_t);
|
|||
|
||||
// Data structure for a set of tag names.
|
||||
NH_SET_VIEW_ISTR_PROTO(hb_set_tag_names);
|
||||
#define hb_set_tag_names_add_whole_literal(set, str) hb_set_tag_names_add_whole_array(set, nh_litarr(str))
|
||||
#define hb_set_tag_names_add_whole_literal(set, str) \
|
||||
hb_set_tag_names_add_whole_array(set, nh_litarr(str))
|
||||
|
||||
// Data structure for mapping tag names to sets of tag names.
|
||||
NH_MAP_VIEW_ISTR_PROTO(hb_map_tag_relations, hb_set_tag_names*);
|
||||
#define hb_map_tag_relations_set_whole_literal(map, str, v) hb_map_tag_relations_set_whole_array(map, nh_litarr(str), v)
|
||||
#define hb_map_tag_relations_set_whole_literal(map, str, v) \
|
||||
hb_map_tag_relations_set_whole_array(map, nh_litarr(str), v)
|
||||
|
|
12
src/hb/err.h
12
src/hb/err.h
|
@ -3,22 +3,14 @@
|
|||
#include <hb/collection.h>
|
||||
|
||||
typedef enum {
|
||||
// WARNING: The __HB_ERR_COUNT value only works if the first value of this enum is set to zero.
|
||||
// WARNING: The __HB_ERR_COUNT value only works if the first value of
|
||||
// this enum is set to zero.
|
||||
HB_ERR_OK = 0,
|
||||
|
||||
HB_ERR_INTERR_UNKNOWN_ENTITY_TYPE,
|
||||
HB_ERR_INTERR_UNKNOWN_ATTR_QUOTE,
|
||||
HB_ERR_INTERR_UNKNOWN_CONTENT_NEXT_STATE,
|
||||
|
||||
HB_ERR_CLI_TOO_MANY_OPTIONS,
|
||||
HB_ERR_CLI_INVALID_TAG_SET,
|
||||
HB_ERR_CLI_INVALID_TAG,
|
||||
HB_ERR_CLI_INVALID_SUPPRESSABLE_ERROR,
|
||||
|
||||
HB_ERR_IO_FOPEN_FAIL,
|
||||
HB_ERR_IO_FCLOSE_FAIL,
|
||||
HB_ERR_IO_FREAD_FAIL,
|
||||
HB_ERR_IO_FWRITE_FAIL,
|
||||
|
||||
HB_ERR_PARSE_MALFORMED_ENTITY,
|
||||
HB_ERR_PARSE_INVALID_ENTITY,
|
||||
|
|
|
@ -0,0 +1,179 @@
|
|||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <hb/cfg.h>
|
||||
#include <hb/hyperbuild.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/unit.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/unistd.h>
|
||||
|
||||
// Library initialisation entry point: delegates to hb_rule_init(), which is
// defined elsewhere (presumably it populates the rule tables — confirm).
void hyperbuild_init(void)
|
||||
{
|
||||
hb_rule_init();
|
||||
}
|
||||
|
||||
// Rate to read from file, set to 4 KiB.
|
||||
#define READ_RATE 4096
|
||||
// Rate to resize buffer containing file contents, set to 768 KiB.
|
||||
#define GROWTH_RATE 786432
|
||||
|
||||
/**
 * Read an entire file into a freshly allocated buffer.
 *
 * The buffer always has room for one extra element after the data, which is
 * set to '\xFF' as an end marker.
 *
 * @param file path of the file to read
 * @param[out] out set to the heap-allocated buffer (caller frees), or NULL
 *                 if the file could not be opened, sized, read or closed, or
 *                 if memory could not be allocated
 * @param[out] out_len set to the amount of data read, or 0 on failure
 *
 * On failure errno holds the error of the call that failed, so callers can
 * report it.
 */
static void _read_file(char const* file, hb_rune** out, size_t* out_len)
{
	int fd = -1;
	bool success = false;
	hb_rune* output = NULL;
	size_t output_next = 0;
	int saved_errno = 0;

	// Open file.
	fd = open(file, O_RDONLY);
	if (fd < 0) {
		// Failed to open file.
		goto finally;
	}

	// Get file size. It is only used as an initial capacity hint; the
	// read loop below grows the buffer as needed.
	struct stat stats;
	if (fstat(fd, &stats) != 0) {
		// Failed to get file size.
		goto finally;
	}
	size_t output_capacity =
		stats.st_size > 0 ? (size_t) stats.st_size : 0;

	// Allocate memory for buffer, with room for the terminator.
	output = malloc((output_capacity + 1) * sizeof(hb_rune));
	if (output == NULL) {
		// Failed to allocate memory.
		goto finally;
	}

	// Read into buffer.
	while (true) {
		// Check if there's enough room to read READ_RATE and reallocate
		// if necessary.
		if (output_next + READ_RATE >= output_capacity) {
			output_capacity += GROWTH_RATE;
			// Make room for terminator. Use a temporary so the
			// original buffer can still be freed on failure.
			hb_rune* new_output = realloc(
				output, (output_capacity + 1) * sizeof(hb_rune));
			if (new_output == NULL) {
				// Failed to reallocate memory.
				goto finally;
			}
			output = new_output;
		}

		// Attempt to read READ_RATE.
		ssize_t read_amount = read(fd, output + output_next, READ_RATE);
		if (read_amount < 0) {
			// Failed to read.
			goto finally;
		}

		if (read_amount == 0) {
			// Reached EOF.
			break;
		}
		output_next += (size_t) read_amount;
	}

	output[output_next] = '\xFF';
	success = true;

finally:
	// Remember the errno of whichever call failed before close()/free()
	// get a chance to overwrite it.
	saved_errno = errno;
	if (fd >= 0 && close(fd) != 0) {
		// Failed to close file descriptor. If everything else worked,
		// this is the error to report.
		if (success) {
			saved_errno = errno;
			success = false;
		}
	}
	if (!success) {
		// Failed to read file, free memory and return NULL.
		free(output);
		output = NULL;
		output_next = 0;
		errno = saved_errno;
	}
	*out = output;
	*out_len = output_next;
}
|
||||
|
||||
/**
 * Fill {@param result} with a file read failure built from the current errno.
 *
 * @param[out] result receives HB_ERR_IO_FREAD_FAIL, position 0 and a
 *                    heap-allocated message; the message is NULL if memory
 *                    for it could not be allocated
 */
static void _set_file_read_error(hb_proc_result* result)
{
	// Capture errno first: malloc below is allowed to overwrite it.
	int const read_errno = errno;

	char* msg = malloc(HB_PROC_ERROR_CUSTOM_SIZE * sizeof(char));
	if (msg != NULL) {
		snprintf(msg, HB_PROC_ERROR_CUSTOM_SIZE,
			 "Failed to read file with system error %d", read_errno);
	}
	result->code = HB_ERR_IO_FREAD_FAIL;
	result->msg = msg;
	result->pos = 0;
}
|
||||
|
||||
hb_rune* hyperbuild_from_file(char const* file, hb_cfg* cfg,
|
||||
hb_proc_result* result)
|
||||
{
|
||||
hb_rune* input;
|
||||
size_t input_size;
|
||||
_read_file(file, &input, &input_size);
|
||||
if (input == NULL) {
|
||||
_set_file_read_error(result);
|
||||
}
|
||||
|
||||
hyperbuild(input, input_size, input, cfg, result);
|
||||
return input;
|
||||
}
|
||||
|
||||
void hyperbuild_from_file_custom_output(char const* file, hb_rune* output,
|
||||
hb_cfg* cfg, hb_proc_result* result)
|
||||
{
|
||||
hb_rune* input;
|
||||
size_t input_size;
|
||||
_read_file(file, &input, &input_size);
|
||||
if (input == NULL) {
|
||||
_set_file_read_error(result);
|
||||
}
|
||||
|
||||
hyperbuild(input, input_size, output, cfg, result);
|
||||
free(input);
|
||||
}
|
||||
|
||||
// Process {@param input} into a newly allocated buffer of input_size + 1
// elements and return that buffer; the caller is responsible for freeing it.
// NOTE(review): the malloc result is not checked, so on allocation failure
// hyperbuild() is handed a NULL output pointer — needs an out-of-memory path.
hb_rune* hyperbuild_from_input(hb_rune* input, size_t input_size, hb_cfg* cfg,
|
||||
hb_proc_result* result)
|
||||
{
|
||||
hb_rune* output = malloc((input_size + 1) * sizeof(hb_rune));
|
||||
// hyperbuild() null terminates output when processing completes without
// error; the extra element allocated above is the room for that terminator.
|
||||
hyperbuild(input, input_size, output, cfg, result);
|
||||
return output;
|
||||
}
|
||||
|
||||
// Process {@param input} in place by passing the same array to hyperbuild()
// as both source and output.
void hyperbuild_in_place(hb_rune* input, size_t input_size, hb_cfg* cfg,
|
||||
hb_proc_result* result)
|
||||
{
|
||||
hyperbuild(input, input_size, input, cfg, result);
|
||||
}
|
||||
|
||||
void hyperbuild(hb_rune* input, size_t input_size, hb_rune* output, hb_cfg* cfg,
|
||||
hb_proc_result* result)
|
||||
{
|
||||
input[input_size] = '\xFF';
|
||||
|
||||
hb_proc proc = {
|
||||
.cfg = cfg,
|
||||
.src = input,
|
||||
.src_len = input_size,
|
||||
.src_next = 0,
|
||||
.out = output,
|
||||
.out_next = 0,
|
||||
.result = result,
|
||||
};
|
||||
|
||||
if (!setjmp(proc.start)) {
|
||||
hb_unit_content_html(&proc, NULL);
|
||||
// No errors occurred.
|
||||
result->code = HB_ERR_OK;
|
||||
result->pos = proc.out_next;
|
||||
result->msg = NULL;
|
||||
|
||||
// Null terminate output.
|
||||
output[proc.out_next] = '\0';
|
||||
} else {
|
||||
// An error occurred.
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
#include <hb/cfg.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rune.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* Initialise internal structures and data used in processing.
|
||||
* This function must be called before using any other hyperbuild function.
|
||||
*/
|
||||
void hyperbuild_init(void);
|
||||
|
||||
/**
|
||||
* Read a file and run hyperbuild on the contents. Output will be null
|
||||
* terminated if no error occurs.
|
||||
*
|
||||
* @param file path to the file
|
||||
* @param cfg configuration to use
|
||||
* @param[out] result where to write any resulting error information
|
||||
* @return pointer to a heap-allocated array containing processed output that
|
||||
* needs to be freed
|
||||
*/
|
||||
hb_rune* hyperbuild_from_file(char const* file, hb_cfg* cfg,
|
||||
hb_proc_result* result);
|
||||
|
||||
/**
|
||||
* Read a file and run hyperbuild on the contents, writing to {@param output}.
|
||||
* Output will be null terminated if no error occurs. WARNING: Does not check if
|
||||
* {@param output} is large enough. It should at least match the size of the
|
||||
* file.
|
||||
*
|
||||
* @param file path to the file
|
||||
* @param output output array to write to
|
||||
* @param cfg configuration to use
|
||||
* @param[out] result where to write any resulting error information
|
||||
*/
|
||||
void hyperbuild_from_file_custom_output(char const* file, hb_rune* output,
|
||||
hb_cfg* cfg, hb_proc_result* result);
|
||||
|
||||
/**
|
||||
* Run hyperbuild on an input array and write to a heap-allocated array. Output
|
||||
* will be null terminated if no error occurs. WARNING: Input must end with
|
||||
* '\xFF' or '\0', and {@param input_size} must not include the terminator.
|
||||
*
|
||||
* @param input input array to process
* @param input_size length of {@param input}, not including the terminator
|
||||
* @param cfg configuration to use
|
||||
* @param[out] result where to write any resulting error information
|
||||
* @return pointer to a heap-allocated array containing processed output that
|
||||
* needs to be freed
|
||||
*/
|
||||
hb_rune* hyperbuild_from_input(hb_rune* input, size_t input_size, hb_cfg* cfg,
|
||||
hb_proc_result* result);
|
||||
|
||||
/**
|
||||
* Run hyperbuild in place on an input array. Output will be null terminated if
|
||||
* no error occurs. WARNING: Input must end with '\xFF' or '\0', and {@param
|
||||
* input_size} must not include the terminator.
|
||||
*
|
||||
* @param input input array to process
* @param input_size length of {@param input}, not including the terminator
|
||||
* @param cfg configuration to use
|
||||
* @param[out] result where to write any resulting error information
|
||||
*/
|
||||
void hyperbuild_in_place(hb_rune* input, size_t input_size, hb_cfg* cfg,
|
||||
hb_proc_result* result);
|
||||
|
||||
/**
|
||||
* Run hyperbuild on an input array and write to {@param output}. Output will be
|
||||
* null terminated if no error occurs. WARNING: Input must end with '\xFF' or
|
||||
* '\0', and {@param input_size} must not include the terminator. WARNING: Does
|
||||
* not check if {@param output} is large enough. It should at least match the
|
||||
* size of the input.
|
||||
*
|
||||
* @param input input array to process
* @param input_size length of {@param input}, not including the terminator
|
||||
* @param output output array to write to
|
||||
* @param cfg configuration to use
|
||||
* @param[out] result where to write any resulting error information
|
||||
*/
|
||||
void hyperbuild(hb_rune* input, size_t input_size, hb_rune* output, hb_cfg* cfg,
|
||||
hb_proc_result* result);
|
165
src/hb/proc.h
165
src/hb/proc.h
|
@ -1,66 +1,78 @@
|
|||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/cfg.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/err.h>
|
||||
#include <hb/rune.h>
|
||||
#include <setjmp.h>
|
||||
|
||||
// Result of processing.
|
||||
typedef struct {
|
||||
// The error code, which could be HB_ERR_OK if no errors occurred (i.e. processing completed successfully).
|
||||
hb_err code;
|
||||
// Error message if an error occurred.
|
||||
char* msg;
|
||||
// Whether or not msg should be freed on destruction.
|
||||
bool custom;
|
||||
// The value of src_next at the time of error.
|
||||
size_t pos;
|
||||
} hb_proc_result;
|
||||
|
||||
// Processing state of a file. Most fields are used internally and set during processing.
|
||||
// Single use only; create one per processing.
|
||||
typedef struct {
|
||||
// Settings for this run.
|
||||
hb_cfg* cfg;
|
||||
// Name of the source, usually the file name or something descriptive and unique such as stdin.
|
||||
char* name;
|
||||
// This will be set just before starting to process so that when an error occurs, the processor will jump back to where this was set.
|
||||
// This is known as a long jump and saves having to check if an error occurred at every stage of processing.
|
||||
jmp_buf start;
|
||||
|
||||
// Source data, represented as an array of bytes (see hb_rune).
|
||||
// To avoid having repeated checks and a dedicated marker/struct field for EOF, the src array will terminate with HB_EOF, an invalid Unicode byte.
|
||||
hb_rune* src;
|
||||
// Length of the source data.
|
||||
size_t src_len;
|
||||
// Offset of the next unconsumed character.
|
||||
// This means that when src_next == src_len, there are no more unconsumed characters, the end has been reached, and the input has been processed.
|
||||
size_t src_next;
|
||||
|
||||
// Where to write the output.
|
||||
hb_rune* out;
|
||||
// Offset of the next unwritten space.
|
||||
size_t out_next;
|
||||
// Result of processing, set on completion or error.
|
||||
hb_proc_result result;
|
||||
} hb_proc;
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// Memory to allocate for a custom error message.
|
||||
#define HB_PROC_ERROR_CUSTOM_SIZE 512
|
||||
|
||||
// Signature for a predicate function that returns true or false given a character.
|
||||
// Result of processing.
|
||||
typedef struct {
|
||||
// The error code, which could be HB_ERR_OK if no errors occurred (i.e.
|
||||
// processing completed successfully).
|
||||
hb_err code;
|
||||
// Error message if an error occurred. Allocated on heap and must be
|
||||
// freed.
|
||||
char* msg;
|
||||
// The value of src_next at the time of error.
|
||||
size_t pos;
|
||||
} hb_proc_result;
|
||||
|
||||
// Processing state of a file. Most fields are used internally and set during
|
||||
// processing. Single use only; create one per processing.
|
||||
typedef struct {
|
||||
// Settings for this run.
|
||||
hb_cfg* cfg;
|
||||
// This will be set just before starting to process so that when an
|
||||
// error occurs, the processor will jump back to where this was set.
|
||||
// This is known as a long jump and saves having to check if an error
|
||||
// occurred at every stage of processing.
|
||||
jmp_buf start;
|
||||
|
||||
// Source data, represented as an array of bytes (see hb_rune).
|
||||
// To avoid having repeated checks and a dedicated marker/struct field
|
||||
// for EOF, the src array will terminate with HB_EOF, an invalid Unicode
|
||||
// byte.
|
||||
hb_rune* src;
|
||||
// Length of the source data.
|
||||
size_t src_len;
|
||||
// Offset of the next unconsumed character.
|
||||
// This means that when src_next == src_len, there are no more
|
||||
// unconsumed characters, the end has been reached, and the input has
|
||||
// been processed.
|
||||
size_t src_next;
|
||||
|
||||
// Where to write the output.
|
||||
hb_rune* out;
|
||||
// Offset of the next unwritten space.
|
||||
size_t out_next;
|
||||
// Result of processing, set on completion or error.
|
||||
// There's no point in embedding it inside hb_proc, as it needs to be
|
||||
// passed back to caller anyway.
|
||||
hb_proc_result* result;
|
||||
} hb_proc;
|
||||
|
||||
// Signature for a predicate function that returns true or false given a
|
||||
// character.
|
||||
typedef bool hb_proc_pred(hb_rune);
|
||||
|
||||
// Method declarations for implementations in source files under hb/proc, sorted by declaration order, grouped by file name in alphabetical order.
|
||||
// Method declarations for implementations in source files under hb/proc, sorted
|
||||
// by declaration order, grouped by file name in alphabetical order.
|
||||
|
||||
hb_rune hb_proc_accept(hb_proc* proc);
|
||||
void hb_proc_accept_count(hb_proc* proc, size_t count);
|
||||
bool hb_proc_accept_if(hb_proc* proc, hb_rune c);
|
||||
bool hb_proc_accept_if_not(hb_proc* proc, hb_rune c);
|
||||
size_t hb_proc_accept_if_matches(hb_proc* proc, char const* match);
|
||||
#define hb_proc_accept_if_matches(proc, match) \
|
||||
hb_proc_accept_if_matches_len(proc, match, \
|
||||
hb_string_literal_length(match))
|
||||
size_t hb_proc_accept_if_matches_len(hb_proc* proc, char const* match,
|
||||
size_t match_len);
|
||||
size_t hb_proc_accept_if_matches_line_terminator(hb_proc* proc);
|
||||
bool hb_proc_accept_if_predicate(hb_proc* proc, hb_proc_pred* pred);
|
||||
size_t hb_proc_accept_while_predicate(hb_proc* proc, hb_proc_pred* pred);
|
||||
|
@ -69,17 +81,28 @@ void hb_proc_bounds_assert_not_eof(hb_proc* proc);
|
|||
bool hb_proc_bounds_check_offset(hb_proc* proc, size_t offset);
|
||||
void hb_proc_bounds_assert_offset(hb_proc* proc, size_t offset);
|
||||
|
||||
#define hb_proc_matches(proc, match) hb_proc_matches_len(proc, match, hb_string_literal_length(match))
|
||||
#define hb_proc_matches(proc, match) \
|
||||
hb_proc_matches_len(proc, match, hb_string_literal_length(match))
|
||||
size_t hb_proc_matches_len(hb_proc* proc, char const* match, size_t match_len);
|
||||
#define hb_proc_matches_i(proc, match) hb_proc_matches_len_i(proc, match, hb_string_literal_length(match))
|
||||
size_t hb_proc_matches_len_i(hb_proc* proc, char const* match, size_t match_len);
|
||||
#define hb_proc_matches_i(proc, match) \
|
||||
hb_proc_matches_len_i(proc, match, hb_string_literal_length(match))
|
||||
size_t hb_proc_matches_len_i(hb_proc* proc, char const* match,
|
||||
size_t match_len);
|
||||
size_t hb_proc_matches_line_terminator(hb_proc* proc);
|
||||
|
||||
#define hb_proc_error_if_not_suppressed(proc, code, msg) if (!hb_err_set_has(&(proc)->cfg->suppressed_errors, code)) hb_proc_error(proc, code, msg);
|
||||
#define hb_proc_error(proc, code, msg) hb_proc_error_pos(proc, code, (proc)->src_next, msg)
|
||||
void hb_proc_error_pos(hb_proc* proc, hb_err code, size_t pos, char const* msg);
|
||||
#define hb_proc_error_custom(proc, code, format, ...) hb_proc_error_custom_pos(proc, code, (proc)->src_next, format, __VA_ARGS__)
|
||||
void hb_proc_error_custom_pos(hb_proc* proc, hb_err code, size_t pos, char const* format, ...);
|
||||
#define hb_proc_error_if_not_suppressed(proc, code, msg) \
|
||||
if (!hb_err_set_has(&(proc)->cfg->suppressed_errors, code)) \
|
||||
hb_proc_error(proc, code, msg);
|
||||
#define hb_proc_error(proc, code, msg) \
|
||||
hb_proc_error_pos_len(proc, code, (proc)->src_next, msg, \
|
||||
hb_string_literal_length(msg))
|
||||
void hb_proc_error_pos_len(hb_proc* proc, hb_err code, size_t pos,
|
||||
char const* msg, size_t msg_len);
|
||||
#define hb_proc_error_custom(proc, code, format, ...) \
|
||||
hb_proc_error_custom_pos(proc, code, (proc)->src_next, format, \
|
||||
__VA_ARGS__)
|
||||
void hb_proc_error_custom_pos(hb_proc* proc, hb_err code, size_t pos,
|
||||
char const* format, ...);
|
||||
|
||||
hb_eof_rune hb_proc_peek_eof(hb_proc* proc);
|
||||
hb_rune hb_proc_peek(hb_proc* proc);
|
||||
|
@ -88,23 +111,33 @@ hb_rune hb_proc_peek_offset(hb_proc* proc, size_t offset);
|
|||
|
||||
void hb_proc_require(hb_proc* proc, hb_rune c);
|
||||
hb_rune hb_proc_require_skip(hb_proc* proc, hb_rune c);
|
||||
hb_rune hb_proc_require_predicate(hb_proc* proc, hb_proc_pred* pred, char const* name);
|
||||
hb_rune hb_proc_require_skip_predicate(hb_proc* proc, hb_proc_pred* pred, char const* name);
|
||||
void hb_proc_require_match(hb_proc* proc, char const* match);
|
||||
void hb_proc_require_skip_match(hb_proc* proc, char const* match);
|
||||
hb_rune hb_proc_require_predicate(hb_proc* proc, hb_proc_pred* pred,
|
||||
char const* name);
|
||||
hb_rune hb_proc_require_skip_predicate(hb_proc* proc, hb_proc_pred* pred,
|
||||
char const* name);
|
||||
#define hb_proc_require_match(proc, match) \
|
||||
hb_proc_require_match_len(proc, match, hb_string_literal_length(match))
|
||||
void hb_proc_require_match_len(hb_proc* proc, char const* match,
|
||||
size_t match_len);
|
||||
#define hb_proc_require_skip_match(proc, match) \
|
||||
hb_proc_require_skip_match_len(proc, match, \
|
||||
hb_string_literal_length(match))
|
||||
void hb_proc_require_skip_match_len(hb_proc* proc, char const* match,
|
||||
size_t match_len);
|
||||
|
||||
hb_rune hb_proc_skip(hb_proc* proc);
|
||||
size_t hb_proc_skip_amount(hb_proc* proc, size_t amount);
|
||||
size_t hb_proc_skip_if(hb_proc* proc, hb_rune c);
|
||||
size_t hb_proc_skip_while_predicate(hb_proc* proc, hb_proc_pred* pred);
|
||||
#define hb_proc_skip_if_matches(proc, match) hb_proc_skip_amount(proc, hb_proc_matches(proc, match))
|
||||
#define hb_proc_skip_if_matches(proc, match) \
|
||||
hb_proc_skip_amount(proc, hb_proc_matches(proc, match))
|
||||
|
||||
#define hb_proc_view_init_src(name, proc) \
|
||||
nh_view_str name; \
|
||||
nh_view_str_init(&name, (proc)->src, 0, 0)
|
||||
#define hb_proc_view_init_out(name, proc) \
|
||||
nh_view_str name; \
|
||||
nh_view_str_init(&name, (proc)->out, 0, 0)
|
||||
#define hb_proc_view_init_src(name, proc) \
|
||||
nh_view_str name; \
|
||||
nh_view_str_init(&name, (proc)->src, 0, 0)
|
||||
#define hb_proc_view_init_out(name, proc) \
|
||||
nh_view_str name; \
|
||||
nh_view_str_init(&name, (proc)->out, 0, 0)
|
||||
void hb_proc_view_start_with_src_next(nh_view_str* view, hb_proc* proc);
|
||||
void hb_proc_view_end_with_src_prev(nh_view_str* view, hb_proc* proc);
|
||||
void hb_proc_view_start_with_out_next(nh_view_str* view, hb_proc* proc);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rune.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* Accept the next character.
|
||||
|
@ -11,17 +11,18 @@
|
|||
* @return next character
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END
|
||||
*/
|
||||
hb_rune hb_proc_accept(hb_proc* proc) {
|
||||
// Get the next character, throwing if EOF.
|
||||
hb_rune c = hb_proc_peek(proc);
|
||||
hb_rune hb_proc_accept(hb_proc* proc)
|
||||
{
|
||||
// Get the next character, throwing if EOF.
|
||||
hb_rune c = hb_proc_peek(proc);
|
||||
|
||||
// Append to output.
|
||||
hb_proc_write(proc, c);
|
||||
// Append to output.
|
||||
hb_proc_write(proc, c);
|
||||
|
||||
// Mark character as consumed.
|
||||
proc->src_next++;
|
||||
// Mark character as consumed.
|
||||
proc->src_next++;
|
||||
|
||||
return c;
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -32,13 +33,14 @@ hb_rune hb_proc_accept(hb_proc* proc) {
|
|||
* @param count amount of characters
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END
|
||||
*/
|
||||
void hb_proc_accept_count(hb_proc* proc, size_t count) {
|
||||
hb_proc_bounds_assert_offset(proc, count);
|
||||
void hb_proc_accept_count(hb_proc* proc, size_t count)
|
||||
{
|
||||
hb_proc_bounds_assert_offset(proc, count);
|
||||
|
||||
memcpy(&proc->out[proc->out_next], &proc->src[proc->src_next], count);
|
||||
memcpy(&proc->out[proc->out_next], &proc->src[proc->src_next], count);
|
||||
|
||||
proc->src_next += count;
|
||||
proc->out_next += count;
|
||||
proc->src_next += count;
|
||||
proc->out_next += count;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -50,17 +52,18 @@ void hb_proc_accept_count(hb_proc* proc, size_t count) {
|
|||
* @param c character to match
|
||||
* @return false if nothing was accepted, true otherwise
|
||||
*/
|
||||
bool hb_proc_accept_if(hb_proc* proc, hb_rune c) {
|
||||
hb_eof_rune n = hb_proc_peek_eof(proc);
|
||||
bool hb_proc_accept_if(hb_proc* proc, hb_rune c)
|
||||
{
|
||||
hb_eof_rune n = hb_proc_peek_eof(proc);
|
||||
|
||||
// n != c takes care of n == HB_EOF
|
||||
if (n != c) {
|
||||
return false;
|
||||
}
|
||||
// n != c takes care of n == HB_EOF
|
||||
if (n != c) {
|
||||
return false;
|
||||
}
|
||||
|
||||
hb_proc_accept(proc);
|
||||
hb_proc_accept(proc);
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -72,17 +75,18 @@ bool hb_proc_accept_if(hb_proc* proc, hb_rune c) {
|
|||
* @param c character to not match
|
||||
* @return false if nothing was accepted, true otherwise
|
||||
*/
|
||||
bool hb_proc_accept_if_not(hb_proc* proc, hb_rune c) {
|
||||
hb_eof_rune n = hb_proc_peek_eof(proc);
|
||||
bool hb_proc_accept_if_not(hb_proc* proc, hb_rune c)
|
||||
{
|
||||
hb_eof_rune n = hb_proc_peek_eof(proc);
|
||||
|
||||
// n == c takes care of n != HB_EOF
|
||||
if (n == c) {
|
||||
return false;
|
||||
}
|
||||
// n == c takes care of n != HB_EOF
|
||||
if (n == c) {
|
||||
return false;
|
||||
}
|
||||
|
||||
hb_proc_accept(proc);
|
||||
hb_proc_accept(proc);
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -92,16 +96,17 @@ bool hb_proc_accept_if_not(hb_proc* proc, hb_rune c) {
|
|||
*
|
||||
* @param proc proc
|
||||
* @param match characters to match
|
||||
* @param match_len length of {@arg match}
|
||||
* @return 0 if nothing was accepted, length of `match` otherwise
|
||||
*/
|
||||
size_t hb_proc_accept_if_matches(hb_proc* proc, char const* match) {
|
||||
size_t match_len = hb_proc_matches(proc, match);
|
||||
size_t hb_proc_accept_if_matches_len(hb_proc* proc, char const* match,
|
||||
size_t match_len)
|
||||
{
|
||||
if (hb_proc_matches_len(proc, match, match_len)) {
|
||||
hb_proc_accept_count(proc, match_len);
|
||||
}
|
||||
|
||||
if (match_len) {
|
||||
hb_proc_accept_count(proc, match_len);
|
||||
}
|
||||
|
||||
return match_len;
|
||||
return match_len;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -111,14 +116,15 @@ size_t hb_proc_accept_if_matches(hb_proc* proc, char const* match) {
|
|||
* @param proc proc
|
||||
* @return amount of characters matched
|
||||
*/
|
||||
size_t hb_proc_accept_if_matches_line_terminator(hb_proc* proc) {
|
||||
size_t match_len = hb_proc_matches_line_terminator(proc);
|
||||
size_t hb_proc_accept_if_matches_line_terminator(hb_proc* proc)
|
||||
{
|
||||
size_t match_len = hb_proc_matches_line_terminator(proc);
|
||||
|
||||
if (match_len) {
|
||||
hb_proc_accept_count(proc, match_len);
|
||||
}
|
||||
if (match_len) {
|
||||
hb_proc_accept_count(proc, match_len);
|
||||
}
|
||||
|
||||
return match_len;
|
||||
return match_len;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -129,16 +135,17 @@ size_t hb_proc_accept_if_matches_line_terminator(hb_proc* proc) {
|
|||
* @param pred predicate
|
||||
* @return false if nothing was accepted, true otherwise
|
||||
*/
|
||||
bool hb_proc_accept_if_predicate(hb_proc* proc, hb_proc_pred* pred) {
|
||||
hb_eof_rune c = hb_proc_peek_eof(proc);
|
||||
bool hb_proc_accept_if_predicate(hb_proc* proc, hb_proc_pred* pred)
|
||||
{
|
||||
hb_eof_rune c = hb_proc_peek_eof(proc);
|
||||
|
||||
if (c == HB_EOF || !(*pred)((hb_rune) c)) {
|
||||
return false;
|
||||
}
|
||||
if (c == HB_EOF || !(*pred)((hb_rune) c)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
hb_proc_accept(proc);
|
||||
hb_proc_accept(proc);
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -149,12 +156,13 @@ bool hb_proc_accept_if_predicate(hb_proc* proc, hb_proc_pred* pred) {
|
|||
* @param pred predicate
|
||||
* @return amount of characters accepted
|
||||
*/
|
||||
size_t hb_proc_accept_while_predicate(hb_proc* proc, hb_proc_pred* pred) {
|
||||
size_t count = 0;
|
||||
size_t hb_proc_accept_while_predicate(hb_proc* proc, hb_proc_pred* pred)
|
||||
{
|
||||
size_t count = 0;
|
||||
|
||||
while (hb_proc_accept_if_predicate(proc, pred)) {
|
||||
count++;
|
||||
}
|
||||
while (hb_proc_accept_if_predicate(proc, pred)) {
|
||||
count++;
|
||||
}
|
||||
|
||||
return count;
|
||||
return count;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include <stdbool.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rune.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/**
|
||||
* Assert that there are still unconsumed source characters remaining.
|
||||
|
@ -8,34 +8,39 @@
|
|||
* @param proc proc
|
||||
* @throws HB_ERR_PARSE_UNEXPECTED_END if the end of the source has been reached
|
||||
*/
|
||||
void hb_proc_bounds_assert_not_eof(hb_proc* proc) {
|
||||
if (proc->src_next == proc->src_len) {
|
||||
hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END, "Unexpected end of input");
|
||||
}
|
||||
void hb_proc_bounds_assert_not_eof(hb_proc* proc)
|
||||
{
|
||||
if (proc->src_next == proc->src_len) {
|
||||
hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END,
|
||||
"Unexpected end of input");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that `offset` characters from next does not exceed the end of the source.
|
||||
* When `offset` is 0, it represents the next unconsumed character.
|
||||
* Check that `offset` characters from next does not exceed the end of the
|
||||
* source. When `offset` is 0, it represents the next unconsumed character.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param offset
|
||||
* @return true if src_next + offset <= src_len
|
||||
*/
|
||||
bool hb_proc_bounds_check_offset(hb_proc* proc, size_t offset) {
|
||||
return proc->src_next + offset <= proc->src_len;
|
||||
bool hb_proc_bounds_check_offset(hb_proc* proc, size_t offset)
|
||||
{
|
||||
return proc->src_next + offset <= proc->src_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Assert that `offset` characters from next does not exceed the end of the source.
|
||||
* When `offset` is 0, it represents the next unconsumed character.
|
||||
* Assert that `offset` characters from next does not exceed the end of the
|
||||
* source. When `offset` is 0, it represents the next unconsumed character.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param offset
|
||||
* @throws HB_ERR_PARSE_UNEXPECTED_END if `offset` exceeds end
|
||||
*/
|
||||
void hb_proc_bounds_assert_offset(hb_proc* proc, size_t offset) {
|
||||
if (!hb_proc_bounds_check_offset(proc, offset)) {
|
||||
hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END, "Unexpected end of input");
|
||||
}
|
||||
void hb_proc_bounds_assert_offset(hb_proc* proc, size_t offset)
|
||||
{
|
||||
if (!hb_proc_bounds_check_offset(proc, offset)) {
|
||||
hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END,
|
||||
"Unexpected end of input");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,28 +1,36 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <hb/proc.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static void hb_proc_error_setandjmp(hb_proc* proc, hb_err code, size_t pos, char* msg, bool custom) {
|
||||
proc->result.code = code;
|
||||
proc->result.pos = pos;
|
||||
proc->result.msg = msg;
|
||||
proc->result.custom = custom;
|
||||
longjmp(proc->start, 1);
|
||||
static void hb_proc_error_setandjmp(hb_proc* proc, hb_err code, size_t pos,
|
||||
char* msg)
|
||||
{
|
||||
proc->result->code = code;
|
||||
proc->result->pos = pos;
|
||||
proc->result->msg = msg;
|
||||
longjmp(proc->start, 1);
|
||||
}
|
||||
|
||||
void hb_proc_error_pos(hb_proc* proc, hb_err code, size_t pos, char const* msg) {
|
||||
hb_proc_error_setandjmp(proc, code, pos, (char*) msg, false);
|
||||
void hb_proc_error_pos_len(hb_proc* proc, hb_err code, size_t pos,
|
||||
char const* msg, size_t msg_len)
|
||||
{
|
||||
char* dup = malloc((msg_len + 1) * sizeof(char));
|
||||
memcpy(dup, msg, msg_len);
|
||||
dup[msg_len] = '\0';
|
||||
hb_proc_error_setandjmp(proc, code, pos, dup);
|
||||
}
|
||||
|
||||
void hb_proc_error_custom_pos(hb_proc* proc, hb_err code, size_t pos, char const* format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
void hb_proc_error_custom_pos(hb_proc* proc, hb_err code, size_t pos,
|
||||
char const* format, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
|
||||
char* msg = calloc(HB_PROC_ERROR_CUSTOM_SIZE, sizeof(char));
|
||||
vsnprintf(msg, HB_PROC_ERROR_CUSTOM_SIZE - 1, format, args);
|
||||
char* msg = calloc(HB_PROC_ERROR_CUSTOM_SIZE, sizeof(char));
|
||||
vsnprintf(msg, HB_PROC_ERROR_CUSTOM_SIZE - 1, format, args);
|
||||
|
||||
va_end(args);
|
||||
va_end(args);
|
||||
|
||||
hb_proc_error_setandjmp(proc, code, pos, msg, true);
|
||||
hb_proc_error_setandjmp(proc, code, pos, msg);
|
||||
}
|
||||
|
|
|
@ -1,41 +1,50 @@
|
|||
#include <string.h>
|
||||
#include <hb/proc.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* Checks if the next sequence of characters matches the character array `match`.
|
||||
* Won't cause an error if insufficient amount of characters left.
|
||||
* Checks if the next sequence of characters matches the character array
|
||||
* `match`. Won't cause an error if insufficient amount of characters left.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param characters to check against
|
||||
* @return amount of characters matched, which should be equal to `strlen(match)`
|
||||
* @return amount of characters matched, which should be equal to
|
||||
* `strlen(match)`
|
||||
*/
|
||||
size_t hb_proc_matches_len(hb_proc* proc, char const* match, size_t match_len) {
|
||||
// Check that there are enough characters left.
|
||||
if (!hb_proc_bounds_check_offset(proc, match_len)) return 0;
|
||||
size_t hb_proc_matches_len(hb_proc* proc, char const* match, size_t match_len)
|
||||
{
|
||||
// Check that there are enough characters left.
|
||||
if (!hb_proc_bounds_check_offset(proc, match_len))
|
||||
return 0;
|
||||
|
||||
// Compare characters with fast memcmp.
|
||||
if (memcmp(&proc->src[proc->src_next], match, match_len) != 0) return 0;
|
||||
// Compare characters with fast memcmp.
|
||||
if (memcmp(&proc->src[proc->src_next], match, match_len) != 0)
|
||||
return 0;
|
||||
|
||||
// Return amount of characters matched.
|
||||
return match_len;
|
||||
// Return amount of characters matched.
|
||||
return match_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the next sequence of characters matches the character array `match` of lowercase characters ignoring case.
|
||||
* Won't cause an error if insufficient amount of characters left.
|
||||
* Checks if the next sequence of characters matches the character array `match`
|
||||
* of lowercase characters ignoring case. Won't cause an error if insufficient
|
||||
* amount of characters left.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param characters to check against ignoring case
|
||||
* @return amount of characters matched, which should be equal to `strlen(match)`
|
||||
* @return amount of characters matched, which should be equal to
|
||||
* `strlen(match)`
|
||||
*/
|
||||
size_t hb_proc_matches_len_i(hb_proc* proc, char const* match, size_t match_len) {
|
||||
// Check that there are enough characters left.
|
||||
if (!hb_proc_bounds_check_offset(proc, match_len)) return 0;
|
||||
size_t hb_proc_matches_len_i(hb_proc* proc, char const* match, size_t match_len)
|
||||
{
|
||||
// Check that there are enough characters left.
|
||||
if (!hb_proc_bounds_check_offset(proc, match_len))
|
||||
return 0;
|
||||
|
||||
// Compare characters ignoring case using strncasecmp.
|
||||
if (strncasecmp(&proc->src[proc->src_next], match, match_len) != 0) return 0;
|
||||
// Compare characters ignoring case using strncasecmp.
|
||||
if (strncasecmp(&proc->src[proc->src_next], match, match_len) != 0)
|
||||
return 0;
|
||||
|
||||
return match_len;
|
||||
return match_len;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -45,9 +54,12 @@ size_t hb_proc_matches_len_i(hb_proc* proc, char const* match, size_t match_len)
|
|||
* @param proc proc
|
||||
* @return amount of characters matched
|
||||
*/
|
||||
size_t hb_proc_matches_line_terminator(hb_proc* proc) {
|
||||
// Comparing against `\r\n` must be done before `\r`.
|
||||
return hb_proc_matches(proc, "\r\n") ? 2 :
|
||||
hb_proc_matches(proc, "\r") ? 1 :
|
||||
hb_proc_matches(proc, "\n");
|
||||
size_t hb_proc_matches_line_terminator(hb_proc* proc)
|
||||
{
|
||||
// Comparing against `\r\n` must be done before `\r`.
|
||||
return hb_proc_matches(proc, "\r\n")
|
||||
? 2
|
||||
: hb_proc_matches(proc, "\r")
|
||||
? 1
|
||||
: hb_proc_matches(proc, "\n");
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include <stddef.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rune.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* Get the next character.
|
||||
|
@ -9,8 +9,9 @@
|
|||
* @param proc proc
|
||||
* @return character or {@link HB_EOF}
|
||||
*/
|
||||
hb_eof_rune hb_proc_peek_eof(hb_proc* proc) {
|
||||
return proc->src[proc->src_next];
|
||||
hb_eof_rune hb_proc_peek_eof(hb_proc* proc)
|
||||
{
|
||||
return proc->src[proc->src_next];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -21,45 +22,52 @@ hb_eof_rune hb_proc_peek_eof(hb_proc* proc) {
|
|||
* @return character
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END
|
||||
*/
|
||||
hb_rune hb_proc_peek(hb_proc* proc) {
|
||||
hb_proc_bounds_assert_not_eof(proc);
|
||||
hb_rune hb_proc_peek(hb_proc* proc)
|
||||
{
|
||||
hb_proc_bounds_assert_not_eof(proc);
|
||||
|
||||
hb_eof_rune c = hb_proc_peek_eof(proc);
|
||||
hb_eof_rune c = hb_proc_peek_eof(proc);
|
||||
|
||||
return c;
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the `offset` character from next.
|
||||
* When `offset` is 0, the next character is returned (equivalent to {@link hb_proc_peek_eof}).
|
||||
* If `offset` represents after the last character, {@link HB_EOF} is returned.
|
||||
* When `offset` is 0, the next character is returned (equivalent to {@link
|
||||
* hb_proc_peek_eof}). If `offset` represents after the last character, {@link
|
||||
* HB_EOF} is returned.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param offset position of character to get
|
||||
* @return character or {@link HB_EOF}
|
||||
*/
|
||||
hb_eof_rune hb_proc_peek_eof_offset(hb_proc* proc, size_t offset) {
|
||||
if (!hb_proc_bounds_check_offset(proc, offset)) return HB_EOF;
|
||||
hb_eof_rune hb_proc_peek_eof_offset(hb_proc* proc, size_t offset)
|
||||
{
|
||||
if (!hb_proc_bounds_check_offset(proc, offset))
|
||||
return HB_EOF;
|
||||
|
||||
return proc->src[proc->src_next + offset];
|
||||
return proc->src[proc->src_next + offset];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the `offset` character from next.
|
||||
* When `offset` is 0, the next character is returned (equivalent to {@link hb_proc_peek_eof}).
|
||||
* An error will be caused if `offset` represents after the last character.
|
||||
* When `offset` is 0, the next character is returned (equivalent to {@link
|
||||
* hb_proc_peek_eof}). An error will be caused if `offset` represents after the
|
||||
* last character.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param offset position of character to get
|
||||
* @return character
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END
|
||||
*/
|
||||
hb_rune hb_proc_peek_offset(hb_proc* proc, size_t offset) {
|
||||
hb_eof_rune c = hb_proc_peek_eof_offset(proc, offset);
|
||||
hb_rune hb_proc_peek_offset(hb_proc* proc, size_t offset)
|
||||
{
|
||||
hb_eof_rune c = hb_proc_peek_eof_offset(proc, offset);
|
||||
|
||||
if (c == HB_EOF) {
|
||||
hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END, "Unexpected end of input");
|
||||
}
|
||||
if (c == HB_EOF) {
|
||||
hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END,
|
||||
"Unexpected end of input");
|
||||
}
|
||||
|
||||
return c;
|
||||
return c;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include <hb/err.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
/**
|
||||
* Require the next character to be `c`.
|
||||
|
@ -10,37 +10,46 @@
|
|||
* @param c character to match
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
|
||||
*/
|
||||
void hb_proc_require(hb_proc* proc, hb_rune c) {
|
||||
hb_rune n = hb_proc_accept(proc);
|
||||
void hb_proc_require(hb_proc* proc, hb_rune c)
|
||||
{
|
||||
hb_rune n = hb_proc_accept(proc);
|
||||
|
||||
if (c != n) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND, "Expected `%c` (U+%x), got `%c` (U+%x)", c, c, n, n);
|
||||
}
|
||||
if (c != n) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
|
||||
"Expected `%c` (U+%x), got `%c` (U+%x)", c,
|
||||
c, n, n);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Require the next character to be `c`.
|
||||
* The matched character is skipped over and NOT written to output, and also returned.
|
||||
* The matched character is skipped over and NOT written to output, and also
|
||||
* returned.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param c character to match
|
||||
* @return matched character
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
|
||||
*/
|
||||
hb_rune hb_proc_require_skip(hb_proc* proc, hb_rune c) {
|
||||
hb_rune n = hb_proc_skip(proc);
|
||||
hb_rune hb_proc_require_skip(hb_proc* proc, hb_rune c)
|
||||
{
|
||||
hb_rune n = hb_proc_skip(proc);
|
||||
|
||||
if (c != n) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND, "Expected `%c` (U+%x), got `%c` (U+%x) at %s", c, c, n, n);
|
||||
}
|
||||
if (c != n) {
|
||||
hb_proc_error_custom(
|
||||
proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
|
||||
"Expected `%c` (U+%x), got `%c` (U+%x) at %s", c, c, n,
|
||||
n);
|
||||
}
|
||||
|
||||
return n;
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Require the next character to satisfy the predicate `pred`.
|
||||
* The matched character is written to output.
|
||||
* If not matched, the error message will describe the expected output using `name`.
|
||||
* If not matched, the error message will describe the expected output using
|
||||
* `name`.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param pred predicate
|
||||
|
@ -48,20 +57,25 @@ hb_rune hb_proc_require_skip(hb_proc* proc, hb_rune c) {
|
|||
* @return required character
|
||||
* @throws HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
|
||||
*/
|
||||
hb_rune hb_proc_require_predicate(hb_proc* proc, hb_proc_pred* pred, char const* name) {
|
||||
hb_rune n = hb_proc_accept(proc);
|
||||
hb_rune hb_proc_require_predicate(hb_proc* proc, hb_proc_pred* pred,
|
||||
char const* name)
|
||||
{
|
||||
hb_rune n = hb_proc_accept(proc);
|
||||
|
||||
if (!(*pred)(n)) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND, "Expected %s, got `%c` (U+%x)", name, n, n);
|
||||
}
|
||||
if (!(*pred)(n)) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
|
||||
"Expected %s, got `%c` (U+%x)", name, n,
|
||||
n);
|
||||
}
|
||||
|
||||
return n;
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Require the next character to satisfy the predicate `pred`.
|
||||
* The matched character is skipped over and NOT written to output.
|
||||
* If not matched, the error message will describe the expected output using `name`.
|
||||
* If not matched, the error message will describe the expected output using
|
||||
* `name`.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param pred predicate
|
||||
|
@ -69,14 +83,18 @@ hb_rune hb_proc_require_predicate(hb_proc* proc, hb_proc_pred* pred, char const*
|
|||
* @return required character
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
|
||||
*/
|
||||
hb_rune hb_proc_require_skip_predicate(hb_proc* proc, hb_proc_pred* pred, char const* name) {
|
||||
hb_rune n = hb_proc_skip(proc);
|
||||
hb_rune hb_proc_require_skip_predicate(hb_proc* proc, hb_proc_pred* pred,
|
||||
char const* name)
|
||||
{
|
||||
hb_rune n = hb_proc_skip(proc);
|
||||
|
||||
if (!(*pred)(n)) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND, "Expected %s, got `%c` (U+%x)", name, n, n);
|
||||
}
|
||||
if (!(*pred)(n)) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
|
||||
"Expected %s, got `%c` (U+%x)", name, n,
|
||||
n);
|
||||
}
|
||||
|
||||
return n;
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -85,14 +103,16 @@ hb_rune hb_proc_require_skip_predicate(hb_proc* proc, hb_proc_pred* pred, char c
|
|||
*
|
||||
* @param proc proc
|
||||
* @param match sequence of characters to require
|
||||
* @param match_len length of {@arg match}
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
|
||||
*/
|
||||
void hb_proc_require_match(hb_proc* proc, char const* match) {
|
||||
size_t matches = hb_proc_accept_if_matches(proc, match);
|
||||
|
||||
if (!matches) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND, "Expected `%s`", match);
|
||||
}
|
||||
void hb_proc_require_match_len(hb_proc* proc, char const* match,
|
||||
size_t match_len)
|
||||
{
|
||||
if (!hb_proc_accept_if_matches_len(proc, match, match_len)) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
|
||||
"Expected `%s`", match);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -101,12 +121,16 @@ void hb_proc_require_match(hb_proc* proc, char const* match) {
|
|||
*
|
||||
* @param proc proc
|
||||
* @param match sequence of characters to require
|
||||
* @param match_len length of {@arg match}
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
|
||||
*/
|
||||
void hb_proc_require_skip_match(hb_proc* proc, char const* match) {
|
||||
size_t matches = hb_proc_skip_if_matches(proc, match);
|
||||
void hb_proc_require_skip_match_len(hb_proc* proc, char const* match,
|
||||
size_t match_len)
|
||||
{
|
||||
if (!hb_proc_matches_len(proc, match, match_len)) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
|
||||
"Expected `%s`", match);
|
||||
}
|
||||
|
||||
if (!matches) {
|
||||
hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND, "Expected `%s`", match);
|
||||
}
|
||||
hb_proc_skip_amount(proc, match_len);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
/**
|
||||
* Skip over the next character.
|
||||
|
@ -9,14 +9,15 @@
|
|||
* @return skipped character
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END
|
||||
*/
|
||||
hb_rune hb_proc_skip(hb_proc* proc) {
|
||||
hb_proc_bounds_assert_not_eof(proc);
|
||||
hb_rune hb_proc_skip(hb_proc* proc)
|
||||
{
|
||||
hb_proc_bounds_assert_not_eof(proc);
|
||||
|
||||
hb_rune c = proc->src[proc->src_next];
|
||||
hb_rune c = proc->src[proc->src_next];
|
||||
|
||||
proc->src_next++;
|
||||
proc->src_next++;
|
||||
|
||||
return c;
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -28,12 +29,13 @@ hb_rune hb_proc_skip(hb_proc* proc) {
|
|||
* @return amount of characters skipped
|
||||
* @throws on HB_ERR_PARSE_UNEXPECTED_END
|
||||
*/
|
||||
size_t hb_proc_skip_amount(hb_proc* proc, size_t amount) {
|
||||
hb_proc_bounds_assert_offset(proc, amount);
|
||||
size_t hb_proc_skip_amount(hb_proc* proc, size_t amount)
|
||||
{
|
||||
hb_proc_bounds_assert_offset(proc, amount);
|
||||
|
||||
proc->src_next += amount;
|
||||
proc->src_next += amount;
|
||||
|
||||
return amount;
|
||||
return amount;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -46,41 +48,43 @@ size_t hb_proc_skip_amount(hb_proc* proc, size_t amount) {
|
|||
* @param c character to skip if next
|
||||
* @return 1 if skipped, 0 otherwise
|
||||
*/
|
||||
size_t hb_proc_skip_if(hb_proc* proc, hb_rune c) {
|
||||
hb_eof_rune n = hb_proc_peek_eof(proc);
|
||||
size_t hb_proc_skip_if(hb_proc* proc, hb_rune c)
|
||||
{
|
||||
hb_eof_rune n = hb_proc_peek_eof(proc);
|
||||
|
||||
// n != c takes care of n == HB_EOF
|
||||
if (n != c) {
|
||||
return 0;
|
||||
}
|
||||
// n != c takes care of n == HB_EOF
|
||||
if (n != c) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
proc->src_next++;
|
||||
proc->src_next++;
|
||||
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip over every following character until one dissatisfies the predicate `pred`,
|
||||
* or the end is reached.
|
||||
* Skip over every following character until one dissatisfies the predicate
|
||||
* `pred`, or the end is reached.
|
||||
*
|
||||
* @param proc proc
|
||||
* @param pred predicate
|
||||
* @return amount of characters skipped
|
||||
*/
|
||||
size_t hb_proc_skip_while_predicate(hb_proc* proc, hb_proc_pred* pred) {
|
||||
size_t count = 0;
|
||||
size_t hb_proc_skip_while_predicate(hb_proc* proc, hb_proc_pred* pred)
|
||||
{
|
||||
size_t count = 0;
|
||||
|
||||
while (true) {
|
||||
hb_eof_rune c = hb_proc_peek_eof_offset(proc, count);
|
||||
while (true) {
|
||||
hb_eof_rune c = hb_proc_peek_eof_offset(proc, count);
|
||||
|
||||
if (c == HB_EOF || !(*pred)(c)) {
|
||||
break;
|
||||
}
|
||||
if (c == HB_EOF || !(*pred)(c)) {
|
||||
break;
|
||||
}
|
||||
|
||||
count++;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
proc->src_next += count;
|
||||
proc->src_next += count;
|
||||
|
||||
return count;
|
||||
return count;
|
||||
}
|
||||
|
|
|
@ -1,31 +1,41 @@
|
|||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/proc.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
// A view represents a substring of the source. Faster, easier, safer, and more efficient than making a copy.
|
||||
// If the end is before the start, it's invalid, like NaN. Can be used for special meaning.
|
||||
// See lib/nicehash/view-str.h for more details.
|
||||
// A view represents a substring of the source. Faster, easier, safer, and more
|
||||
// efficient than making a copy. If the end is before the start, it's invalid,
|
||||
// like NaN. Can be used for special meaning. See lib/nicehash/view-str.h for
|
||||
// more details.
|
||||
|
||||
// To avoid underflow, there are no hb_proc_view_start_with_*_prev functions.
|
||||
|
||||
// Start a view at the position of the next character to consume.
|
||||
void hb_proc_view_start_with_src_next(nh_view_str* view, hb_proc* proc) {
|
||||
nh_view_str_set_start(view, proc->src_next);
|
||||
void hb_proc_view_start_with_src_next(nh_view_str* view, hb_proc* proc)
|
||||
{
|
||||
nh_view_str_set_start(view, proc->src_next);
|
||||
}
|
||||
|
||||
// End a view at the position of the last character consumed (inclusive).
|
||||
void hb_proc_view_end_with_src_prev(nh_view_str* view, hb_proc* proc) {
|
||||
nh_view_str_set_length(view, proc->src_next <= view->start ? 0 : proc->src_next - view->start);
|
||||
void hb_proc_view_end_with_src_prev(nh_view_str* view, hb_proc* proc)
|
||||
{
|
||||
nh_view_str_set_length(view, proc->src_next <= view->start
|
||||
? 0
|
||||
: proc->src_next - view->start);
|
||||
}
|
||||
|
||||
// Start a view at the position of the next character that will have been processed.
|
||||
void hb_proc_view_start_with_out_next(nh_view_str* view, hb_proc* proc) {
|
||||
nh_view_str_set_start(view, proc->out_next);
|
||||
// Start a view at the position of the next character that will have been
|
||||
// processed.
|
||||
void hb_proc_view_start_with_out_next(nh_view_str* view, hb_proc* proc)
|
||||
{
|
||||
nh_view_str_set_start(view, proc->out_next);
|
||||
}
|
||||
|
||||
// End a view at the position of the last character processed (inclusive).
|
||||
void hb_proc_view_end_with_out_prev(nh_view_str* view, hb_proc* proc) {
|
||||
nh_view_str_set_length(view, proc->out_next <= view->start ? 0 : proc->out_next - view->start);
|
||||
void hb_proc_view_end_with_out_prev(nh_view_str* view, hb_proc* proc)
|
||||
{
|
||||
nh_view_str_set_length(view, proc->out_next <= view->start
|
||||
? 0
|
||||
: proc->out_next - view->start);
|
||||
}
|
||||
|
|
|
@ -1,48 +1,53 @@
|
|||
#include <hb/proc.h>
|
||||
|
||||
void hb_proc_write(hb_proc* proc, hb_rune c) {
|
||||
// WARNING: Does not check if out_next exceeds bounds.
|
||||
proc->out[proc->out_next] = c;
|
||||
proc->out_next++;
|
||||
void hb_proc_write(hb_proc* proc, hb_rune c)
|
||||
{
|
||||
// WARNING: Does not check if out_next exceeds bounds.
|
||||
proc->out[proc->out_next] = c;
|
||||
proc->out_next++;
|
||||
}
|
||||
|
||||
void hb_proc_write_view(hb_proc* proc, nh_view_str* view) {
|
||||
// WARNING: Does not check boundaries.
|
||||
// WARNING: This works because nh_view_str and proc->out have the same element types. Be aware should this change.
|
||||
memcpy(&proc->out[proc->out_next], &view->array[view->start], view->length * sizeof(hb_rune));
|
||||
proc->out_next += view->length;
|
||||
void hb_proc_write_view(hb_proc* proc, nh_view_str* view)
|
||||
{
|
||||
// WARNING: Does not check boundaries.
|
||||
// WARNING: This works because nh_view_str and proc->out have the same
|
||||
// element types. Be aware should this change.
|
||||
memcpy(&proc->out[proc->out_next], &view->array[view->start],
|
||||
view->length * sizeof(hb_rune));
|
||||
proc->out_next += view->length;
|
||||
}
|
||||
|
||||
size_t hb_proc_write_utf_8(hb_proc* proc, uint32_t c) {
|
||||
if (c <= 0x7F) {
|
||||
// Plain ASCII.
|
||||
hb_proc_write(proc, (hb_rune) c);
|
||||
return 1;
|
||||
}
|
||||
size_t hb_proc_write_utf_8(hb_proc* proc, uint32_t c)
|
||||
{
|
||||
if (c <= 0x7F) {
|
||||
// Plain ASCII.
|
||||
hb_proc_write(proc, (hb_rune) c);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (c <= 0x07FF) {
|
||||
// 2-byte UTF-8.
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 6) & 0x1F) | 0xC0));
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 0) & 0x3F) | 0x80));
|
||||
return 2;
|
||||
}
|
||||
if (c <= 0x07FF) {
|
||||
// 2-byte UTF-8.
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 6) & 0x1F) | 0xC0));
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 0) & 0x3F) | 0x80));
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (c <= 0xFFFF) {
|
||||
// 3-byte UTF-8.
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 12) & 0x0F) | 0xE0));
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 6) & 0x3F) | 0x80));
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 0) & 0x3F) | 0x80));
|
||||
return 3;
|
||||
}
|
||||
if (c <= 0xFFFF) {
|
||||
// 3-byte UTF-8.
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 12) & 0x0F) | 0xE0));
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 6) & 0x3F) | 0x80));
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 0) & 0x3F) | 0x80));
|
||||
return 3;
|
||||
}
|
||||
|
||||
if (c <= 0x10FFFF) {
|
||||
// 4-byte UTF-8.
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 18) & 0x07) | 0xF0));
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 12) & 0x3F) | 0x80));
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 6) & 0x3F) | 0x80));
|
||||
hb_proc_write(proc, (hb_rune) (((c >> 0) & 0x3F) | 0x80));
|
||||
return 4;
|
||||
}
|
||||
if (c <= 0x10FFFF) {
|
||||
// 4-byte UTF-8.
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 18) & 0x07) | 0xF0));
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 12) & 0x3F) | 0x80));
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 6) & 0x3F) | 0x80));
|
||||
hb_proc_write(proc, (hb_rune)(((c >> 0) & 0x3F) | 0x80));
|
||||
return 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
void hb_rule_init(void);
|
||||
|
||||
|
@ -103,15 +103,19 @@ bool hb_rule_tag_wss_check(nh_view_str* tag);
|
|||
|
||||
void hb_rule_tag_child_blacklist_map_add_entries(hb_map_tag_relations* map);
|
||||
void hb_rule_tag_child_blacklist_init(void);
|
||||
bool hb_rule_tag_child_blacklist_allowed(nh_view_str* parent, nh_view_str* child);
|
||||
bool hb_rule_tag_child_blacklist_allowed(nh_view_str* parent,
|
||||
nh_view_str* child);
|
||||
|
||||
void hb_rule_tag_child_whitelist_map_add_entries(hb_map_tag_relations* map);
|
||||
void hb_rule_tag_child_whitelist_init(void);
|
||||
bool hb_rule_tag_child_whitelist_allowed(nh_view_str* parent, nh_view_str* child);
|
||||
bool hb_rule_tag_child_whitelist_allowed(nh_view_str* parent,
|
||||
nh_view_str* child);
|
||||
|
||||
void hb_rule_tag_parent_blacklist_init(void);
|
||||
bool hb_rule_tag_parent_blacklist_allowed(nh_view_str* child, nh_view_str* parent);
|
||||
bool hb_rule_tag_parent_blacklist_allowed(nh_view_str* child,
|
||||
nh_view_str* parent);
|
||||
|
||||
void hb_rule_tag_parent_whitelist_map_add_entries(hb_map_tag_relations* map);
|
||||
void hb_rule_tag_parent_whitelist_init(void);
|
||||
bool hb_rule_tag_parent_whitelist_allowed(nh_view_str* child, nh_view_str* parent);
|
||||
bool hb_rule_tag_parent_whitelist_allowed(nh_view_str* child,
|
||||
nh_view_str* parent);
|
||||
|
|
|
@ -1,20 +1,23 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_ascii_control_set;
|
||||
|
||||
void hb_rule_ascii_control_add_elems(nh_bitfield_ascii* set) {
|
||||
for (char c = 0x0; c <= 0x1F; c++) {
|
||||
nh_bitfield_ascii_add(set, c);
|
||||
}
|
||||
// Adds every character from 0x00 through 0x1F (inclusive) to `set`.
// DEL (0x7F) is not added by this function.
void hb_rule_ascii_control_add_elems(nh_bitfield_ascii* set)
{
	char ch = 0x0;
	while (ch <= 0x1F) {
		nh_bitfield_ascii_add(set, ch);
		ch++;
	}
}
|
||||
|
||||
void hb_rule_ascii_control_init(void) {
|
||||
hb_rule_ascii_control_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_control_add_elems(hb_rule_ascii_control_set);
|
||||
void hb_rule_ascii_control_init(void)
|
||||
{
|
||||
hb_rule_ascii_control_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_control_add_elems(hb_rule_ascii_control_set);
|
||||
}
|
||||
|
||||
bool hb_rule_ascii_control_check(hb_rune c) {
|
||||
return nh_bitfield_ascii_has(hb_rule_ascii_control_set, c);
|
||||
// Reports whether `c` is a member of the control-character set that
// hb_rule_ascii_control_init() assigns.
bool hb_rule_ascii_control_check(hb_rune c)
{
	bool is_control = nh_bitfield_ascii_has(hb_rule_ascii_control_set, c);
	return is_control;
}
|
||||
|
|
|
@ -1,20 +1,23 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_ascii_digit_set;
|
||||
|
||||
void hb_rule_ascii_digit_add_elems(nh_bitfield_ascii* set) {
|
||||
for (char c = '0'; c <= '9'; c++) {
|
||||
nh_bitfield_ascii_add(set, c);
|
||||
}
|
||||
// Adds the decimal digit characters '0' through '9' to `set`.
void hb_rule_ascii_digit_add_elems(nh_bitfield_ascii* set)
{
	char digit = '0';
	while (digit <= '9') {
		nh_bitfield_ascii_add(set, digit);
		digit++;
	}
}
|
||||
|
||||
void hb_rule_ascii_digit_init(void) {
|
||||
hb_rule_ascii_digit_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_digit_add_elems(hb_rule_ascii_digit_set);
|
||||
void hb_rule_ascii_digit_init(void)
|
||||
{
|
||||
hb_rule_ascii_digit_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_digit_add_elems(hb_rule_ascii_digit_set);
|
||||
}
|
||||
|
||||
bool hb_rule_ascii_digit_check(hb_rune c) {
|
||||
return nh_bitfield_ascii_has(hb_rule_ascii_digit_set, c);
|
||||
// Reports whether `c` is a member of the digit set that
// hb_rule_ascii_digit_init() assigns.
bool hb_rule_ascii_digit_check(hb_rune c)
{
	bool is_digit = nh_bitfield_ascii_has(hb_rule_ascii_digit_set, c);
	return is_digit;
}
|
||||
|
|
|
@ -1,26 +1,29 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_ascii_hex_set;
|
||||
|
||||
void hb_rule_ascii_hex_add_elems(nh_bitfield_ascii* set) {
|
||||
for (char c = '0'; c <= '9'; c++) {
|
||||
nh_bitfield_ascii_add(set, c);
|
||||
}
|
||||
for (char c = 'A'; c <= 'F'; c++) {
|
||||
nh_bitfield_ascii_add(set, c);
|
||||
}
|
||||
for (char c = 'a'; c <= 'f'; c++) {
|
||||
nh_bitfield_ascii_add(set, c);
|
||||
}
|
||||
// Adds the hexadecimal digit characters to `set`: '0'-'9', then 'A'-'F', then
// 'a'-'f'.
void hb_rule_ascii_hex_add_elems(nh_bitfield_ascii* set)
{
	// Inclusive {first, last} bounds, visited in the order listed.
	static const char ranges[][2] = {
		{'0', '9'},
		{'A', 'F'},
		{'a', 'f'},
	};
	const int range_count = (int) (sizeof ranges / sizeof ranges[0]);

	for (int r = 0; r < range_count; r++) {
		for (char ch = ranges[r][0]; ch <= ranges[r][1]; ch++) {
			nh_bitfield_ascii_add(set, ch);
		}
	}
}
|
||||
|
||||
void hb_rule_ascii_hex_init(void) {
|
||||
hb_rule_ascii_hex_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_hex_add_elems(hb_rule_ascii_hex_set);
|
||||
void hb_rule_ascii_hex_init(void)
|
||||
{
|
||||
hb_rule_ascii_hex_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_hex_add_elems(hb_rule_ascii_hex_set);
|
||||
}
|
||||
|
||||
bool hb_rule_ascii_hex_check(hb_rune c) {
|
||||
return nh_bitfield_ascii_has(hb_rule_ascii_hex_set, c);
|
||||
// Reports whether `c` is a member of the hexadecimal-digit set that
// hb_rule_ascii_hex_init() assigns.
bool hb_rule_ascii_hex_check(hb_rune c)
{
	bool is_hex = nh_bitfield_ascii_has(hb_rule_ascii_hex_set, c);
	return is_hex;
}
|
||||
|
|
|
@ -1,20 +1,23 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_ascii_lowercase_set;
|
||||
|
||||
void hb_rule_ascii_lowercase_add_elems(nh_bitfield_ascii* set) {
|
||||
for (char c = 'a'; c <= 'z'; c++) {
|
||||
nh_bitfield_ascii_add(set, c);
|
||||
}
|
||||
// Adds the lowercase letters 'a' through 'z' to `set`.
void hb_rule_ascii_lowercase_add_elems(nh_bitfield_ascii* set)
{
	char letter = 'a';
	while (letter <= 'z') {
		nh_bitfield_ascii_add(set, letter);
		letter++;
	}
}
|
||||
|
||||
void hb_rule_ascii_lowercase_init(void) {
|
||||
hb_rule_ascii_lowercase_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_lowercase_add_elems(hb_rule_ascii_lowercase_set);
|
||||
void hb_rule_ascii_lowercase_init(void)
|
||||
{
|
||||
hb_rule_ascii_lowercase_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_lowercase_add_elems(hb_rule_ascii_lowercase_set);
|
||||
}
|
||||
|
||||
bool hb_rule_ascii_lowercase_check(hb_rune c) {
|
||||
return nh_bitfield_ascii_has(hb_rule_ascii_lowercase_set, c);
|
||||
// Reports whether `c` is a member of the lowercase-letter set that
// hb_rule_ascii_lowercase_init() assigns.
bool hb_rule_ascii_lowercase_check(hb_rune c)
{
	bool is_lowercase =
		nh_bitfield_ascii_has(hb_rule_ascii_lowercase_set, c);
	return is_lowercase;
}
|
||||
|
|
|
@ -1,20 +1,23 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_ascii_uppercase_set;
|
||||
|
||||
void hb_rule_ascii_uppercase_add_elems(nh_bitfield_ascii* set) {
|
||||
for (char c = 'A'; c <= 'Z'; c++) {
|
||||
nh_bitfield_ascii_add(set, c);
|
||||
}
|
||||
// Adds the uppercase letters 'A' through 'Z' to `set`.
void hb_rule_ascii_uppercase_add_elems(nh_bitfield_ascii* set)
{
	char letter = 'A';
	while (letter <= 'Z') {
		nh_bitfield_ascii_add(set, letter);
		letter++;
	}
}
|
||||
|
||||
void hb_rule_ascii_uppercase_init(void) {
|
||||
hb_rule_ascii_uppercase_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_uppercase_add_elems(hb_rule_ascii_uppercase_set);
|
||||
void hb_rule_ascii_uppercase_init(void)
|
||||
{
|
||||
hb_rule_ascii_uppercase_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_uppercase_add_elems(hb_rule_ascii_uppercase_set);
|
||||
}
|
||||
|
||||
bool hb_rule_ascii_uppercase_check(hb_rune c) {
|
||||
return nh_bitfield_ascii_has(hb_rule_ascii_uppercase_set, c);
|
||||
// Reports whether `c` is a member of the uppercase-letter set that
// hb_rule_ascii_uppercase_init() assigns.
bool hb_rule_ascii_uppercase_check(hb_rune c)
{
	bool is_uppercase =
		nh_bitfield_ascii_has(hb_rule_ascii_uppercase_set, c);
	return is_uppercase;
}
|
||||
|
|
|
@ -1,22 +1,25 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_ascii_whitespace_set;
|
||||
|
||||
void hb_rule_ascii_whitespace_add_elems(nh_bitfield_ascii* set) {
|
||||
nh_bitfield_ascii_add(set, '\t'); // TAB
|
||||
nh_bitfield_ascii_add(set, '\n'); // LF
|
||||
nh_bitfield_ascii_add(set, '\f'); // FF
|
||||
nh_bitfield_ascii_add(set, '\r'); // CR
|
||||
nh_bitfield_ascii_add(set, ' '); // SPACE
|
||||
// Adds the five whitespace characters to `set`, in this order:
// TAB, LF, FF, CR, SPACE.
void hb_rule_ascii_whitespace_add_elems(nh_bitfield_ascii* set)
{
	// None of the members is NUL, so the terminator safely ends the walk.
	for (const char* member = "\t\n\f\r "; *member != '\0'; member++) {
		nh_bitfield_ascii_add(set, *member);
	}
}
|
||||
|
||||
void hb_rule_ascii_whitespace_init(void) {
|
||||
hb_rule_ascii_whitespace_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_whitespace_add_elems(hb_rule_ascii_whitespace_set);
|
||||
void hb_rule_ascii_whitespace_init(void)
|
||||
{
|
||||
hb_rule_ascii_whitespace_set = nh_bitfield_ascii_create();
|
||||
hb_rule_ascii_whitespace_add_elems(hb_rule_ascii_whitespace_set);
|
||||
}
|
||||
|
||||
bool hb_rule_ascii_whitespace_check(hb_rune c) {
|
||||
return nh_bitfield_ascii_has(hb_rule_ascii_whitespace_set, c);
|
||||
// Reports whether `c` is a member of the whitespace set that
// hb_rule_ascii_whitespace_init() assigns.
bool hb_rule_ascii_whitespace_check(hb_rune c)
{
	bool is_whitespace =
		nh_bitfield_ascii_has(hb_rule_ascii_whitespace_set, c);
	return is_whitespace;
}
|
||||
|
|
|
@ -1,25 +1,29 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_attr_name_exceptions;
|
||||
|
||||
void hb_rule_attr_name_add_exceptions(nh_bitfield_ascii* set) {
|
||||
hb_rule_ascii_control_add_elems(set);
|
||||
nh_bitfield_ascii_add(set, ' ');
|
||||
nh_bitfield_ascii_add(set, '"');
|
||||
nh_bitfield_ascii_add(set, '\'');
|
||||
nh_bitfield_ascii_add(set, '>');
|
||||
nh_bitfield_ascii_add(set, '/');
|
||||
nh_bitfield_ascii_add(set, '=');
|
||||
// NOTE: Unicode noncharacters not tested (https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name)
|
||||
// Adds to `set` the characters that hb_rule_attr_name_check() rejects: the
// elements added by hb_rule_ascii_control_add_elems(), followed by SPACE, '"',
// '\'', '>', '/' and '='.
//
// NOTE: Unicode noncharacters not tested
// (https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name)
void hb_rule_attr_name_add_exceptions(nh_bitfield_ascii* set)
{
	hb_rule_ascii_control_add_elems(set);

	// Remaining exceptions, added in the order they appear in the string.
	for (const char* excluded = " \"'>/="; *excluded != '\0'; excluded++) {
		nh_bitfield_ascii_add(set, *excluded);
	}
}
|
||||
|
||||
void hb_rule_attr_name_init(void) {
|
||||
hb_rule_attr_name_exceptions = nh_bitfield_ascii_create();
|
||||
hb_rule_attr_name_add_exceptions(hb_rule_attr_name_exceptions);
|
||||
void hb_rule_attr_name_init(void)
|
||||
{
|
||||
hb_rule_attr_name_exceptions = nh_bitfield_ascii_create();
|
||||
hb_rule_attr_name_add_exceptions(hb_rule_attr_name_exceptions);
|
||||
}
|
||||
|
||||
bool hb_rule_attr_name_check(hb_rune c) {
|
||||
return !nh_bitfield_ascii_has(hb_rule_attr_name_exceptions, c);
|
||||
// Reports whether `c` is acceptable in an attribute name, i.e. it is NOT in
// the exception set that hb_rule_attr_name_init() assigns.
bool hb_rule_attr_name_check(hb_rune c)
{
	if (nh_bitfield_ascii_has(hb_rule_attr_name_exceptions, c)) {
		return false;
	}
	return true;
}
|
||||
|
|
|
@ -1,20 +1,24 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_attr_quote_set;
|
||||
|
||||
void hb_rule_attr_quote_add_elems(nh_bitfield_ascii* set) {
|
||||
// Backtick is not a valid quote character according to https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
||||
nh_bitfield_ascii_add(set, '\'');
|
||||
nh_bitfield_ascii_add(set, '"');
|
||||
// Adds the attribute quote characters to `set`: '\'' first, then '"'.
//
// Backtick is not a valid quote character according to
// https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
void hb_rule_attr_quote_add_elems(nh_bitfield_ascii* set)
{
	for (const char* quote = "'\""; *quote != '\0'; quote++) {
		nh_bitfield_ascii_add(set, *quote);
	}
}
|
||||
|
||||
void hb_rule_attr_quote_init(void) {
|
||||
hb_rule_attr_quote_set = nh_bitfield_ascii_create();
|
||||
hb_rule_attr_quote_add_elems(hb_rule_attr_quote_set);
|
||||
void hb_rule_attr_quote_init(void)
|
||||
{
|
||||
hb_rule_attr_quote_set = nh_bitfield_ascii_create();
|
||||
hb_rule_attr_quote_add_elems(hb_rule_attr_quote_set);
|
||||
}
|
||||
|
||||
bool hb_rule_attr_quote_check(hb_rune c) {
|
||||
return nh_bitfield_ascii_has(hb_rule_attr_quote_set, c);
|
||||
// Reports whether `c` is a member of the quote-character set that
// hb_rule_attr_quote_init() assigns.
bool hb_rule_attr_quote_check(hb_rune c)
{
	bool is_quote = nh_bitfield_ascii_has(hb_rule_attr_quote_set, c);
	return is_quote;
}
|
||||
|
|
|
@ -1,24 +1,28 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_attr_unquotedvalue_exceptions;
|
||||
|
||||
void hb_rule_attr_unquotedvalue_add_exceptions(nh_bitfield_ascii* set) {
|
||||
hb_rule_ascii_whitespace_add_elems(set);
|
||||
nh_bitfield_ascii_add(set, '"');
|
||||
nh_bitfield_ascii_add(set, '\'');
|
||||
nh_bitfield_ascii_add(set, '`');
|
||||
nh_bitfield_ascii_add(set, '=');
|
||||
nh_bitfield_ascii_add(set, '<');
|
||||
nh_bitfield_ascii_add(set, '>');
|
||||
// Adds to `set` the characters that hb_rule_attr_unquotedvalue_check()
// rejects: the elements added by hb_rule_ascii_whitespace_add_elems(),
// followed by '"', '\'', '`', '=', '<' and '>'.
void hb_rule_attr_unquotedvalue_add_exceptions(nh_bitfield_ascii* set)
{
	hb_rule_ascii_whitespace_add_elems(set);

	// Remaining exceptions, added in the order they appear in the string.
	for (const char* excluded = "\"'`=<>"; *excluded != '\0'; excluded++) {
		nh_bitfield_ascii_add(set, *excluded);
	}
}
|
||||
|
||||
void hb_rule_attr_unquotedvalue_init(void) {
|
||||
hb_rule_attr_unquotedvalue_exceptions = nh_bitfield_ascii_create();
|
||||
hb_rule_attr_unquotedvalue_add_exceptions(hb_rule_attr_unquotedvalue_exceptions);
|
||||
void hb_rule_attr_unquotedvalue_init(void)
|
||||
{
|
||||
hb_rule_attr_unquotedvalue_exceptions = nh_bitfield_ascii_create();
|
||||
hb_rule_attr_unquotedvalue_add_exceptions(
|
||||
hb_rule_attr_unquotedvalue_exceptions);
|
||||
}
|
||||
|
||||
bool hb_rule_attr_unquotedvalue_check(hb_rune c) {
|
||||
return !nh_bitfield_ascii_has(hb_rule_attr_unquotedvalue_exceptions, c);
|
||||
// Reports whether `c` is acceptable in an unquoted attribute value, i.e. it is
// NOT in the exception set that hb_rule_attr_unquotedvalue_init() assigns.
bool hb_rule_attr_unquotedvalue_check(hb_rune c)
{
	if (nh_bitfield_ascii_has(hb_rule_attr_unquotedvalue_exceptions, c)) {
		return false;
	}
	return true;
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,44 +1,45 @@
|
|||
#include <hb/rule.h>
|
||||
|
||||
void hb_rule_init(void) {
|
||||
// Core
|
||||
hb_rule_ascii_control_init();
|
||||
hb_rule_ascii_digit_init();
|
||||
hb_rule_ascii_hex_init();
|
||||
hb_rule_ascii_uppercase_init();
|
||||
hb_rule_ascii_lowercase_init();
|
||||
hb_rule_ascii_whitespace_init();
|
||||
void hb_rule_init(void)
|
||||
{
|
||||
// Core
|
||||
hb_rule_ascii_control_init();
|
||||
hb_rule_ascii_digit_init();
|
||||
hb_rule_ascii_hex_init();
|
||||
hb_rule_ascii_uppercase_init();
|
||||
hb_rule_ascii_lowercase_init();
|
||||
hb_rule_ascii_whitespace_init();
|
||||
|
||||
// Identifiers
|
||||
hb_rule_tag_name_init();
|
||||
hb_rule_attr_name_init();
|
||||
// Identifiers
|
||||
hb_rule_tag_name_init();
|
||||
hb_rule_attr_name_init();
|
||||
|
||||
// Values
|
||||
hb_rule_attr_quote_init();
|
||||
hb_rule_attr_unquotedvalue_init();
|
||||
hb_rule_entity_reference_init();
|
||||
// Values
|
||||
hb_rule_attr_quote_init();
|
||||
hb_rule_attr_unquotedvalue_init();
|
||||
hb_rule_entity_reference_init();
|
||||
|
||||
// Specification tag categories
|
||||
hb_rule_tag_heading_init();
|
||||
hb_rule_tag_media_init();
|
||||
hb_rule_tag_sectioning_init();
|
||||
// Specification tag categories
|
||||
hb_rule_tag_heading_init();
|
||||
hb_rule_tag_media_init();
|
||||
hb_rule_tag_sectioning_init();
|
||||
|
||||
hb_rule_tag_void_init();
|
||||
hb_rule_tag_wss_init();
|
||||
hb_rule_tag_void_init();
|
||||
hb_rule_tag_wss_init();
|
||||
|
||||
hb_rule_tag_html_init();
|
||||
hb_rule_tag_svg_init();
|
||||
hb_rule_tag_html_init();
|
||||
hb_rule_tag_svg_init();
|
||||
|
||||
// hyperbuild tag categories
|
||||
hb_rule_tag_contentfirst_init();
|
||||
hb_rule_tag_content_init();
|
||||
hb_rule_tag_formatting_init();
|
||||
hb_rule_tag_layout_init();
|
||||
hb_rule_tag_specific_init();
|
||||
// hyperbuild tag categories
|
||||
hb_rule_tag_contentfirst_init();
|
||||
hb_rule_tag_content_init();
|
||||
hb_rule_tag_formatting_init();
|
||||
hb_rule_tag_layout_init();
|
||||
hb_rule_tag_specific_init();
|
||||
|
||||
// Relations
|
||||
hb_rule_tag_child_blacklist_init();
|
||||
hb_rule_tag_child_whitelist_init();
|
||||
hb_rule_tag_parent_blacklist_init();
|
||||
hb_rule_tag_parent_whitelist_init();
|
||||
// Relations
|
||||
hb_rule_tag_child_blacklist_init();
|
||||
hb_rule_tag_child_whitelist_init();
|
||||
hb_rule_tag_parent_blacklist_init();
|
||||
hb_rule_tag_parent_whitelist_init();
|
||||
}
|
||||
|
|
|
@ -1,85 +1,92 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_map_tag_relations* hb_rule_tag_child_blacklist_map;
|
||||
|
||||
void hb_rule_tag_child_blacklist_map_add_entries(hb_map_tag_relations* map) {
|
||||
// <address>
|
||||
hb_set_tag_names* address = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(address);
|
||||
hb_rule_tag_sectioning_add_elems(address);
|
||||
hb_set_tag_names_add_whole_literal(address, "address");
|
||||
hb_set_tag_names_add_whole_literal(address, "header");
|
||||
hb_set_tag_names_add_whole_literal(address, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "address", address);
|
||||
void hb_rule_tag_child_blacklist_map_add_entries(hb_map_tag_relations* map)
|
||||
{
|
||||
// <address>
|
||||
hb_set_tag_names* address = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(address);
|
||||
hb_rule_tag_sectioning_add_elems(address);
|
||||
hb_set_tag_names_add_whole_literal(address, "address");
|
||||
hb_set_tag_names_add_whole_literal(address, "header");
|
||||
hb_set_tag_names_add_whole_literal(address, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "address", address);
|
||||
|
||||
// <audio>
|
||||
hb_set_tag_names* audio = hb_set_tag_names_create();
|
||||
hb_rule_tag_media_add_elems(audio);
|
||||
hb_map_tag_relations_set_whole_literal(map, "audio", audio);
|
||||
// <audio>
|
||||
hb_set_tag_names* audio = hb_set_tag_names_create();
|
||||
hb_rule_tag_media_add_elems(audio);
|
||||
hb_map_tag_relations_set_whole_literal(map, "audio", audio);
|
||||
|
||||
// <dfn>
|
||||
hb_set_tag_names* dfn = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(dfn, "dfn");
|
||||
hb_map_tag_relations_set_whole_literal(map, "dfn", dfn);
|
||||
// <dfn>
|
||||
hb_set_tag_names* dfn = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(dfn, "dfn");
|
||||
hb_map_tag_relations_set_whole_literal(map, "dfn", dfn);
|
||||
|
||||
// <dt>
|
||||
hb_set_tag_names* dt = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(dt);
|
||||
hb_rule_tag_sectioning_add_elems(dt);
|
||||
hb_set_tag_names_add_whole_literal(dt, "header");
|
||||
hb_set_tag_names_add_whole_literal(dt, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "dt", dt);
|
||||
// <dt>
|
||||
hb_set_tag_names* dt = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(dt);
|
||||
hb_rule_tag_sectioning_add_elems(dt);
|
||||
hb_set_tag_names_add_whole_literal(dt, "header");
|
||||
hb_set_tag_names_add_whole_literal(dt, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "dt", dt);
|
||||
|
||||
// <footer>
|
||||
hb_set_tag_names* footer = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(footer, "header");
|
||||
hb_set_tag_names_add_whole_literal(footer, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "footer", footer);
|
||||
// <footer>
|
||||
hb_set_tag_names* footer = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(footer, "header");
|
||||
hb_set_tag_names_add_whole_literal(footer, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "footer", footer);
|
||||
|
||||
// <form>
|
||||
hb_set_tag_names* form = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(form, "form");
|
||||
hb_map_tag_relations_set_whole_literal(map, "form", form);
|
||||
// <form>
|
||||
hb_set_tag_names* form = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(form, "form");
|
||||
hb_map_tag_relations_set_whole_literal(map, "form", form);
|
||||
|
||||
// <header>
|
||||
hb_set_tag_names* header = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(header, "header");
|
||||
hb_set_tag_names_add_whole_literal(header, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "header", header);
|
||||
// <header>
|
||||
hb_set_tag_names* header = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(header, "header");
|
||||
hb_set_tag_names_add_whole_literal(header, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "header", header);
|
||||
|
||||
// <label>
|
||||
hb_set_tag_names* label = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(label, "label");
|
||||
hb_map_tag_relations_set_whole_literal(map, "label", label);
|
||||
// <label>
|
||||
hb_set_tag_names* label = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(label, "label");
|
||||
hb_map_tag_relations_set_whole_literal(map, "label", label);
|
||||
|
||||
// <progress>
|
||||
hb_set_tag_names* progress = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(progress, "progress");
|
||||
hb_map_tag_relations_set_whole_literal(map, "progress", progress);
|
||||
// <progress>
|
||||
hb_set_tag_names* progress = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(progress, "progress");
|
||||
hb_map_tag_relations_set_whole_literal(map, "progress", progress);
|
||||
|
||||
// <th>
|
||||
hb_set_tag_names* th = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(th);
|
||||
hb_rule_tag_sectioning_add_elems(th);
|
||||
hb_set_tag_names_add_whole_literal(th, "header");
|
||||
hb_set_tag_names_add_whole_literal(th, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "th", th);
|
||||
// <th>
|
||||
hb_set_tag_names* th = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(th);
|
||||
hb_rule_tag_sectioning_add_elems(th);
|
||||
hb_set_tag_names_add_whole_literal(th, "header");
|
||||
hb_set_tag_names_add_whole_literal(th, "footer");
|
||||
hb_map_tag_relations_set_whole_literal(map, "th", th);
|
||||
|
||||
// <video>
|
||||
hb_set_tag_names* video = hb_set_tag_names_create();
|
||||
hb_rule_tag_media_add_elems(video);
|
||||
hb_map_tag_relations_set_whole_literal(map, "video", video);
|
||||
// <video>
|
||||
hb_set_tag_names* video = hb_set_tag_names_create();
|
||||
hb_rule_tag_media_add_elems(video);
|
||||
hb_map_tag_relations_set_whole_literal(map, "video", video);
|
||||
}
|
||||
|
||||
void hb_rule_tag_child_blacklist_init(void) {
|
||||
hb_rule_tag_child_blacklist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_child_blacklist_map_add_entries(hb_rule_tag_child_blacklist_map);
|
||||
void hb_rule_tag_child_blacklist_init(void)
|
||||
{
|
||||
hb_rule_tag_child_blacklist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_child_blacklist_map_add_entries(
|
||||
hb_rule_tag_child_blacklist_map);
|
||||
}
|
||||
|
||||
// Check if a parent is allowed to have a specific child, based on the blacklist.
|
||||
bool hb_rule_tag_child_blacklist_allowed(nh_view_str* parent, nh_view_str* child) {
|
||||
hb_set_tag_names* set = hb_map_tag_relations_get(hb_rule_tag_child_blacklist_map, parent);
|
||||
return set == NULL || !hb_set_tag_names_has(set, child);
|
||||
// Check if a parent is allowed to have a specific child, based on the
|
||||
// blacklist.
|
||||
bool hb_rule_tag_child_blacklist_allowed(nh_view_str* parent,
|
||||
nh_view_str* child)
|
||||
{
|
||||
hb_set_tag_names* set = hb_map_tag_relations_get(
|
||||
hb_rule_tag_child_blacklist_map, parent);
|
||||
return set == NULL || !hb_set_tag_names_has(set, child);
|
||||
}
|
||||
|
|
|
@ -1,100 +1,106 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_map_tag_relations* hb_rule_tag_child_whitelist_map;
|
||||
|
||||
void hb_rule_tag_child_whitelist_map_add_entries(hb_map_tag_relations* map) {
|
||||
// <colgroup>
|
||||
hb_set_tag_names* colgroup = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(colgroup, "col");
|
||||
hb_map_tag_relations_set_whole_literal(map, "colgroup", colgroup);
|
||||
void hb_rule_tag_child_whitelist_map_add_entries(hb_map_tag_relations* map)
|
||||
{
|
||||
// <colgroup>
|
||||
hb_set_tag_names* colgroup = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(colgroup, "col");
|
||||
hb_map_tag_relations_set_whole_literal(map, "colgroup", colgroup);
|
||||
|
||||
// <datalist>
|
||||
hb_set_tag_names* datalist = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(datalist, "option");
|
||||
hb_map_tag_relations_set_whole_literal(map, "datalist", datalist);
|
||||
// <datalist>
|
||||
hb_set_tag_names* datalist = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(datalist, "option");
|
||||
hb_map_tag_relations_set_whole_literal(map, "datalist", datalist);
|
||||
|
||||
// <dl>
|
||||
hb_set_tag_names* dl = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(dl, "dt");
|
||||
hb_set_tag_names_add_whole_literal(dl, "dd");
|
||||
hb_map_tag_relations_set_whole_literal(map, "dl", dl);
|
||||
// <dl>
|
||||
hb_set_tag_names* dl = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(dl, "dt");
|
||||
hb_set_tag_names_add_whole_literal(dl, "dd");
|
||||
hb_map_tag_relations_set_whole_literal(map, "dl", dl);
|
||||
|
||||
// <hgroup>
|
||||
hb_set_tag_names* hgroup = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(hgroup);
|
||||
hb_map_tag_relations_set_whole_literal(map, "hgroup", hgroup);
|
||||
// <hgroup>
|
||||
hb_set_tag_names* hgroup = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(hgroup);
|
||||
hb_map_tag_relations_set_whole_literal(map, "hgroup", hgroup);
|
||||
|
||||
// <ol>
|
||||
hb_set_tag_names* ol = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(ol, "li");
|
||||
hb_map_tag_relations_set_whole_literal(map, "ol", ol);
|
||||
// <ol>
|
||||
hb_set_tag_names* ol = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(ol, "li");
|
||||
hb_map_tag_relations_set_whole_literal(map, "ol", ol);
|
||||
|
||||
// <optgroup>
|
||||
hb_set_tag_names* optgroup = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(optgroup, "option");
|
||||
hb_map_tag_relations_set_whole_literal(map, "optgroup", optgroup);
|
||||
// <optgroup>
|
||||
hb_set_tag_names* optgroup = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(optgroup, "option");
|
||||
hb_map_tag_relations_set_whole_literal(map, "optgroup", optgroup);
|
||||
|
||||
// <picture>
|
||||
hb_set_tag_names* picture = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(picture, "source");
|
||||
hb_set_tag_names_add_whole_literal(picture, "img");
|
||||
hb_map_tag_relations_set_whole_literal(map, "picture", picture);
|
||||
// <picture>
|
||||
hb_set_tag_names* picture = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(picture, "source");
|
||||
hb_set_tag_names_add_whole_literal(picture, "img");
|
||||
hb_map_tag_relations_set_whole_literal(map, "picture", picture);
|
||||
|
||||
// <select>
|
||||
hb_set_tag_names* select = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(select, "optgroup");
|
||||
hb_set_tag_names_add_whole_literal(select, "option");
|
||||
hb_map_tag_relations_set_whole_literal(map, "select", select);
|
||||
// <select>
|
||||
hb_set_tag_names* select = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(select, "optgroup");
|
||||
hb_set_tag_names_add_whole_literal(select, "option");
|
||||
hb_map_tag_relations_set_whole_literal(map, "select", select);
|
||||
|
||||
// <table>
|
||||
hb_set_tag_names* table = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(table, "caption");
|
||||
hb_set_tag_names_add_whole_literal(table, "colgroup");
|
||||
hb_set_tag_names_add_whole_literal(table, "col");
|
||||
hb_set_tag_names_add_whole_literal(table, "thead");
|
||||
hb_set_tag_names_add_whole_literal(table, "tbody");
|
||||
hb_set_tag_names_add_whole_literal(table, "tfoot");
|
||||
hb_set_tag_names_add_whole_literal(table, "tr");
|
||||
hb_map_tag_relations_set_whole_literal(map, "table", table);
|
||||
// <table>
|
||||
hb_set_tag_names* table = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(table, "caption");
|
||||
hb_set_tag_names_add_whole_literal(table, "colgroup");
|
||||
hb_set_tag_names_add_whole_literal(table, "col");
|
||||
hb_set_tag_names_add_whole_literal(table, "thead");
|
||||
hb_set_tag_names_add_whole_literal(table, "tbody");
|
||||
hb_set_tag_names_add_whole_literal(table, "tfoot");
|
||||
hb_set_tag_names_add_whole_literal(table, "tr");
|
||||
hb_map_tag_relations_set_whole_literal(map, "table", table);
|
||||
|
||||
// <tbody>
|
||||
hb_set_tag_names* tbody = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(tbody, "tr");
|
||||
hb_map_tag_relations_set_whole_literal(map, "tbody", tbody);
|
||||
// <tbody>
|
||||
hb_set_tag_names* tbody = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(tbody, "tr");
|
||||
hb_map_tag_relations_set_whole_literal(map, "tbody", tbody);
|
||||
|
||||
// <tfoot>
|
||||
hb_set_tag_names* tfoot = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(tfoot, "tr");
|
||||
hb_map_tag_relations_set_whole_literal(map, "tfoot", tfoot);
|
||||
// <tfoot>
|
||||
hb_set_tag_names* tfoot = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(tfoot, "tr");
|
||||
hb_map_tag_relations_set_whole_literal(map, "tfoot", tfoot);
|
||||
|
||||
// <thead>
|
||||
hb_set_tag_names* thead = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(thead, "tr");
|
||||
hb_map_tag_relations_set_whole_literal(map, "thead", thead);
|
||||
// <thead>
|
||||
hb_set_tag_names* thead = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(thead, "tr");
|
||||
hb_map_tag_relations_set_whole_literal(map, "thead", thead);
|
||||
|
||||
// <tr>
|
||||
hb_set_tag_names* tr = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(tr, "td");
|
||||
hb_set_tag_names_add_whole_literal(tr, "th");
|
||||
hb_set_tag_names_add_whole_literal(tr, "template");
|
||||
hb_set_tag_names_add_whole_literal(tr, "script");
|
||||
hb_map_tag_relations_set_whole_literal(map, "tr", tr);
|
||||
// <tr>
|
||||
hb_set_tag_names* tr = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(tr, "td");
|
||||
hb_set_tag_names_add_whole_literal(tr, "th");
|
||||
hb_set_tag_names_add_whole_literal(tr, "template");
|
||||
hb_set_tag_names_add_whole_literal(tr, "script");
|
||||
hb_map_tag_relations_set_whole_literal(map, "tr", tr);
|
||||
|
||||
// <ul>
|
||||
hb_set_tag_names* ul = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(ul, "li");
|
||||
hb_map_tag_relations_set_whole_literal(map, "ul", ul);
|
||||
// <ul>
|
||||
hb_set_tag_names* ul = hb_set_tag_names_create();
|
||||
hb_set_tag_names_add_whole_literal(ul, "li");
|
||||
hb_map_tag_relations_set_whole_literal(map, "ul", ul);
|
||||
}
|
||||
|
||||
void hb_rule_tag_child_whitelist_init(void) {
|
||||
hb_rule_tag_child_whitelist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_child_whitelist_map_add_entries(hb_rule_tag_child_whitelist_map);
|
||||
void hb_rule_tag_child_whitelist_init(void)
|
||||
{
|
||||
hb_rule_tag_child_whitelist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_child_whitelist_map_add_entries(
|
||||
hb_rule_tag_child_whitelist_map);
|
||||
}
|
||||
|
||||
// Check if a parent is allowed to have a specific child.
|
||||
bool hb_rule_tag_child_whitelist_allowed(nh_view_str* parent, nh_view_str* child) {
|
||||
hb_set_tag_names* set = hb_map_tag_relations_get(hb_rule_tag_child_whitelist_map, parent);
|
||||
return set == NULL || hb_set_tag_names_has(set, child);
|
||||
// Decides, from the child whitelist, whether `parent` may contain `child`.
// Returns true when `parent` has no whitelist entry, or when its entry
// contains `child`; returns false otherwise.
bool hb_rule_tag_child_whitelist_allowed(nh_view_str* parent,
					 nh_view_str* child)
{
	hb_set_tag_names* permitted = hb_map_tag_relations_get(
		hb_rule_tag_child_whitelist_map, parent);

	if (permitted == NULL) {
		// No whitelist registered for this parent: any child passes.
		return true;
	}
	return hb_set_tag_names_has(permitted, child);
}
|
||||
|
|
|
@ -1,37 +1,41 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_content_set;
|
||||
|
||||
void hb_rule_tag_content_add_elems(hb_set_tag_names* set) {
|
||||
hb_set_tag_names_add_whole_literal(set, "address");
|
||||
hb_set_tag_names_add_whole_literal(set, "audio");
|
||||
hb_set_tag_names_add_whole_literal(set, "button");
|
||||
hb_set_tag_names_add_whole_literal(set, "canvas");
|
||||
hb_set_tag_names_add_whole_literal(set, "caption");
|
||||
hb_set_tag_names_add_whole_literal(set, "figcaption");
|
||||
hb_set_tag_names_add_whole_literal(set, "h1");
|
||||
hb_set_tag_names_add_whole_literal(set, "h2");
|
||||
hb_set_tag_names_add_whole_literal(set, "h3");
|
||||
hb_set_tag_names_add_whole_literal(set, "h4");
|
||||
hb_set_tag_names_add_whole_literal(set, "h5");
|
||||
hb_set_tag_names_add_whole_literal(set, "h6");
|
||||
hb_set_tag_names_add_whole_literal(set, "legend");
|
||||
hb_set_tag_names_add_whole_literal(set, "meter");
|
||||
hb_set_tag_names_add_whole_literal(set, "object");
|
||||
hb_set_tag_names_add_whole_literal(set, "option");
|
||||
hb_set_tag_names_add_whole_literal(set, "p");
|
||||
hb_set_tag_names_add_whole_literal(set, "summary"); // Can also contain a heading
|
||||
hb_set_tag_names_add_whole_literal(set, "textarea");
|
||||
hb_set_tag_names_add_whole_literal(set, "video");
|
||||
// Insert every tag name belonging to the "content" rule into `set`.
//
// set: destination set; each name below is added to it as a string literal.
void hb_rule_tag_content_add_elems(hb_set_tag_names* set)
{
    hb_set_tag_names_add_whole_literal(set, "address");
    hb_set_tag_names_add_whole_literal(set, "audio");
    hb_set_tag_names_add_whole_literal(set, "button");
    hb_set_tag_names_add_whole_literal(set, "canvas");
    hb_set_tag_names_add_whole_literal(set, "caption");
    hb_set_tag_names_add_whole_literal(set, "figcaption");

    hb_set_tag_names_add_whole_literal(set, "h1");
    hb_set_tag_names_add_whole_literal(set, "h2");
    hb_set_tag_names_add_whole_literal(set, "h3");
    hb_set_tag_names_add_whole_literal(set, "h4");
    hb_set_tag_names_add_whole_literal(set, "h5");
    hb_set_tag_names_add_whole_literal(set, "h6");

    hb_set_tag_names_add_whole_literal(set, "legend");
    hb_set_tag_names_add_whole_literal(set, "meter");
    hb_set_tag_names_add_whole_literal(set, "object");
    hb_set_tag_names_add_whole_literal(set, "option");
    hb_set_tag_names_add_whole_literal(set, "p");
    // Can also contain a heading
    hb_set_tag_names_add_whole_literal(set, "summary");
    hb_set_tag_names_add_whole_literal(set, "textarea");
    hb_set_tag_names_add_whole_literal(set, "video");
}
|
||||
|
||||
void hb_rule_tag_content_init(void) {
|
||||
hb_rule_tag_content_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_content_add_elems(hb_rule_tag_content_set);
|
||||
void hb_rule_tag_content_init(void)
|
||||
{
|
||||
hb_rule_tag_content_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_content_add_elems(hb_rule_tag_content_set);
|
||||
}
|
||||
|
||||
bool hb_rule_tag_content_check(nh_view_str* tag) {
|
||||
return hb_set_tag_names_has(hb_rule_tag_content_set, tag);
|
||||
// Report whether `tag` is a member of the "content" tag set built by
// hb_rule_tag_content_init.
//
// tag: tag name to look up.
// Returns the membership result from hb_set_tag_names_has.
bool hb_rule_tag_content_check(nh_view_str* tag)
{
    hb_set_tag_names* content_tags = hb_rule_tag_content_set;
    return hb_set_tag_names_has(content_tags, tag);
}
|
||||
|
|
|
@ -1,30 +1,33 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_contentfirst_set;
|
||||
|
||||
void hb_rule_tag_contentfirst_add_elems(hb_set_tag_names* set) {
|
||||
hb_set_tag_names_add_whole_literal(set, "dd");
|
||||
hb_set_tag_names_add_whole_literal(set, "details");
|
||||
hb_set_tag_names_add_whole_literal(set, "dt");
|
||||
hb_set_tag_names_add_whole_literal(set, "iframe");
|
||||
hb_set_tag_names_add_whole_literal(set, "label");
|
||||
hb_set_tag_names_add_whole_literal(set, "li");
|
||||
hb_set_tag_names_add_whole_literal(set, "noscript");
|
||||
hb_set_tag_names_add_whole_literal(set, "output");
|
||||
hb_set_tag_names_add_whole_literal(set, "progress");
|
||||
hb_set_tag_names_add_whole_literal(set, "slot");
|
||||
hb_set_tag_names_add_whole_literal(set, "td");
|
||||
hb_set_tag_names_add_whole_literal(set, "template");
|
||||
hb_set_tag_names_add_whole_literal(set, "th");
|
||||
// Insert every tag name belonging to the "contentfirst" rule into `set`.
//
// set: destination set; each name below is added to it as a string literal.
void hb_rule_tag_contentfirst_add_elems(hb_set_tag_names* set)
{
    hb_set_tag_names_add_whole_literal(set, "dd");
    hb_set_tag_names_add_whole_literal(set, "details");
    hb_set_tag_names_add_whole_literal(set, "dt");
    hb_set_tag_names_add_whole_literal(set, "iframe");
    hb_set_tag_names_add_whole_literal(set, "label");
    hb_set_tag_names_add_whole_literal(set, "li");
    hb_set_tag_names_add_whole_literal(set, "noscript");
    hb_set_tag_names_add_whole_literal(set, "output");
    hb_set_tag_names_add_whole_literal(set, "progress");
    hb_set_tag_names_add_whole_literal(set, "slot");
    hb_set_tag_names_add_whole_literal(set, "td");
    hb_set_tag_names_add_whole_literal(set, "template");
    hb_set_tag_names_add_whole_literal(set, "th");
}
|
||||
|
||||
void hb_rule_tag_contentfirst_init(void) {
|
||||
hb_rule_tag_contentfirst_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_contentfirst_add_elems(hb_rule_tag_contentfirst_set);
|
||||
void hb_rule_tag_contentfirst_init(void)
|
||||
{
|
||||
hb_rule_tag_contentfirst_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_contentfirst_add_elems(hb_rule_tag_contentfirst_set);
|
||||
}
|
||||
|
||||
bool hb_rule_tag_contentfirst_check(nh_view_str* tag) {
|
||||
return hb_set_tag_names_has(hb_rule_tag_contentfirst_set, tag);
|
||||
// Report whether `tag` is a member of the "contentfirst" tag set built by
// hb_rule_tag_contentfirst_init.
//
// tag: tag name to look up.
// Returns the membership result from hb_set_tag_names_has.
bool hb_rule_tag_contentfirst_check(nh_view_str* tag)
{
    hb_set_tag_names* contentfirst_tags = hb_rule_tag_contentfirst_set;
    return hb_set_tag_names_has(contentfirst_tags, tag);
}
|
||||
|
|
|
@ -1,48 +1,51 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_formatting_set;
|
||||
|
||||
void hb_rule_tag_formatting_add_elems(hb_set_tag_names* set) {
|
||||
// Difference to MDN's inline text semantics list: -br, +del, +ins
|
||||
hb_set_tag_names_add_whole_literal(set, "a");
|
||||
hb_set_tag_names_add_whole_literal(set, "abbr");
|
||||
hb_set_tag_names_add_whole_literal(set, "b");
|
||||
hb_set_tag_names_add_whole_literal(set, "bdi");
|
||||
hb_set_tag_names_add_whole_literal(set, "bdo");
|
||||
hb_set_tag_names_add_whole_literal(set, "cite");
|
||||
hb_set_tag_names_add_whole_literal(set, "data");
|
||||
hb_set_tag_names_add_whole_literal(set, "del");
|
||||
hb_set_tag_names_add_whole_literal(set, "dfn");
|
||||
hb_set_tag_names_add_whole_literal(set, "em");
|
||||
hb_set_tag_names_add_whole_literal(set, "i");
|
||||
hb_set_tag_names_add_whole_literal(set, "ins");
|
||||
hb_set_tag_names_add_whole_literal(set, "kbd");
|
||||
hb_set_tag_names_add_whole_literal(set, "mark");
|
||||
hb_set_tag_names_add_whole_literal(set, "q");
|
||||
hb_set_tag_names_add_whole_literal(set, "rp");
|
||||
hb_set_tag_names_add_whole_literal(set, "rt");
|
||||
hb_set_tag_names_add_whole_literal(set, "rtc");
|
||||
hb_set_tag_names_add_whole_literal(set, "ruby");
|
||||
hb_set_tag_names_add_whole_literal(set, "s");
|
||||
hb_set_tag_names_add_whole_literal(set, "samp");
|
||||
hb_set_tag_names_add_whole_literal(set, "small");
|
||||
hb_set_tag_names_add_whole_literal(set, "span");
|
||||
hb_set_tag_names_add_whole_literal(set, "strong");
|
||||
hb_set_tag_names_add_whole_literal(set, "sub");
|
||||
hb_set_tag_names_add_whole_literal(set, "sup");
|
||||
hb_set_tag_names_add_whole_literal(set, "time");
|
||||
hb_set_tag_names_add_whole_literal(set, "u");
|
||||
hb_set_tag_names_add_whole_literal(set, "var");
|
||||
hb_set_tag_names_add_whole_literal(set, "wbr");
|
||||
// Insert every tag name belonging to the "formatting" rule into `set`.
//
// set: destination set; each name below is added to it as a string literal.
void hb_rule_tag_formatting_add_elems(hb_set_tag_names* set)
{
    // Difference to MDN's inline text semantics list: -br, +del, +ins
    // NOTE(review): `code` is not added here either; confirm whether that
    // omission is intentional.
    hb_set_tag_names_add_whole_literal(set, "a");
    hb_set_tag_names_add_whole_literal(set, "abbr");
    hb_set_tag_names_add_whole_literal(set, "b");
    hb_set_tag_names_add_whole_literal(set, "bdi");
    hb_set_tag_names_add_whole_literal(set, "bdo");
    hb_set_tag_names_add_whole_literal(set, "cite");
    hb_set_tag_names_add_whole_literal(set, "data");
    hb_set_tag_names_add_whole_literal(set, "del");
    hb_set_tag_names_add_whole_literal(set, "dfn");
    hb_set_tag_names_add_whole_literal(set, "em");
    hb_set_tag_names_add_whole_literal(set, "i");
    hb_set_tag_names_add_whole_literal(set, "ins");
    hb_set_tag_names_add_whole_literal(set, "kbd");
    hb_set_tag_names_add_whole_literal(set, "mark");
    hb_set_tag_names_add_whole_literal(set, "q");
    hb_set_tag_names_add_whole_literal(set, "rp");
    hb_set_tag_names_add_whole_literal(set, "rt");
    hb_set_tag_names_add_whole_literal(set, "rtc");
    hb_set_tag_names_add_whole_literal(set, "ruby");
    hb_set_tag_names_add_whole_literal(set, "s");
    hb_set_tag_names_add_whole_literal(set, "samp");
    hb_set_tag_names_add_whole_literal(set, "small");
    hb_set_tag_names_add_whole_literal(set, "span");
    hb_set_tag_names_add_whole_literal(set, "strong");
    hb_set_tag_names_add_whole_literal(set, "sub");
    hb_set_tag_names_add_whole_literal(set, "sup");
    hb_set_tag_names_add_whole_literal(set, "time");
    hb_set_tag_names_add_whole_literal(set, "u");
    hb_set_tag_names_add_whole_literal(set, "var");
    hb_set_tag_names_add_whole_literal(set, "wbr");
}
|
||||
|
||||
void hb_rule_tag_formatting_init(void) {
|
||||
hb_rule_tag_formatting_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_formatting_add_elems(hb_rule_tag_formatting_set);
|
||||
void hb_rule_tag_formatting_init(void)
|
||||
{
|
||||
hb_rule_tag_formatting_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_formatting_add_elems(hb_rule_tag_formatting_set);
|
||||
}
|
||||
|
||||
bool hb_rule_tag_formatting_check(nh_view_str* tag) {
|
||||
return hb_set_tag_names_has(hb_rule_tag_formatting_set, tag);
|
||||
// Report whether `tag` is a member of the "formatting" tag set built by
// hb_rule_tag_formatting_init.
//
// tag: tag name to look up.
// Returns the membership result from hb_set_tag_names_has.
bool hb_rule_tag_formatting_check(nh_view_str* tag)
{
    hb_set_tag_names* formatting_tags = hb_rule_tag_formatting_set;
    return hb_set_tag_names_has(formatting_tags, tag);
}
|
||||
|
|
|
@ -1,24 +1,27 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_heading_set;
|
||||
|
||||
void hb_rule_tag_heading_add_elems(hb_set_tag_names* set) {
|
||||
hb_set_tag_names_add_whole_literal(set, "hgroup");
|
||||
hb_set_tag_names_add_whole_literal(set, "h1");
|
||||
hb_set_tag_names_add_whole_literal(set, "h2");
|
||||
hb_set_tag_names_add_whole_literal(set, "h3");
|
||||
hb_set_tag_names_add_whole_literal(set, "h4");
|
||||
hb_set_tag_names_add_whole_literal(set, "h5");
|
||||
hb_set_tag_names_add_whole_literal(set, "h6");
|
||||
// Insert every tag name belonging to the "heading" rule into `set`:
// `hgroup` plus the six heading levels.
//
// set: destination set; each name below is added to it as a string literal.
void hb_rule_tag_heading_add_elems(hb_set_tag_names* set)
{
    hb_set_tag_names_add_whole_literal(set, "hgroup");

    hb_set_tag_names_add_whole_literal(set, "h1");
    hb_set_tag_names_add_whole_literal(set, "h2");
    hb_set_tag_names_add_whole_literal(set, "h3");
    hb_set_tag_names_add_whole_literal(set, "h4");
    hb_set_tag_names_add_whole_literal(set, "h5");
    hb_set_tag_names_add_whole_literal(set, "h6");
}
|
||||
|
||||
void hb_rule_tag_heading_init(void) {
|
||||
hb_rule_tag_heading_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(hb_rule_tag_heading_set);
|
||||
void hb_rule_tag_heading_init(void)
|
||||
{
|
||||
hb_rule_tag_heading_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_heading_add_elems(hb_rule_tag_heading_set);
|
||||
}
|
||||
|
||||
bool hb_rule_tag_heading_check(nh_view_str* tag) {
|
||||
return hb_set_tag_names_has(hb_rule_tag_heading_set, tag);
|
||||
// Report whether `tag` is a member of the "heading" tag set built by
// hb_rule_tag_heading_init.
//
// tag: tag name to look up.
// Returns the membership result from hb_set_tag_names_has.
bool hb_rule_tag_heading_check(nh_view_str* tag)
{
    hb_set_tag_names* heading_tags = hb_rule_tag_heading_set;
    return hb_set_tag_names_has(heading_tags, tag);
}
|
||||
|
|
|
@ -1,171 +1,174 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element at
|
||||
// 2018-07-01T05:55:00Z
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_html_set;
|
||||
|
||||
void hb_rule_tag_html_add_elems(hb_set_tag_names* set) {
|
||||
hb_set_tag_names_add_whole_literal(set, "a");
|
||||
hb_set_tag_names_add_whole_literal(set, "abbr");
|
||||
hb_set_tag_names_add_whole_literal(set, "acronym");
|
||||
hb_set_tag_names_add_whole_literal(set, "address");
|
||||
hb_set_tag_names_add_whole_literal(set, "applet");
|
||||
hb_set_tag_names_add_whole_literal(set, "applet");
|
||||
hb_set_tag_names_add_whole_literal(set, "area");
|
||||
hb_set_tag_names_add_whole_literal(set, "article");
|
||||
hb_set_tag_names_add_whole_literal(set, "aside");
|
||||
hb_set_tag_names_add_whole_literal(set, "audio");
|
||||
hb_set_tag_names_add_whole_literal(set, "b");
|
||||
hb_set_tag_names_add_whole_literal(set, "basefont");
|
||||
hb_set_tag_names_add_whole_literal(set, "bdi");
|
||||
hb_set_tag_names_add_whole_literal(set, "bdo");
|
||||
hb_set_tag_names_add_whole_literal(set, "bgsound");
|
||||
hb_set_tag_names_add_whole_literal(set, "big");
|
||||
hb_set_tag_names_add_whole_literal(set, "blink");
|
||||
hb_set_tag_names_add_whole_literal(set, "blockquote");
|
||||
hb_set_tag_names_add_whole_literal(set, "body");
|
||||
hb_set_tag_names_add_whole_literal(set, "br");
|
||||
hb_set_tag_names_add_whole_literal(set, "button");
|
||||
hb_set_tag_names_add_whole_literal(set, "canvas");
|
||||
hb_set_tag_names_add_whole_literal(set, "caption");
|
||||
hb_set_tag_names_add_whole_literal(set, "center");
|
||||
hb_set_tag_names_add_whole_literal(set, "cite");
|
||||
hb_set_tag_names_add_whole_literal(set, "code");
|
||||
hb_set_tag_names_add_whole_literal(set, "col");
|
||||
hb_set_tag_names_add_whole_literal(set, "colgroup");
|
||||
hb_set_tag_names_add_whole_literal(set, "command");
|
||||
hb_set_tag_names_add_whole_literal(set, "content");
|
||||
hb_set_tag_names_add_whole_literal(set, "content");
|
||||
hb_set_tag_names_add_whole_literal(set, "data");
|
||||
hb_set_tag_names_add_whole_literal(set, "datalist");
|
||||
hb_set_tag_names_add_whole_literal(set, "dd");
|
||||
hb_set_tag_names_add_whole_literal(set, "del");
|
||||
hb_set_tag_names_add_whole_literal(set, "details");
|
||||
hb_set_tag_names_add_whole_literal(set, "dfn");
|
||||
hb_set_tag_names_add_whole_literal(set, "dialog");
|
||||
hb_set_tag_names_add_whole_literal(set, "dir");
|
||||
hb_set_tag_names_add_whole_literal(set, "dir");
|
||||
hb_set_tag_names_add_whole_literal(set, "div");
|
||||
hb_set_tag_names_add_whole_literal(set, "dl");
|
||||
hb_set_tag_names_add_whole_literal(set, "dt");
|
||||
hb_set_tag_names_add_whole_literal(set, "element");
|
||||
hb_set_tag_names_add_whole_literal(set, "element");
|
||||
hb_set_tag_names_add_whole_literal(set, "em");
|
||||
hb_set_tag_names_add_whole_literal(set, "embed");
|
||||
hb_set_tag_names_add_whole_literal(set, "fieldset");
|
||||
hb_set_tag_names_add_whole_literal(set, "figcaption");
|
||||
hb_set_tag_names_add_whole_literal(set, "figure");
|
||||
hb_set_tag_names_add_whole_literal(set, "font");
|
||||
hb_set_tag_names_add_whole_literal(set, "footer");
|
||||
hb_set_tag_names_add_whole_literal(set, "form");
|
||||
hb_set_tag_names_add_whole_literal(set, "frame");
|
||||
hb_set_tag_names_add_whole_literal(set, "frameset");
|
||||
hb_set_tag_names_add_whole_literal(set, "h1");
|
||||
hb_set_tag_names_add_whole_literal(set, "h2");
|
||||
hb_set_tag_names_add_whole_literal(set, "h3");
|
||||
hb_set_tag_names_add_whole_literal(set, "h4");
|
||||
hb_set_tag_names_add_whole_literal(set, "h5");
|
||||
hb_set_tag_names_add_whole_literal(set, "h6");
|
||||
hb_set_tag_names_add_whole_literal(set, "head");
|
||||
hb_set_tag_names_add_whole_literal(set, "header");
|
||||
hb_set_tag_names_add_whole_literal(set, "hgroup");
|
||||
hb_set_tag_names_add_whole_literal(set, "hr");
|
||||
hb_set_tag_names_add_whole_literal(set, "html");
|
||||
hb_set_tag_names_add_whole_literal(set, "i");
|
||||
hb_set_tag_names_add_whole_literal(set, "iframe");
|
||||
hb_set_tag_names_add_whole_literal(set, "image");
|
||||
hb_set_tag_names_add_whole_literal(set, "img");
|
||||
hb_set_tag_names_add_whole_literal(set, "input");
|
||||
hb_set_tag_names_add_whole_literal(set, "ins");
|
||||
hb_set_tag_names_add_whole_literal(set, "isindex");
|
||||
hb_set_tag_names_add_whole_literal(set, "kbd");
|
||||
hb_set_tag_names_add_whole_literal(set, "keygen");
|
||||
hb_set_tag_names_add_whole_literal(set, "label");
|
||||
hb_set_tag_names_add_whole_literal(set, "legend");
|
||||
hb_set_tag_names_add_whole_literal(set, "li");
|
||||
hb_set_tag_names_add_whole_literal(set, "link");
|
||||
hb_set_tag_names_add_whole_literal(set, "listing");
|
||||
hb_set_tag_names_add_whole_literal(set, "main");
|
||||
hb_set_tag_names_add_whole_literal(set, "map");
|
||||
hb_set_tag_names_add_whole_literal(set, "mark");
|
||||
hb_set_tag_names_add_whole_literal(set, "marquee");
|
||||
hb_set_tag_names_add_whole_literal(set, "menu");
|
||||
hb_set_tag_names_add_whole_literal(set, "menuitem");
|
||||
hb_set_tag_names_add_whole_literal(set, "menuitem");
|
||||
hb_set_tag_names_add_whole_literal(set, "meta");
|
||||
hb_set_tag_names_add_whole_literal(set, "meter");
|
||||
hb_set_tag_names_add_whole_literal(set, "multicol");
|
||||
hb_set_tag_names_add_whole_literal(set, "nav");
|
||||
hb_set_tag_names_add_whole_literal(set, "nextid");
|
||||
hb_set_tag_names_add_whole_literal(set, "nobr");
|
||||
hb_set_tag_names_add_whole_literal(set, "noembed");
|
||||
hb_set_tag_names_add_whole_literal(set, "noembed");
|
||||
hb_set_tag_names_add_whole_literal(set, "noframes");
|
||||
hb_set_tag_names_add_whole_literal(set, "noscript");
|
||||
hb_set_tag_names_add_whole_literal(set, "object");
|
||||
hb_set_tag_names_add_whole_literal(set, "ol");
|
||||
hb_set_tag_names_add_whole_literal(set, "optgroup");
|
||||
hb_set_tag_names_add_whole_literal(set, "option");
|
||||
hb_set_tag_names_add_whole_literal(set, "output");
|
||||
hb_set_tag_names_add_whole_literal(set, "p");
|
||||
hb_set_tag_names_add_whole_literal(set, "param");
|
||||
hb_set_tag_names_add_whole_literal(set, "picture");
|
||||
hb_set_tag_names_add_whole_literal(set, "plaintext");
|
||||
hb_set_tag_names_add_whole_literal(set, "pre");
|
||||
hb_set_tag_names_add_whole_literal(set, "progress");
|
||||
hb_set_tag_names_add_whole_literal(set, "q");
|
||||
hb_set_tag_names_add_whole_literal(set, "rp");
|
||||
hb_set_tag_names_add_whole_literal(set, "rt");
|
||||
hb_set_tag_names_add_whole_literal(set, "rtc");
|
||||
hb_set_tag_names_add_whole_literal(set, "ruby");
|
||||
hb_set_tag_names_add_whole_literal(set, "s");
|
||||
hb_set_tag_names_add_whole_literal(set, "samp");
|
||||
hb_set_tag_names_add_whole_literal(set, "script");
|
||||
hb_set_tag_names_add_whole_literal(set, "section");
|
||||
hb_set_tag_names_add_whole_literal(set, "select");
|
||||
hb_set_tag_names_add_whole_literal(set, "shadow");
|
||||
hb_set_tag_names_add_whole_literal(set, "shadow");
|
||||
hb_set_tag_names_add_whole_literal(set, "slot");
|
||||
hb_set_tag_names_add_whole_literal(set, "small");
|
||||
hb_set_tag_names_add_whole_literal(set, "source");
|
||||
hb_set_tag_names_add_whole_literal(set, "spacer");
|
||||
hb_set_tag_names_add_whole_literal(set, "span");
|
||||
hb_set_tag_names_add_whole_literal(set, "strike");
|
||||
hb_set_tag_names_add_whole_literal(set, "strong");
|
||||
hb_set_tag_names_add_whole_literal(set, "style");
|
||||
hb_set_tag_names_add_whole_literal(set, "sub");
|
||||
hb_set_tag_names_add_whole_literal(set, "summary");
|
||||
hb_set_tag_names_add_whole_literal(set, "sup");
|
||||
hb_set_tag_names_add_whole_literal(set, "table");
|
||||
hb_set_tag_names_add_whole_literal(set, "tbody");
|
||||
hb_set_tag_names_add_whole_literal(set, "td");
|
||||
hb_set_tag_names_add_whole_literal(set, "template");
|
||||
hb_set_tag_names_add_whole_literal(set, "textarea");
|
||||
hb_set_tag_names_add_whole_literal(set, "tfoot");
|
||||
hb_set_tag_names_add_whole_literal(set, "th");
|
||||
hb_set_tag_names_add_whole_literal(set, "thead");
|
||||
hb_set_tag_names_add_whole_literal(set, "time");
|
||||
hb_set_tag_names_add_whole_literal(set, "title");
|
||||
hb_set_tag_names_add_whole_literal(set, "tr");
|
||||
hb_set_tag_names_add_whole_literal(set, "track");
|
||||
hb_set_tag_names_add_whole_literal(set, "tt");
|
||||
hb_set_tag_names_add_whole_literal(set, "tt");
|
||||
hb_set_tag_names_add_whole_literal(set, "u");
|
||||
hb_set_tag_names_add_whole_literal(set, "ul");
|
||||
hb_set_tag_names_add_whole_literal(set, "var");
|
||||
hb_set_tag_names_add_whole_literal(set, "video");
|
||||
hb_set_tag_names_add_whole_literal(set, "wbr");
|
||||
hb_set_tag_names_add_whole_literal(set, "xmp");
|
||||
// Insert every known HTML element name (current and obsolete) into `set`.
//
// set: destination set; each name below is added to it as a string literal.
//
// Names are kept in alphabetical order with one insertion per name. The
// previous version inserted `applet`, `content`, `dir`, `element`,
// `menuitem`, `noembed`, `shadow` and `tt` twice; the repeats are dropped
// here (assumes inserting an existing name is a no-op for this set type;
// TODO confirm). The standard `base` element was absent and has been added
// so that `<base>` is recognised like the other document metadata elements.
void hb_rule_tag_html_add_elems(hb_set_tag_names* set)
{
    hb_set_tag_names_add_whole_literal(set, "a");
    hb_set_tag_names_add_whole_literal(set, "abbr");
    hb_set_tag_names_add_whole_literal(set, "acronym");
    hb_set_tag_names_add_whole_literal(set, "address");
    hb_set_tag_names_add_whole_literal(set, "applet");
    hb_set_tag_names_add_whole_literal(set, "area");
    hb_set_tag_names_add_whole_literal(set, "article");
    hb_set_tag_names_add_whole_literal(set, "aside");
    hb_set_tag_names_add_whole_literal(set, "audio");
    hb_set_tag_names_add_whole_literal(set, "b");
    hb_set_tag_names_add_whole_literal(set, "base");
    hb_set_tag_names_add_whole_literal(set, "basefont");
    hb_set_tag_names_add_whole_literal(set, "bdi");
    hb_set_tag_names_add_whole_literal(set, "bdo");
    hb_set_tag_names_add_whole_literal(set, "bgsound");
    hb_set_tag_names_add_whole_literal(set, "big");
    hb_set_tag_names_add_whole_literal(set, "blink");
    hb_set_tag_names_add_whole_literal(set, "blockquote");
    hb_set_tag_names_add_whole_literal(set, "body");
    hb_set_tag_names_add_whole_literal(set, "br");
    hb_set_tag_names_add_whole_literal(set, "button");
    hb_set_tag_names_add_whole_literal(set, "canvas");
    hb_set_tag_names_add_whole_literal(set, "caption");
    hb_set_tag_names_add_whole_literal(set, "center");
    hb_set_tag_names_add_whole_literal(set, "cite");
    hb_set_tag_names_add_whole_literal(set, "code");
    hb_set_tag_names_add_whole_literal(set, "col");
    hb_set_tag_names_add_whole_literal(set, "colgroup");
    hb_set_tag_names_add_whole_literal(set, "command");
    hb_set_tag_names_add_whole_literal(set, "content");
    hb_set_tag_names_add_whole_literal(set, "data");
    hb_set_tag_names_add_whole_literal(set, "datalist");
    hb_set_tag_names_add_whole_literal(set, "dd");
    hb_set_tag_names_add_whole_literal(set, "del");
    hb_set_tag_names_add_whole_literal(set, "details");
    hb_set_tag_names_add_whole_literal(set, "dfn");
    hb_set_tag_names_add_whole_literal(set, "dialog");
    hb_set_tag_names_add_whole_literal(set, "dir");
    hb_set_tag_names_add_whole_literal(set, "div");
    hb_set_tag_names_add_whole_literal(set, "dl");
    hb_set_tag_names_add_whole_literal(set, "dt");
    hb_set_tag_names_add_whole_literal(set, "element");
    hb_set_tag_names_add_whole_literal(set, "em");
    hb_set_tag_names_add_whole_literal(set, "embed");
    hb_set_tag_names_add_whole_literal(set, "fieldset");
    hb_set_tag_names_add_whole_literal(set, "figcaption");
    hb_set_tag_names_add_whole_literal(set, "figure");
    hb_set_tag_names_add_whole_literal(set, "font");
    hb_set_tag_names_add_whole_literal(set, "footer");
    hb_set_tag_names_add_whole_literal(set, "form");
    hb_set_tag_names_add_whole_literal(set, "frame");
    hb_set_tag_names_add_whole_literal(set, "frameset");
    hb_set_tag_names_add_whole_literal(set, "h1");
    hb_set_tag_names_add_whole_literal(set, "h2");
    hb_set_tag_names_add_whole_literal(set, "h3");
    hb_set_tag_names_add_whole_literal(set, "h4");
    hb_set_tag_names_add_whole_literal(set, "h5");
    hb_set_tag_names_add_whole_literal(set, "h6");
    hb_set_tag_names_add_whole_literal(set, "head");
    hb_set_tag_names_add_whole_literal(set, "header");
    hb_set_tag_names_add_whole_literal(set, "hgroup");
    hb_set_tag_names_add_whole_literal(set, "hr");
    hb_set_tag_names_add_whole_literal(set, "html");
    hb_set_tag_names_add_whole_literal(set, "i");
    hb_set_tag_names_add_whole_literal(set, "iframe");
    hb_set_tag_names_add_whole_literal(set, "image");
    hb_set_tag_names_add_whole_literal(set, "img");
    hb_set_tag_names_add_whole_literal(set, "input");
    hb_set_tag_names_add_whole_literal(set, "ins");
    hb_set_tag_names_add_whole_literal(set, "isindex");
    hb_set_tag_names_add_whole_literal(set, "kbd");
    hb_set_tag_names_add_whole_literal(set, "keygen");
    hb_set_tag_names_add_whole_literal(set, "label");
    hb_set_tag_names_add_whole_literal(set, "legend");
    hb_set_tag_names_add_whole_literal(set, "li");
    hb_set_tag_names_add_whole_literal(set, "link");
    hb_set_tag_names_add_whole_literal(set, "listing");
    hb_set_tag_names_add_whole_literal(set, "main");
    hb_set_tag_names_add_whole_literal(set, "map");
    hb_set_tag_names_add_whole_literal(set, "mark");
    hb_set_tag_names_add_whole_literal(set, "marquee");
    hb_set_tag_names_add_whole_literal(set, "menu");
    hb_set_tag_names_add_whole_literal(set, "menuitem");
    hb_set_tag_names_add_whole_literal(set, "meta");
    hb_set_tag_names_add_whole_literal(set, "meter");
    hb_set_tag_names_add_whole_literal(set, "multicol");
    hb_set_tag_names_add_whole_literal(set, "nav");
    hb_set_tag_names_add_whole_literal(set, "nextid");
    hb_set_tag_names_add_whole_literal(set, "nobr");
    hb_set_tag_names_add_whole_literal(set, "noembed");
    hb_set_tag_names_add_whole_literal(set, "noframes");
    hb_set_tag_names_add_whole_literal(set, "noscript");
    hb_set_tag_names_add_whole_literal(set, "object");
    hb_set_tag_names_add_whole_literal(set, "ol");
    hb_set_tag_names_add_whole_literal(set, "optgroup");
    hb_set_tag_names_add_whole_literal(set, "option");
    hb_set_tag_names_add_whole_literal(set, "output");
    hb_set_tag_names_add_whole_literal(set, "p");
    hb_set_tag_names_add_whole_literal(set, "param");
    hb_set_tag_names_add_whole_literal(set, "picture");
    hb_set_tag_names_add_whole_literal(set, "plaintext");
    hb_set_tag_names_add_whole_literal(set, "pre");
    hb_set_tag_names_add_whole_literal(set, "progress");
    hb_set_tag_names_add_whole_literal(set, "q");
    hb_set_tag_names_add_whole_literal(set, "rp");
    hb_set_tag_names_add_whole_literal(set, "rt");
    hb_set_tag_names_add_whole_literal(set, "rtc");
    hb_set_tag_names_add_whole_literal(set, "ruby");
    hb_set_tag_names_add_whole_literal(set, "s");
    hb_set_tag_names_add_whole_literal(set, "samp");
    hb_set_tag_names_add_whole_literal(set, "script");
    hb_set_tag_names_add_whole_literal(set, "section");
    hb_set_tag_names_add_whole_literal(set, "select");
    hb_set_tag_names_add_whole_literal(set, "shadow");
    hb_set_tag_names_add_whole_literal(set, "slot");
    hb_set_tag_names_add_whole_literal(set, "small");
    hb_set_tag_names_add_whole_literal(set, "source");
    hb_set_tag_names_add_whole_literal(set, "spacer");
    hb_set_tag_names_add_whole_literal(set, "span");
    hb_set_tag_names_add_whole_literal(set, "strike");
    hb_set_tag_names_add_whole_literal(set, "strong");
    hb_set_tag_names_add_whole_literal(set, "style");
    hb_set_tag_names_add_whole_literal(set, "sub");
    hb_set_tag_names_add_whole_literal(set, "summary");
    hb_set_tag_names_add_whole_literal(set, "sup");
    hb_set_tag_names_add_whole_literal(set, "table");
    hb_set_tag_names_add_whole_literal(set, "tbody");
    hb_set_tag_names_add_whole_literal(set, "td");
    hb_set_tag_names_add_whole_literal(set, "template");
    hb_set_tag_names_add_whole_literal(set, "textarea");
    hb_set_tag_names_add_whole_literal(set, "tfoot");
    hb_set_tag_names_add_whole_literal(set, "th");
    hb_set_tag_names_add_whole_literal(set, "thead");
    hb_set_tag_names_add_whole_literal(set, "time");
    hb_set_tag_names_add_whole_literal(set, "title");
    hb_set_tag_names_add_whole_literal(set, "tr");
    hb_set_tag_names_add_whole_literal(set, "track");
    hb_set_tag_names_add_whole_literal(set, "tt");
    hb_set_tag_names_add_whole_literal(set, "u");
    hb_set_tag_names_add_whole_literal(set, "ul");
    hb_set_tag_names_add_whole_literal(set, "var");
    hb_set_tag_names_add_whole_literal(set, "video");
    hb_set_tag_names_add_whole_literal(set, "wbr");
    hb_set_tag_names_add_whole_literal(set, "xmp");
}
|
||||
|
||||
void hb_rule_tag_html_init(void) {
|
||||
hb_rule_tag_html_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_html_add_elems(hb_rule_tag_html_set);
|
||||
void hb_rule_tag_html_init(void)
|
||||
{
|
||||
hb_rule_tag_html_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_html_add_elems(hb_rule_tag_html_set);
|
||||
}
|
||||
|
||||
bool hb_rule_tag_html_check(nh_view_str* tag) {
|
||||
return hb_set_tag_names_has(hb_rule_tag_html_set, tag);
|
||||
// Report whether `tag` is a member of the HTML element name set built by
// hb_rule_tag_html_init.
//
// tag: tag name to look up.
// Returns the membership result from hb_set_tag_names_has.
bool hb_rule_tag_html_check(nh_view_str* tag)
{
    hb_set_tag_names* html_tags = hb_rule_tag_html_set;
    return hb_set_tag_names_has(html_tags, tag);
}
|
||||
|
|
|
@ -1,48 +1,51 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_layout_set;
|
||||
|
||||
void hb_rule_tag_layout_add_elems(hb_set_tag_names* set) {
|
||||
hb_rule_tag_sectioning_add_elems(set);
|
||||
hb_set_tag_names_add_whole_literal(set, "blockquote");
|
||||
hb_set_tag_names_add_whole_literal(set, "body");
|
||||
hb_set_tag_names_add_whole_literal(set, "colgroup");
|
||||
hb_set_tag_names_add_whole_literal(set, "datalist");
|
||||
hb_set_tag_names_add_whole_literal(set, "dialog");
|
||||
hb_set_tag_names_add_whole_literal(set, "div");
|
||||
hb_set_tag_names_add_whole_literal(set, "dl");
|
||||
hb_set_tag_names_add_whole_literal(set, "fieldset");
|
||||
hb_set_tag_names_add_whole_literal(set, "figure");
|
||||
hb_set_tag_names_add_whole_literal(set, "footer");
|
||||
hb_set_tag_names_add_whole_literal(set, "form");
|
||||
hb_set_tag_names_add_whole_literal(set, "head");
|
||||
hb_set_tag_names_add_whole_literal(set, "header");
|
||||
hb_set_tag_names_add_whole_literal(set, "hgroup");
|
||||
hb_set_tag_names_add_whole_literal(set, "html");
|
||||
hb_set_tag_names_add_whole_literal(set, "main");
|
||||
hb_set_tag_names_add_whole_literal(set, "map");
|
||||
hb_set_tag_names_add_whole_literal(set, "menu");
|
||||
hb_set_tag_names_add_whole_literal(set, "nav");
|
||||
hb_set_tag_names_add_whole_literal(set, "ol");
|
||||
hb_set_tag_names_add_whole_literal(set, "optgroup");
|
||||
hb_set_tag_names_add_whole_literal(set, "picture");
|
||||
hb_set_tag_names_add_whole_literal(set, "section");
|
||||
hb_set_tag_names_add_whole_literal(set, "select");
|
||||
hb_set_tag_names_add_whole_literal(set, "table");
|
||||
hb_set_tag_names_add_whole_literal(set, "tbody");
|
||||
hb_set_tag_names_add_whole_literal(set, "tfoot");
|
||||
hb_set_tag_names_add_whole_literal(set, "thead");
|
||||
hb_set_tag_names_add_whole_literal(set, "tr");
|
||||
hb_set_tag_names_add_whole_literal(set, "ul");
|
||||
// Insert every tag name belonging to the "layout" rule into `set`.
//
// set: destination set; it first receives whatever
//      hb_rule_tag_sectioning_add_elems adds, then each name below as a
//      string literal.
void hb_rule_tag_layout_add_elems(hb_set_tag_names* set)
{
    // Start from the sectioning tags, then add the layout-specific names.
    hb_rule_tag_sectioning_add_elems(set);

    hb_set_tag_names_add_whole_literal(set, "blockquote");
    hb_set_tag_names_add_whole_literal(set, "body");
    hb_set_tag_names_add_whole_literal(set, "colgroup");
    hb_set_tag_names_add_whole_literal(set, "datalist");
    hb_set_tag_names_add_whole_literal(set, "dialog");
    hb_set_tag_names_add_whole_literal(set, "div");
    hb_set_tag_names_add_whole_literal(set, "dl");
    hb_set_tag_names_add_whole_literal(set, "fieldset");
    hb_set_tag_names_add_whole_literal(set, "figure");
    hb_set_tag_names_add_whole_literal(set, "footer");
    hb_set_tag_names_add_whole_literal(set, "form");
    hb_set_tag_names_add_whole_literal(set, "head");
    hb_set_tag_names_add_whole_literal(set, "header");
    hb_set_tag_names_add_whole_literal(set, "hgroup");
    hb_set_tag_names_add_whole_literal(set, "html");
    hb_set_tag_names_add_whole_literal(set, "main");
    hb_set_tag_names_add_whole_literal(set, "map");
    hb_set_tag_names_add_whole_literal(set, "menu");
    hb_set_tag_names_add_whole_literal(set, "nav");
    hb_set_tag_names_add_whole_literal(set, "ol");
    hb_set_tag_names_add_whole_literal(set, "optgroup");
    hb_set_tag_names_add_whole_literal(set, "picture");
    hb_set_tag_names_add_whole_literal(set, "section");
    hb_set_tag_names_add_whole_literal(set, "select");
    hb_set_tag_names_add_whole_literal(set, "table");
    hb_set_tag_names_add_whole_literal(set, "tbody");
    hb_set_tag_names_add_whole_literal(set, "tfoot");
    hb_set_tag_names_add_whole_literal(set, "thead");
    hb_set_tag_names_add_whole_literal(set, "tr");
    hb_set_tag_names_add_whole_literal(set, "ul");
}
|
||||
|
||||
void hb_rule_tag_layout_init(void) {
|
||||
hb_rule_tag_layout_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_layout_add_elems(hb_rule_tag_layout_set);
|
||||
void hb_rule_tag_layout_init(void)
|
||||
{
|
||||
hb_rule_tag_layout_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_layout_add_elems(hb_rule_tag_layout_set);
|
||||
}
|
||||
|
||||
// Reports whether `tag` is one of the names stored in the layout tag set.
// Reads hb_rule_tag_layout_set, which is only assigned by
// hb_rule_tag_layout_init.
bool hb_rule_tag_layout_check(nh_view_str* tag)
{
	bool const is_layout_tag =
		hb_set_tag_names_has(hb_rule_tag_layout_set, tag);
	return is_layout_tag;
}
|
||||
|
|
|
@ -1,19 +1,22 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_media_set;
|
||||
|
||||
// Adds the media tag names ("audio" and "video") to `set`.
void hb_rule_tag_media_add_elems(hb_set_tag_names* set)
{
	hb_set_tag_names_add_whole_literal(set, "audio");
	hb_set_tag_names_add_whole_literal(set, "video");
}
|
||||
|
||||
void hb_rule_tag_media_init(void) {
|
||||
hb_rule_tag_media_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_media_add_elems(hb_rule_tag_media_set);
|
||||
void hb_rule_tag_media_init(void)
|
||||
{
|
||||
hb_rule_tag_media_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_media_add_elems(hb_rule_tag_media_set);
|
||||
}
|
||||
|
||||
// Reports whether `tag` is one of the names stored in the media tag set.
// Reads hb_rule_tag_media_set, which is only assigned by
// hb_rule_tag_media_init.
bool hb_rule_tag_media_check(nh_view_str* tag)
{
	bool const is_media_tag =
		hb_set_tag_names_has(hb_rule_tag_media_set, tag);
	return is_media_tag;
}
|
||||
|
|
|
@ -1,22 +1,25 @@
|
|||
#include <hb/collection.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static nh_bitfield_ascii* hb_rule_tag_name_set;
|
||||
|
||||
// Adds every character accepted in a tag name to `set`: whatever the ASCII
// lowercase, uppercase and digit rules contribute, plus ':' and '-'.
void hb_rule_tag_name_add_elems(nh_bitfield_ascii* set)
{
	// Character classes shared with other rules.
	hb_rule_ascii_lowercase_add_elems(set);
	hb_rule_ascii_uppercase_add_elems(set);
	hb_rule_ascii_digit_add_elems(set);

	// Punctuation that is additionally accepted inside tag names.
	nh_bitfield_ascii_add(set, ':');
	nh_bitfield_ascii_add(set, '-');
}
|
||||
|
||||
void hb_rule_tag_name_init(void) {
|
||||
hb_rule_tag_name_set = nh_bitfield_ascii_create();
|
||||
hb_rule_tag_name_add_elems(hb_rule_tag_name_set);
|
||||
void hb_rule_tag_name_init(void)
|
||||
{
|
||||
hb_rule_tag_name_set = nh_bitfield_ascii_create();
|
||||
hb_rule_tag_name_add_elems(hb_rule_tag_name_set);
|
||||
}
|
||||
|
||||
// Reports whether the character `c` is marked in the tag-name bitfield.
// Reads hb_rule_tag_name_set, which is only assigned by
// hb_rule_tag_name_init.
//
// NOTE(review): assumes `c` is a value nh_bitfield_ascii_has can accept --
// confirm how it treats runes outside the ASCII range.
bool hb_rule_tag_name_check(hb_rune c)
{
	bool const is_tag_name_char =
		nh_bitfield_ascii_has(hb_rule_tag_name_set, c);
	return is_tag_name_char;
}
|
||||
|
|
|
@ -1,15 +1,20 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
// Initialiser for the parent blacklist rule. The rule is currently unused,
// so there is no state to set up and this function intentionally does
// nothing.
void hb_rule_tag_parent_blacklist_init(void)
{
	// Intentionally empty: the blacklist rule is not in use yet.
}
|
||||
|
||||
// Check if a child is allowed to have a specific parent, based on the blacklist.
|
||||
bool hb_rule_tag_parent_blacklist_allowed(nh_view_str* child, nh_view_str* parent) {
|
||||
// Since this rule is currently not being used, directly allow without any checks.
|
||||
(void) child;
|
||||
(void) parent;
|
||||
return true;
|
||||
// Check if a child is allowed to have a specific parent, based on the
|
||||
// blacklist.
|
||||
bool hb_rule_tag_parent_blacklist_allowed(nh_view_str* child,
|
||||
nh_view_str* parent)
|
||||
{
|
||||
// Since this rule is currently not being used, directly allow without
|
||||
// any checks.
|
||||
(void) child;
|
||||
(void) parent;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1,145 +1,151 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_map_tag_relations* hb_rule_tag_parent_whitelist_map;
|
||||
|
||||
// Registers the parent whitelist for every child tag that is restricted to
// specific parents. For each such child a new set of permitted parent tag
// names is created and stored in `map` under the child's name.
//
// Tags that get no entry here are not restricted by
// hb_rule_tag_parent_whitelist_allowed.
//
// <dd> and <dt> accept <div> as well as <dl>: the HTML standard allows
// dt/dd groups to be wrapped in a <div> that is itself a child of <dl>, so
// whitelisting only <dl> would reject valid markup.
void hb_rule_tag_parent_whitelist_map_add_entries(hb_map_tag_relations* map)
{
	// <caption>
	hb_set_tag_names* caption_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(caption_parents, "table");
	hb_map_tag_relations_set_whole_literal(map, "caption", caption_parents);

	// <col>
	hb_set_tag_names* col_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(col_parents, "table");
	hb_set_tag_names_add_whole_literal(col_parents, "colgroup");
	hb_map_tag_relations_set_whole_literal(map, "col", col_parents);

	// <colgroup>
	hb_set_tag_names* colgroup_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(colgroup_parents, "table");
	hb_map_tag_relations_set_whole_literal(map, "colgroup",
					       colgroup_parents);

	// <dd>
	// May sit directly in <dl> or in a <div> that groups dt/dd pairs.
	hb_set_tag_names* dd_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(dd_parents, "dl");
	hb_set_tag_names_add_whole_literal(dd_parents, "div");
	hb_map_tag_relations_set_whole_literal(map, "dd", dd_parents);

	// <dt>
	// May sit directly in <dl> or in a <div> that groups dt/dd pairs.
	hb_set_tag_names* dt_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(dt_parents, "dl");
	hb_set_tag_names_add_whole_literal(dt_parents, "div");
	hb_map_tag_relations_set_whole_literal(map, "dt", dt_parents);

	// <figcaption>
	hb_set_tag_names* figcaption_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(figcaption_parents, "figure");
	hb_map_tag_relations_set_whole_literal(map, "figcaption",
					       figcaption_parents);

	// <legend>
	hb_set_tag_names* legend_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(legend_parents, "fieldset");
	hb_map_tag_relations_set_whole_literal(map, "legend", legend_parents);

	// <li>
	hb_set_tag_names* li_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(li_parents, "ul");
	hb_set_tag_names_add_whole_literal(li_parents, "ol");
	hb_set_tag_names_add_whole_literal(li_parents, "menu");
	hb_map_tag_relations_set_whole_literal(map, "li", li_parents);

	// <optgroup>
	hb_set_tag_names* optgroup_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(optgroup_parents, "select");
	hb_map_tag_relations_set_whole_literal(map, "optgroup",
					       optgroup_parents);

	// <option>
	hb_set_tag_names* option_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(option_parents, "select");
	hb_set_tag_names_add_whole_literal(option_parents, "optgroup");
	hb_set_tag_names_add_whole_literal(option_parents, "datalist");
	hb_map_tag_relations_set_whole_literal(map, "option", option_parents);

	// <param>
	hb_set_tag_names* param_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(param_parents, "object");
	hb_map_tag_relations_set_whole_literal(map, "param", param_parents);

	// <rp>
	hb_set_tag_names* rp_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(rp_parents, "ruby");
	hb_map_tag_relations_set_whole_literal(map, "rp", rp_parents);

	// <rt>
	hb_set_tag_names* rt_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(rt_parents, "ruby");
	hb_map_tag_relations_set_whole_literal(map, "rt", rt_parents);

	// <rtc>
	hb_set_tag_names* rtc_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(rtc_parents, "ruby");
	hb_map_tag_relations_set_whole_literal(map, "rtc", rtc_parents);

	// <summary>
	hb_set_tag_names* summary_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(summary_parents, "details");
	hb_map_tag_relations_set_whole_literal(map, "summary", summary_parents);

	// <source>
	// Any media tag, plus <picture>.
	hb_set_tag_names* source_parents = hb_set_tag_names_create();
	hb_rule_tag_media_add_elems(source_parents);
	hb_set_tag_names_add_whole_literal(source_parents, "picture");
	hb_map_tag_relations_set_whole_literal(map, "source", source_parents);

	// <track>
	// Any media tag.
	hb_set_tag_names* track_parents = hb_set_tag_names_create();
	hb_rule_tag_media_add_elems(track_parents);
	hb_map_tag_relations_set_whole_literal(map, "track", track_parents);

	// <tbody>
	hb_set_tag_names* tbody_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(tbody_parents, "table");
	hb_map_tag_relations_set_whole_literal(map, "tbody", tbody_parents);

	// <td>
	hb_set_tag_names* td_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(td_parents, "tr");
	hb_map_tag_relations_set_whole_literal(map, "td", td_parents);

	// <tfoot>
	hb_set_tag_names* tfoot_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(tfoot_parents, "table");
	hb_map_tag_relations_set_whole_literal(map, "tfoot", tfoot_parents);

	// <th>
	hb_set_tag_names* th_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(th_parents, "tr");
	hb_map_tag_relations_set_whole_literal(map, "th", th_parents);

	// <thead>
	hb_set_tag_names* thead_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(thead_parents, "table");
	hb_map_tag_relations_set_whole_literal(map, "thead", thead_parents);

	// <tr>
	hb_set_tag_names* tr_parents = hb_set_tag_names_create();
	hb_set_tag_names_add_whole_literal(tr_parents, "table");
	hb_set_tag_names_add_whole_literal(tr_parents, "thead");
	hb_set_tag_names_add_whole_literal(tr_parents, "tbody");
	hb_set_tag_names_add_whole_literal(tr_parents, "tfoot");
	hb_map_tag_relations_set_whole_literal(map, "tr", tr_parents);

	// <template>
	// Should be <body>, <frameset>, <head>, <dl>, <colgroup>, but ignoring.
}
|
||||
|
||||
void hb_rule_tag_parent_whitelist_init(void) {
|
||||
hb_rule_tag_parent_whitelist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_parent_whitelist_map_add_entries(hb_rule_tag_parent_whitelist_map);
|
||||
void hb_rule_tag_parent_whitelist_init(void)
|
||||
{
|
||||
hb_rule_tag_parent_whitelist_map = hb_map_tag_relations_create();
|
||||
hb_rule_tag_parent_whitelist_map_add_entries(
|
||||
hb_rule_tag_parent_whitelist_map);
|
||||
}
|
||||
|
||||
// Check if a child is allowed to have a specific parent.
|
||||
bool hb_rule_tag_parent_whitelist_allowed(nh_view_str* child, nh_view_str* parent) {
|
||||
hb_set_tag_names* set = hb_map_tag_relations_get(hb_rule_tag_parent_whitelist_map, child);
|
||||
return set == NULL || hb_set_tag_names_has(set, parent);
|
||||
bool hb_rule_tag_parent_whitelist_allowed(nh_view_str* child,
|
||||
nh_view_str* parent)
|
||||
{
|
||||
hb_set_tag_names* set = hb_map_tag_relations_get(
|
||||
hb_rule_tag_parent_whitelist_map, child);
|
||||
return set == NULL || hb_set_tag_names_has(set, parent);
|
||||
}
|
||||
|
|
|
@ -1,21 +1,24 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_sectioning_set;
|
||||
|
||||
// Adds the sectioning tag names ("article", "aside", "nav" and "section")
// to `set`.
void hb_rule_tag_sectioning_add_elems(hb_set_tag_names* set)
{
	hb_set_tag_names_add_whole_literal(set, "article");
	hb_set_tag_names_add_whole_literal(set, "aside");
	hb_set_tag_names_add_whole_literal(set, "nav");
	hb_set_tag_names_add_whole_literal(set, "section");
}
|
||||
|
||||
void hb_rule_tag_sectioning_init(void) {
|
||||
hb_rule_tag_sectioning_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_sectioning_add_elems(hb_rule_tag_sectioning_set);
|
||||
void hb_rule_tag_sectioning_init(void)
|
||||
{
|
||||
hb_rule_tag_sectioning_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_sectioning_add_elems(hb_rule_tag_sectioning_set);
|
||||
}
|
||||
|
||||
// Reports whether `tag` is one of the names stored in the sectioning tag
// set. Reads hb_rule_tag_sectioning_set, which is only assigned by
// hb_rule_tag_sectioning_init.
bool hb_rule_tag_sectioning_check(nh_view_str* tag)
{
	bool const is_sectioning_tag =
		hb_set_tag_names_has(hb_rule_tag_sectioning_set, tag);
	return is_sectioning_tag;
}
|
||||
|
|
|
@ -1,32 +1,37 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_specific_set;
|
||||
|
||||
// Adds every "specific" tag name to `set`: all of the SVG tag names (via
// hb_rule_tag_svg_add_elems) followed by the HTML tag names listed below.
void hb_rule_tag_specific_add_elems(hb_set_tag_names* set)
{
	// Every SVG tag counts as a specific tag.
	hb_rule_tag_svg_add_elems(set);

	hb_set_tag_names_add_whole_literal(set, "area");
	hb_set_tag_names_add_whole_literal(set, "base");
	hb_set_tag_names_add_whole_literal(set, "br");
	// Reason: unlikely to want to minify
	hb_set_tag_names_add_whole_literal(set, "code");
	hb_set_tag_names_add_whole_literal(set, "col");
	hb_set_tag_names_add_whole_literal(set, "embed");
	hb_set_tag_names_add_whole_literal(set, "hr");
	hb_set_tag_names_add_whole_literal(set, "img");
	hb_set_tag_names_add_whole_literal(set, "input");
	hb_set_tag_names_add_whole_literal(set, "param");
	// Reason: unlikely to want to minify
	hb_set_tag_names_add_whole_literal(set, "pre");
	hb_set_tag_names_add_whole_literal(set, "script");
	hb_set_tag_names_add_whole_literal(set, "source");
	hb_set_tag_names_add_whole_literal(set, "track");
}
|
||||
|
||||
void hb_rule_tag_specific_init(void) {
|
||||
hb_rule_tag_specific_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_specific_add_elems(hb_rule_tag_specific_set);
|
||||
void hb_rule_tag_specific_init(void)
|
||||
{
|
||||
hb_rule_tag_specific_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_specific_add_elems(hb_rule_tag_specific_set);
|
||||
}
|
||||
|
||||
// Reports whether `tag` is one of the names stored in the specific tag set.
// Reads hb_rule_tag_specific_set, which is only assigned by
// hb_rule_tag_specific_init.
bool hb_rule_tag_specific_check(nh_view_str* tag)
{
	bool const is_specific_tag =
		hb_set_tag_names_has(hb_rule_tag_specific_set, tag);
	return is_specific_tag;
}
|
||||
|
|
|
@ -1,110 +1,113 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
// Sourced from https://developer.mozilla.org/en-US/docs/Web/SVG/Element at
|
||||
// 2018-08-04T03:50:00Z
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_svg_set;
|
||||
|
||||
// Adds every known SVG element name to `set`. Names are added exactly as
// spelled here, including mixed-case ones such as "clipPath".
void hb_rule_tag_svg_add_elems(hb_set_tag_names* set)
{
	// a
	hb_set_tag_names_add_whole_literal(set, "a");
	hb_set_tag_names_add_whole_literal(set, "altGlyph");
	hb_set_tag_names_add_whole_literal(set, "altGlyphDef");
	hb_set_tag_names_add_whole_literal(set, "altGlyphItem");
	hb_set_tag_names_add_whole_literal(set, "animate");
	hb_set_tag_names_add_whole_literal(set, "animateColor");
	hb_set_tag_names_add_whole_literal(set, "animateMotion");
	hb_set_tag_names_add_whole_literal(set, "animateTransform");

	// c - e
	hb_set_tag_names_add_whole_literal(set, "circle");
	hb_set_tag_names_add_whole_literal(set, "clipPath");
	hb_set_tag_names_add_whole_literal(set, "color-profile");
	hb_set_tag_names_add_whole_literal(set, "cursor");
	hb_set_tag_names_add_whole_literal(set, "defs");
	hb_set_tag_names_add_whole_literal(set, "desc");
	hb_set_tag_names_add_whole_literal(set, "discard");
	hb_set_tag_names_add_whole_literal(set, "ellipse");

	// Filter primitives (fe*)
	hb_set_tag_names_add_whole_literal(set, "feBlend");
	hb_set_tag_names_add_whole_literal(set, "feColorMatrix");
	hb_set_tag_names_add_whole_literal(set, "feComponentTransfer");
	hb_set_tag_names_add_whole_literal(set, "feComposite");
	hb_set_tag_names_add_whole_literal(set, "feConvolveMatrix");
	hb_set_tag_names_add_whole_literal(set, "feDiffuseLighting");
	hb_set_tag_names_add_whole_literal(set, "feDisplacementMap");
	hb_set_tag_names_add_whole_literal(set, "feDistantLight");
	hb_set_tag_names_add_whole_literal(set, "feDropShadow");
	hb_set_tag_names_add_whole_literal(set, "feFlood");
	hb_set_tag_names_add_whole_literal(set, "feFuncA");
	hb_set_tag_names_add_whole_literal(set, "feFuncB");
	hb_set_tag_names_add_whole_literal(set, "feFuncG");
	hb_set_tag_names_add_whole_literal(set, "feFuncR");
	hb_set_tag_names_add_whole_literal(set, "feGaussianBlur");
	hb_set_tag_names_add_whole_literal(set, "feImage");
	hb_set_tag_names_add_whole_literal(set, "feMerge");
	hb_set_tag_names_add_whole_literal(set, "feMergeNode");
	hb_set_tag_names_add_whole_literal(set, "feMorphology");
	hb_set_tag_names_add_whole_literal(set, "feOffset");
	hb_set_tag_names_add_whole_literal(set, "fePointLight");
	hb_set_tag_names_add_whole_literal(set, "feSpecularLighting");
	hb_set_tag_names_add_whole_literal(set, "feSpotLight");
	hb_set_tag_names_add_whole_literal(set, "feTile");
	hb_set_tag_names_add_whole_literal(set, "feTurbulence");

	// f - l
	hb_set_tag_names_add_whole_literal(set, "filter");
	hb_set_tag_names_add_whole_literal(set, "font-face-format");
	hb_set_tag_names_add_whole_literal(set, "font-face-name");
	hb_set_tag_names_add_whole_literal(set, "font-face-src");
	hb_set_tag_names_add_whole_literal(set, "font-face-uri");
	hb_set_tag_names_add_whole_literal(set, "font-face");
	hb_set_tag_names_add_whole_literal(set, "font");
	hb_set_tag_names_add_whole_literal(set, "foreignObject");
	hb_set_tag_names_add_whole_literal(set, "g");
	hb_set_tag_names_add_whole_literal(set, "glyph");
	hb_set_tag_names_add_whole_literal(set, "glyphRef");
	hb_set_tag_names_add_whole_literal(set, "hatch");
	hb_set_tag_names_add_whole_literal(set, "hatchpath");
	hb_set_tag_names_add_whole_literal(set, "hkern");
	hb_set_tag_names_add_whole_literal(set, "image");
	hb_set_tag_names_add_whole_literal(set, "line");
	hb_set_tag_names_add_whole_literal(set, "linearGradient");

	// m - r
	hb_set_tag_names_add_whole_literal(set, "marker");
	hb_set_tag_names_add_whole_literal(set, "mask");
	hb_set_tag_names_add_whole_literal(set, "mesh");
	hb_set_tag_names_add_whole_literal(set, "meshgradient");
	hb_set_tag_names_add_whole_literal(set, "meshpatch");
	hb_set_tag_names_add_whole_literal(set, "meshrow");
	hb_set_tag_names_add_whole_literal(set, "metadata");
	hb_set_tag_names_add_whole_literal(set, "missing-glyph");
	hb_set_tag_names_add_whole_literal(set, "mpath");
	hb_set_tag_names_add_whole_literal(set, "path");
	hb_set_tag_names_add_whole_literal(set, "pattern");
	hb_set_tag_names_add_whole_literal(set, "polygon");
	hb_set_tag_names_add_whole_literal(set, "polyline");
	hb_set_tag_names_add_whole_literal(set, "radialGradient");
	hb_set_tag_names_add_whole_literal(set, "rect");

	// s - v
	hb_set_tag_names_add_whole_literal(set, "script");
	hb_set_tag_names_add_whole_literal(set, "set");
	hb_set_tag_names_add_whole_literal(set, "solidcolor");
	hb_set_tag_names_add_whole_literal(set, "stop");
	hb_set_tag_names_add_whole_literal(set, "style");
	hb_set_tag_names_add_whole_literal(set, "svg");
	hb_set_tag_names_add_whole_literal(set, "switch");
	hb_set_tag_names_add_whole_literal(set, "symbol");
	hb_set_tag_names_add_whole_literal(set, "text");
	hb_set_tag_names_add_whole_literal(set, "textPath");
	hb_set_tag_names_add_whole_literal(set, "title");
	hb_set_tag_names_add_whole_literal(set, "tref");
	hb_set_tag_names_add_whole_literal(set, "tspan");
	hb_set_tag_names_add_whole_literal(set, "unknown");
	hb_set_tag_names_add_whole_literal(set, "use");
	hb_set_tag_names_add_whole_literal(set, "view");
	hb_set_tag_names_add_whole_literal(set, "vkern");
}
|
||||
|
||||
void hb_rule_tag_svg_init(void) {
|
||||
hb_rule_tag_svg_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_svg_add_elems(hb_rule_tag_svg_set);
|
||||
void hb_rule_tag_svg_init(void)
|
||||
{
|
||||
hb_rule_tag_svg_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_svg_add_elems(hb_rule_tag_svg_set);
|
||||
}
|
||||
|
||||
// Reports whether `tag` is a member of the SVG tag-name set built by
// hb_rule_tag_svg_init.
bool hb_rule_tag_svg_check(nh_view_str* tag)
{
	bool is_svg_tag = hb_set_tag_names_has(hb_rule_tag_svg_set, tag);

	return is_svg_tag;
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
// A tag name is valid when it is either a known HTML tag or a known SVG tag.
// The SVG set is only consulted when the HTML check fails.
bool hb_rule_tag_valid_check(nh_view_str* tag)
{
	if (hb_rule_tag_html_check(tag)) {
		return true;
	}

	return hb_rule_tag_svg_check(tag);
}
|
||||
|
|
|
@ -1,32 +1,35 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_void_set;
|
||||
|
||||
// Adds every tag name this module treats as a void tag to `set`.
// Names are passed as string literals because the underlying add call is
// documented by its name as taking a whole literal.
void hb_rule_tag_void_add_elems(hb_set_tag_names* set)
{
#define HB_ADD_VOID_TAG(literal) hb_set_tag_names_add_whole_literal(set, literal)
	HB_ADD_VOID_TAG("area");
	HB_ADD_VOID_TAG("base");
	HB_ADD_VOID_TAG("br");
	HB_ADD_VOID_TAG("col");
	HB_ADD_VOID_TAG("embed");
	HB_ADD_VOID_TAG("hr");
	HB_ADD_VOID_TAG("img");
	HB_ADD_VOID_TAG("input");
	HB_ADD_VOID_TAG("keygen");
	HB_ADD_VOID_TAG("link");
	HB_ADD_VOID_TAG("meta");
	HB_ADD_VOID_TAG("param");
	HB_ADD_VOID_TAG("source");
	HB_ADD_VOID_TAG("track");
	HB_ADD_VOID_TAG("wbr");
#undef HB_ADD_VOID_TAG
}
|
||||
|
||||
void hb_rule_tag_void_init(void) {
|
||||
hb_rule_tag_void_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_void_add_elems(hb_rule_tag_void_set);
|
||||
void hb_rule_tag_void_init(void)
|
||||
{
|
||||
hb_rule_tag_void_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_void_add_elems(hb_rule_tag_void_set);
|
||||
}
|
||||
|
||||
// Reports whether `tag` is a member of the void tag-name set built by
// hb_rule_tag_void_init.
bool hb_rule_tag_void_check(nh_view_str* tag)
{
	bool is_void_tag = hb_set_tag_names_has(hb_rule_tag_void_set, tag);

	return is_void_tag;
}
|
||||
|
|
|
@ -1,21 +1,24 @@
|
|||
#include <hb/rune.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
|
||||
// "wss" stands for whitespace-sensitive.
|
||||
|
||||
static hb_set_tag_names* hb_rule_tag_wss_set;
|
||||
|
||||
// Adds the whitespace-sensitive ("wss") tag names to `set`.
void hb_rule_tag_wss_add_elems(hb_set_tag_names* set)
{
	// Inline code.
	hb_set_tag_names_add_whole_literal(set, "code");

	// Preformatted text.
	hb_set_tag_names_add_whole_literal(set, "pre");
}
|
||||
|
||||
void hb_rule_tag_wss_init(void) {
|
||||
hb_rule_tag_wss_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_wss_add_elems(hb_rule_tag_wss_set);
|
||||
void hb_rule_tag_wss_init(void)
|
||||
{
|
||||
hb_rule_tag_wss_set = hb_set_tag_names_create();
|
||||
hb_rule_tag_wss_add_elems(hb_rule_tag_wss_set);
|
||||
}
|
||||
|
||||
// Reports whether `tag` is a member of the whitespace-sensitive tag-name set
// built by hb_rule_tag_wss_init.
bool hb_rule_tag_wss_check(nh_view_str* tag)
{
	bool is_wss_tag = hb_set_tag_names_has(hb_rule_tag_wss_set, tag);

	return is_wss_tag;
}
|
||||
|
|
|
@ -2,15 +2,20 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
// EOF represents the end of an input buffer, and is used for some functions
// that return characters. It must be a value that would never appear in any
// valid UTF-8 byte sequence.
// The expansion is parenthesised so that it stays a single operand wherever it
// is substituted into a larger expression.
#define HB_EOF (-1)

// This version of hyperbuild is designed for ASCII and works with UTF-8 (with
// minor exceptions), so each character is one byte. Use char to maximise
// compatibility with external and standard libraries.
typedef char hb_rune;

// When either a character or EOF needs to be returned, a character will be
// represented by a valid hb_rune value and EOF will be represented by HB_EOF.
// In this case, since HB_EOF fits within the valid values of hb_rune, no
// separate type is needed. A separate type is still used to symbolically
// represent possible HB_EOF return values.
// NOTE(review): comparing an hb_eof_rune with HB_EOF relies on plain char
// being signed. Where char is unsigned, a stored HB_EOF promotes to 255 and
// never compares equal to -1 -- confirm against the supported targets.
typedef char hb_eof_rune;

// Number of characters in a string literal, excluding the terminating NUL.
// Only meaningful for literals and arrays; a pointer argument would yield the
// pointer size minus one.
#define hb_string_literal_length(str) (sizeof(str) - 1)
|
||||
|
|
|
@ -5,13 +5,17 @@
|
|||
#define HB_UNIT_ENTITY_NONE -1
|
||||
|
||||
// Describes how an attribute's value ended up in the output.
typedef enum {
	// Special value for hb_unit_tag.
	HB_UNIT_ATTR_NONE,

	// The value is delimited by quotes.
	HB_UNIT_ATTR_QUOTED,
	// The value is written without delimiter quotes.
	HB_UNIT_ATTR_UNQUOTED,
	// The attribute has a name only (no `=` followed it).
	HB_UNIT_ATTR_NOVAL,
} hb_unit_attr_type;
|
||||
|
||||
hb_unit_attr_type hb_unit_attr(hb_proc* proc);
|
||||
hb_unit_attr_type hb_unit_attr_val_quoted(hb_proc* proc, bool should_collapse_and_trim_value_ws);
|
||||
hb_unit_attr_type
|
||||
hb_unit_attr_val_quoted(hb_proc* proc, bool should_collapse_and_trim_value_ws);
|
||||
void hb_unit_attr_val_unquoted(hb_proc* proc);
|
||||
|
||||
void hb_unit_bang(hb_proc* proc);
|
||||
|
@ -19,7 +23,10 @@ void hb_unit_bang(hb_proc* proc);
|
|||
void hb_unit_comment(hb_proc* proc);
|
||||
|
||||
void hb_unit_content_html(hb_proc* proc, nh_view_str* parent);
|
||||
void hb_unit_content_script(hb_proc* proc);
|
||||
void hb_unit_content_style(hb_proc* proc);
|
||||
|
||||
int32_t hb_unit_entity(hb_proc* proc);
|
||||
|
||||
void hb_unit_tag(hb_proc* proc, nh_view_str* parent);
|
||||
nh_view_str hb_unit_tag_name(hb_proc* proc);
|
||||
|
|
|
@ -1,40 +1,49 @@
|
|||
#include <stdbool.h>
|
||||
#include <hb/collection.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/unit.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// Processes one attribute: its name and, if an `=` follows, its value.
//
// Returns HB_UNIT_ATTR_NOVAL when there is no value, HB_UNIT_ATTR_UNQUOTED
// for an unquoted source value, and otherwise whatever
// hb_unit_attr_val_quoted reports for a quoted source value.
hb_unit_attr_type hb_unit_attr(hb_proc* proc)
{
	hb_proc_view_init_src(name, proc);

	hb_proc_view_start_with_src_next(&name, proc);
	for (;;) {
		// The first pass requires a name character, so an empty name
		// is rejected; later passes only run while the next character
		// still belongs to the name.
		hb_rune name_char = hb_proc_require_predicate(
			proc, &hb_rule_attr_name_check, "attribute name");

		if (hb_rule_ascii_uppercase_check(name_char)) {
			hb_proc_error_if_not_suppressed(
				proc, HB_ERR_PARSE_UCASE_ATTR,
				"Uppercase letter in attribute name");
		}

		if (!hb_rule_attr_name_check(hb_proc_peek(proc))) {
			break;
		}
	}
	hb_proc_view_end_with_src_prev(&name, proc);

	// Only `class` values are collapsed and trimmed, and only when the
	// configuration asks for it.
	bool trim_class_value =
		nh_view_str_equals_literal_i(&name, "class")
		&& proc->cfg->trim_class_attributes;

	if (!hb_proc_accept_if(proc, '=')) {
		return HB_UNIT_ATTR_NOVAL;
	}

	if (hb_rule_attr_quote_check(hb_proc_peek(proc))) {
		// Quoted attribute value.
		return hb_unit_attr_val_quoted(proc, trim_class_value);
	}

	// Unquoted attribute value.
	hb_proc_error_if_not_suppressed(proc, HB_ERR_PARSE_UNQUOTED_ATTR,
					"Unquoted attribute value");
	hb_unit_attr_val_unquoted(proc);
	return HB_UNIT_ATTR_UNQUOTED;
}
|
||||
|
|
|
@ -5,172 +5,215 @@
|
|||
// Character references written in place of a literal quote that would
// otherwise clash with the delimiter chosen for an attribute value.
#define _ENCODED_SINGLE_QUOTE "&#39;"
#define _ENCODED_DOUBLE_QUOTE "&#34;"
|
||||
|
||||
// Invoked on the first non-whitespace character after one or more skipped
// whitespace characters: collapses the run by writing a single space.
//
// Expands to a complete `if` block, so call sites take no trailing semicolon.
// Expects `proc`, `last_char_was_whitespace` and
// `has_whitespace_after_processing` to be in scope at the call site.
#define _COLLAPSE_WHITESPACE_IF_APPLICABLE() \
	if (last_char_was_whitespace) { \
		hb_proc_write(proc, ' '); \
		last_char_was_whitespace = false; \
		has_whitespace_after_processing = true; \
	}
|
||||
|
||||
// Processes a quoted attribute value and decides how it is delimited in the
// output.
//
// proc: processor whose source is positioned at the opening quote.
// should_collapse_and_trim_ws: when true, leading and trailing whitespace is
//   dropped and inner runs of whitespace are collapsed to one space.
//
// Returns HB_UNIT_ATTR_UNQUOTED when the value was left without quotes,
// otherwise HB_UNIT_ATTR_QUOTED.
hb_unit_attr_type hb_unit_attr_val_quoted(hb_proc* proc,
					  bool should_collapse_and_trim_ws)
{
	// Processing a quoted attribute value is tricky, due to the fact that
	// it's not possible to know whether or not to unquote the value until
	// the value has been processed. For example, decoding an entity could
	// create whitespace in a value which might otherwise be unquotable. How
	// this function works is:
	//
	// 1. Assume that the value is unquotable, and don't output any quotes.
	//    Decode any entities as necessary. Collect metrics on the types of
	//    characters in the value while processing.
	// 2. Based on the metrics, if it's possible to not use quotes, nothing
	//    needs to be done and the function ends.
	// 3. Choose a quote based on the amount of occurrences, to minimise the
	//    amount of encoded values.
	// 4. Post-process the output by adding delimiter quotes and encoding
	//    quotes in values. This does mean that the output is written to
	//    twice.

	bool should_decode_entities = proc->cfg->decode_entities;
	bool should_remove_quotes = proc->cfg->remove_attr_quotes;

	// Metrics for characters in the value.
	// Used to decide what quotes to use, if any.
	size_t count_double_quotation = 0;
	size_t count_single_quotation = 0;
	bool starts_with_quote = false;
	bool has_whitespace_after_processing = false;

	hb_rune quote = hb_proc_require_skip_predicate(
		proc, &hb_rule_attr_quote_check, "attribute value quote");

	if (should_collapse_and_trim_ws) {
		hb_proc_skip_while_predicate(proc,
					     &hb_rule_ascii_whitespace_check);
	}

	// Since it's not possible to optimise the delimiter quotes without
	// knowing the complete value, mark the processed value in the output
	// for post-processing later.
	hb_proc_view_init_out(proc_value, proc);

	hb_proc_view_start_with_out_next(&proc_value, proc);
	bool last_char_was_whitespace = false;
	bool is_first_char = true;
	while (true) {
		int32_t c = hb_proc_peek(proc);

		if (c == quote) {
			break;
		}

		bool processed_entity = c == '&';
		if (processed_entity) {
			// If not decoding entities, then this is first
			// non-whitespace if last_char_was_whitespace, so space
			// needs to be written before hb_unit_entity writes
			// entity.
			if (!should_decode_entities) {
				_COLLAPSE_WHITESPACE_IF_APPLICABLE()
			}

			// Characters will be consumed by hb_unit_entity, but
			// they will never be '\'', '"', or whitespace, as the
			// function only consumes characters that could form a
			// well formed entity. See the function for more
			// details.
			int32_t decoded = hb_unit_entity(proc);
			// If not decoding entities, don't interpret using
			// decoded character.
			if (should_decode_entities) {
				c = decoded;
			}
		}
		bool is_whitespace = hb_rule_ascii_whitespace_check(c);

		if (should_collapse_and_trim_ws && is_whitespace) {
			// Character, after any entity decoding, is whitespace.
			// Don't write whitespace.
			// In order to collapse whitespace, only write one space
			// character once the first non-whitespace character
			// after a sequence of whitespace characters is reached.
			// NOTE(review): this skip also runs when the whitespace
			// came from an entity that hb_unit_entity has already
			// consumed -- confirm that is intended.
			last_char_was_whitespace = true;
			hb_proc_skip(proc);

		} else {
			// Character, after any entity decoding, is not
			// whitespace.
			_COLLAPSE_WHITESPACE_IF_APPLICABLE()

			if (c == '"') {
				if (is_first_char) {
					starts_with_quote = true;
				}
				count_double_quotation++;

			} else if (c == '\'') {
				if (is_first_char) {
					starts_with_quote = true;
				}
				count_single_quotation++;

			} else if (is_whitespace) {
				// `should_collapse_and_trim_ws` is false, so
				// whitespace is written.
				has_whitespace_after_processing = true;
			}

			if (!processed_entity) {
				// Don't need to accept if hb_unit_entity has
				// already been called.
				hb_proc_accept(proc);
			}
		}

		is_first_char = false;
	}
	hb_proc_view_end_with_out_prev(&proc_value, proc);
	hb_proc_require_skip(proc, quote);

	size_t proc_length = nh_view_str_length(&proc_value);

	// Technically, the specification states that values may only be
	// unquoted if they don't contain ["'`=<>]. However, browsers seem to
	// interpret characters after `=` and before the nearest whitespace as
	// an unquoted value, so long as no quote immediately follows `=`. If a
	// value cannot be unquoted, use the one that appears the least and
	// therefore requires the least amount of encoding. Prefer double quotes
	// to single quotes if it's a tie.
	hb_rune quote_to_encode;
	char const* quote_encoded;
	size_t quote_encoded_length;
	size_t amount_of_quotes_to_encode;

	if (should_remove_quotes && proc_length > 0
	    && !has_whitespace_after_processing && !starts_with_quote) {
		// No need to do any further processing; processed value is
		// already in unquoted form.
		return HB_UNIT_ATTR_UNQUOTED;

	} else if (!should_decode_entities) {
		// If entities are not being decoded, we are not allowed to
		// encode and decode quotes to minimise the total count of
		// encoded quotes. Therefore, there is no use to swapping
		// delimiter quotes as at best it's not an improvement and at
		// worst it could break the value.
		quote_to_encode = quote;
		quote_encoded = NULL;
		quote_encoded_length = 0;
		amount_of_quotes_to_encode = 0;

	} else if (count_single_quotation < count_double_quotation) {
		quote_to_encode = '\'';
		quote_encoded = _ENCODED_SINGLE_QUOTE;
		quote_encoded_length =
			hb_string_literal_length(_ENCODED_SINGLE_QUOTE);
		amount_of_quotes_to_encode = count_single_quotation;

	} else {
		quote_to_encode = '"';
		quote_encoded = _ENCODED_DOUBLE_QUOTE;
		quote_encoded_length =
			hb_string_literal_length(_ENCODED_DOUBLE_QUOTE);
		amount_of_quotes_to_encode = count_double_quotation;
	}

	size_t post_length =
		2 + proc_length - amount_of_quotes_to_encode
		+ (amount_of_quotes_to_encode * quote_encoded_length);
	// Where the post-processed output should start in the output array.
	size_t out_start = nh_view_str_start(&proc_value);
	size_t post_end = out_start + post_length - 1;

	size_t writer = post_end;
	// Closing delimiter.
	proc->out[writer--] = quote_to_encode;
	// To prevent overwriting data when encoding quotes, post-process output
	// in reverse. WARNING: This code directly uses and manipulates struct
	// members of `proc`, which in general should be avoided.
	//
	// An empty value has nothing to copy. Skipping the loop in that case is
	// essential: the last-character index would be `out_start - 1`, so the
	// `reader == out_start` exit test would never be met and the loop would
	// walk backwards over output that precedes the value.
	if (proc_length > 0) {
		size_t reader = out_start + proc_length - 1;
		while (true) {
			hb_rune c = proc->out[reader];
			if (should_decode_entities && c == quote_to_encode) {
				writer -= quote_encoded_length;
				// WARNING: This only works because
				// hb_rune == char.
				memcpy(&proc->out[writer + 1], quote_encoded,
				       quote_encoded_length * sizeof(hb_rune));
			} else {
				proc->out[writer--] = c;
			}

			// Break before decrementing to prevent underflow.
			if (reader == out_start) {
				break;
			}
			reader--;
		}
	}
	// Opening delimiter. This must be done after the copy above to prevent
	// overwriting data; `writer` is back at `out_start` at this point.
	proc->out[writer] = quote_to_encode;
	proc->out_next = post_end + 1;

	return HB_UNIT_ATTR_QUOTED;
}
|
||||
|
|
|
@ -1,28 +1,32 @@
|
|||
#include <stdbool.h>
|
||||
#include <hb/unit.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/unit.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// Processes an unquoted attribute value: accepts characters for as long as
// hb_rule_attr_unquotedvalue_check allows them, handing `&` to hb_unit_entity.
// Raises HB_ERR_PARSE_EXPECTED_NOT_FOUND when the value is empty.
void hb_unit_attr_val_unquoted(hb_proc* proc)
{
	bool at_least_one_char = false;

	hb_rune c;
	while (true) {
		c = hb_proc_peek(proc);
		if (!hb_rule_attr_unquotedvalue_check(c)) {
			break;
		}
		at_least_one_char = true;

		if (c == '&') {
			// Process entity.
			hb_unit_entity(proc);
		} else {
			hb_proc_accept(proc);
		}
	}

	if (!at_least_one_char) {
		// The message has two conversions, so the offending character
		// is passed twice: once for `%c` and once, as an unsigned
		// value, for `%x`.
		hb_proc_error_custom(
			proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
			"Expected unquoted attribute value, got `%c` (U+%x)",
			c, (unsigned int) (unsigned char) c);
	}
}
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
#include <hb/unit.h>
|
||||
|
||||
// Processes a `<!...>` declaration (for example a doctype), keeping it in the
// output unchanged.
void hb_unit_bang(hb_proc* proc)
{
	hb_proc_require_match(proc, "<!");

	// Accept everything up to, but not including, the closing `>`. The
	// stop character has to be `>`: stopping at `<` would run past the end
	// of the declaration and make the requirement below fail.
	while (hb_proc_accept_if_not(proc, '>')) {
	}

	hb_proc_require(proc, '>');
}
|
||||
|
|
|
@ -1,18 +1,19 @@
|
|||
#include <hb/unit.h>
|
||||
|
||||
// Processes a `<!-- ... -->` comment. The comment is skipped in the source
// and written to the output only when comments are not being removed.
void hb_unit_comment(hb_proc* proc)
{
	// Mark comment to write it later if not removing comments.
	hb_proc_view_init_src(comment, proc);

	hb_proc_view_start_with_src_next(&comment, proc);
	hb_proc_require_skip_match(proc, "<!--");
	while (!hb_proc_skip_if_matches(proc, "-->")) {
		hb_proc_skip(proc);
	}
	hb_proc_view_end_with_src_prev(&comment, proc);

	// Write comment if not removing comments.
	if (!proc->cfg->remove_comments) {
		hb_proc_write_view(proc, &comment);
	}
}
|
||||
|
|
|
@ -1,65 +1,192 @@
|
|||
#include <hb/proc.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/rune.h>
|
||||
#include <hb/unit.h>
|
||||
|
||||
// Kinds of content unit that _get_next_state can report.
//
// Ensure COMMENT, BANG, and OPENING_TAG are together, and update _state_is_cbot
// if values are changed.
typedef enum {
	// `<!--`
	_STATE_COMMENT,
	// `<!`
	_STATE_BANG,
	// `<` that is neither of the above nor a closing tag
	_STATE_OPENING_TAG,

	_STATE_START,
	// End of input, or `</`
	_STATE_END,
	// `&`
	_STATE_ENTITY,
	// ASCII whitespace
	_STATE_WHITESPACE,
	// Anything else
	_STATE_TEXT,
} _state;
|
||||
|
||||
// "cbot" stands for comment, bang, or opening tag: true for exactly those
// three states and false for every other value.
static bool _state_is_cbot(_state state)
{
	switch (state) {
	case _STATE_COMMENT:
	case _STATE_BANG:
	case _STATE_OPENING_TAG:
		return true;
	default:
		return false;
	}
}
|
||||
|
||||
// Classifies what comes next in the source without consuming it.
// The order of the checks matters: `<!--` must be tested before `<!`, and both
// before a bare `<`.
static _state _get_next_state(hb_proc* proc)
{
	hb_eof_rune next = hb_proc_peek_eof(proc);
	_state result;

	if (next != HB_EOF && hb_rule_ascii_whitespace_check(next)) {
		result = _STATE_WHITESPACE;
	} else if (next == HB_EOF || hb_proc_matches(proc, "</")) {
		result = _STATE_END;
	} else if (hb_proc_matches(proc, "<!--")) {
		result = _STATE_COMMENT;
	} else if (hb_proc_matches(proc, "<!")) {
		// Checked after comment.
		result = _STATE_BANG;
	} else if (next == '<') {
		// Checked after comment and bang.
		result = _STATE_OPENING_TAG;
	} else if (next == '&') {
		result = _STATE_ENTITY;
	} else {
		result = _STATE_TEXT;
	}

	return result;
}
|
||||
|
||||
/*
|
||||
* Whitespace handling is the trickiest part of this function.
|
||||
* There are three potential minification settings that affect whitespace handling:
|
||||
* There are three potential minification settings that affect whitespace
|
||||
* handling:
|
||||
* - collapse
|
||||
* - destroy whole
|
||||
* - trim
|
||||
* What whitespace to minify depends on the parent and configured settings.
|
||||
* We want to prevent memory allocation and use only one pass, but whitespace handling often involves looking ahead.
|
||||
* We want to prevent memory allocation and use only one pass, but whitespace
|
||||
* handling often involves looking ahead.
|
||||
*/
|
||||
void hb_unit_content_html(hb_proc* proc, nh_view_str* parent) {
|
||||
bool is_first_char = true;
|
||||
// Set to true when $whitespace is instantiated when $is_first_char is true
|
||||
bool whitespace_at_beginning = false;
|
||||
void hb_unit_content_html(hb_proc* proc, nh_view_str* parent)
|
||||
{
|
||||
bool should_collapse_whitespace =
|
||||
hb_cfg_should_min(&proc->cfg->collapse_whitespace, parent);
|
||||
bool should_destroy_whole_whitespace =
|
||||
hb_cfg_should_min(&proc->cfg->destroy_whole_whitespace, parent);
|
||||
bool should_trim_whitespace =
|
||||
hb_cfg_should_min(&proc->cfg->trim_whitespace, parent);
|
||||
|
||||
// Set to true after calling hb_unit_{comment|bang|tag}
|
||||
bool returned_from_comment_bang_or_tag = false;
|
||||
bool whitespace_after_right_chevron = false;
|
||||
// Trim leading whitespace if configured to do so.
|
||||
if (should_trim_whitespace) {
|
||||
hb_proc_skip_while_predicate(proc,
|
||||
&hb_rule_ascii_whitespace_check);
|
||||
}
|
||||
|
||||
_state last_state = _STATE_START;
|
||||
hb_proc_view_init_src(whitespace, proc);
|
||||
// Whether or not currently in whitespace.
|
||||
bool whitespace_buffered = false;
|
||||
// If currently in whitespace, whether or not current contiguous
|
||||
// whitespace started after a bang, comment, or tag.
|
||||
bool whitespace_started_after_cbot = false;
|
||||
|
||||
while (true) {
|
||||
_state next_state = _get_next_state(proc);
|
||||
|
||||
if (next_state == _STATE_WHITESPACE) {
|
||||
// Whitespace is always buffered and then processed
|
||||
// afterwards, even if not minifying.
|
||||
hb_proc_skip(proc);
|
||||
|
||||
if (last_state != _STATE_WHITESPACE) {
|
||||
// This is the start of one or more whitespace
|
||||
// characters, so start a view of this
|
||||
// contiguous whitespace and don't write any
|
||||
// characters that are part of it yet.
|
||||
hb_proc_view_start_with_src_next(&whitespace,
|
||||
proc);
|
||||
whitespace_buffered = true;
|
||||
whitespace_started_after_cbot =
|
||||
_state_is_cbot(last_state);
|
||||
} else {
|
||||
// This is part of a contiguous whitespace, but
|
||||
// not the start of, so simply ignore.
|
||||
}
|
||||
|
||||
} else {
|
||||
// Next character is not whitespace, so handle any
|
||||
// previously buffered whitespace.
|
||||
if (whitespace_buffered) {
|
||||
// Mark the end of the whitespace.
|
||||
hb_proc_view_end_with_src_prev(&whitespace,
|
||||
proc);
|
||||
|
||||
if (should_destroy_whole_whitespace
|
||||
&& whitespace_started_after_cbot
|
||||
&& _state_is_cbot(next_state)) {
|
||||
// Whitespace is between two tags,
|
||||
// comments, or bangs.
|
||||
// destroy_whole_whitespace is on, so
|
||||
// don't write it.
|
||||
|
||||
} else if (should_trim_whitespace
|
||||
&& next_state == _STATE_END) {
|
||||
// Whitespace is trailing.
|
||||
// should_trim_whitespace is on, so
|
||||
// don't write it.
|
||||
|
||||
} else if (should_collapse_whitespace) {
|
||||
// Current contiguous whitespace needs
|
||||
// to be reduced to a single space
|
||||
// character.
|
||||
hb_proc_write(proc, ' ');
|
||||
|
||||
} else {
|
||||
// Whitespace cannot be minified, so
|
||||
// write in entirety.
|
||||
hb_proc_write_view(proc, &whitespace);
|
||||
}
|
||||
|
||||
// Reset whitespace buffer.
|
||||
whitespace_buffered = false;
|
||||
}
|
||||
|
||||
// Process and consume next character(s).
|
||||
switch (next_state) {
|
||||
case _STATE_COMMENT:
|
||||
hb_unit_comment(proc);
|
||||
break;
|
||||
|
||||
case _STATE_BANG:
|
||||
hb_unit_bang(proc);
|
||||
break;
|
||||
|
||||
case _STATE_OPENING_TAG:
|
||||
hb_unit_tag(proc, parent);
|
||||
break;
|
||||
|
||||
case _STATE_END:
|
||||
break;
|
||||
|
||||
case _STATE_ENTITY:
|
||||
hb_unit_entity(proc);
|
||||
break;
|
||||
|
||||
case _STATE_TEXT:
|
||||
hb_proc_accept(proc);
|
||||
break;
|
||||
|
||||
default:
|
||||
// Defensive coding.
|
||||
hb_proc_error(
|
||||
proc,
|
||||
HB_ERR_INTERR_UNKNOWN_CONTENT_NEXT_STATE,
|
||||
"Unknown content type");
|
||||
}
|
||||
}
|
||||
|
||||
last_state = next_state;
|
||||
if (next_state == _STATE_END) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
#include <hb/proc.h>
|
||||
|
||||
// Accept a JavaScript `//` comment. Parsing stops once a line terminator has
// been matched and accepted, or, without consuming it, as soon as a closing
// </script> tag is next, since the comment can end at closing </script>.
// WARNING: Closing tag must not contain whitespace.
static void _parse_comment_single(hb_proc* proc)
{
	hb_proc_require_match(proc, "//");

	for (;;) {
		if (hb_proc_accept_if_matches_line_terminator(proc)) {
			return;
		}

		if (hb_proc_matches_i(proc, "</script>")) {
			return;
		}

		hb_proc_accept(proc);
	}
}
|
||||
|
||||
// Accept a JavaScript `/* ... */` comment. Parsing stops once the closing
// "*/" has been matched and accepted, or, without consuming it, as soon as a
// closing </script> tag is next, since the comment can end at closing
// </script>.
// WARNING: Closing tag must not contain whitespace.
static void _parse_comment_multi(hb_proc* proc)
{
	hb_proc_require_match(proc, "/*");

	for (;;) {
		if (hb_proc_accept_if_matches(proc, "*/")) {
			return;
		}

		if (hb_proc_matches_i(proc, "</script>")) {
			return;
		}

		hb_proc_accept(proc);
	}
}
|
||||
|
||||
// Accept a single- or double-quoted JavaScript string literal, including both
// delimiters. A backslash escapes whatever follows it (a character or a line
// terminator); a line terminator that is not escaped is reported as an
// unterminated string.
static void _parse_string(hb_proc* proc)
{
	hb_rune delim = hb_proc_accept(proc);

	if (delim != '"' && delim != '\'') {
		hb_proc_error(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
			      "Expected JavaScript string delimiter");
	}

	// True while the previous character was an unescaped backslash.
	bool escaping = false;

	while (true) {
		// Look for a line terminator BEFORE accepting a character.
		// Checking afterwards would swallow a terminator that directly
		// follows the opening delimiter as an ordinary character, and
		// would treat a terminator following an escaped character
		// (e.g. `\a` + newline) as if it had been escaped itself.
		if (hb_proc_accept_if_matches_line_terminator(proc)) {
			if (!escaping) {
				hb_proc_error(proc,
					      HB_ERR_PARSE_EXPECTED_NOT_FOUND,
					      "Unterminated JavaScript string");
			}
			// An escaped line terminator is a line continuation.
			escaping = false;
			continue;
		}

		hb_rune c = hb_proc_accept(proc);

		if (c == '\\') {
			// A second backslash cancels the first.
			escaping = !escaping;
			continue;
		}

		if (c == delim && !escaping) {
			break;
		}

		escaping = false;
	}
}
|
||||
|
||||
// Accept a JavaScript template literal, from the opening backtick through the
// first backtick that is not preceded by an unescaped backslash. Line
// terminators are accepted like any other character.
static void _parse_template(hb_proc* proc)
{
	hb_proc_require_match(proc, "`");

	// True while the previous character was an unescaped backslash.
	bool escaping = false;

	for (;;) {
		hb_rune c = hb_proc_accept(proc);

		if (c == '\\') {
			escaping = !escaping;
		} else if (c == '`' && !escaping) {
			return;
		} else {
			escaping = false;
		}
	}
}
|
||||
|
||||
// Accept the content of a <script> element until "</" is next. Comments,
// string literals and template literals are handed to dedicated parsers so
// that a "</" inside a string or template literal does not end the content;
// every other character is accepted as is.
void hb_unit_content_script(hb_proc* proc)
{
	while (!hb_proc_matches(proc, "</")) {
		if (hb_proc_matches(proc, "//")) {
			_parse_comment_single(proc);
			continue;
		}

		if (hb_proc_matches(proc, "/*")) {
			_parse_comment_multi(proc);
			continue;
		}

		switch (hb_proc_peek(proc)) {
		case '\'':
		case '"':
			_parse_string(proc);
			break;

		case '`':
			_parse_template(proc);
			break;

		default:
			hb_proc_accept(proc);
			break;
		}
	}
}
|
|
@ -0,0 +1,64 @@
|
|||
#include <hb/proc.h>
|
||||
|
||||
// Accept a CSS `/* ... */` comment through its closing "*/".
// Unlike script tags, style comments do NOT end at closing tag.
static void _parse_comment(hb_proc* proc)
{
	hb_proc_require_match(proc, "/*");

	for (;;) {
		if (hb_proc_accept_if_matches(proc, "*/")) {
			return;
		}

		hb_proc_accept(proc);
	}
}
|
||||
|
||||
// Accept a single- or double-quoted CSS string, including both delimiters. A
// backslash escapes whatever follows it (a character or a line terminator); a
// line terminator that is not escaped is reported as an unterminated string.
static void _parse_string(hb_proc* proc)
{
	hb_rune delim = hb_proc_accept(proc);

	if (delim != '"' && delim != '\'') {
		hb_proc_error(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
			      "Expected CSS string delimiter");
	}

	// True while the previous character was an unescaped backslash.
	bool escaping = false;

	while (true) {
		// Look for a line terminator BEFORE accepting a character.
		// Checking afterwards would swallow a terminator that directly
		// follows the opening delimiter as an ordinary character, and
		// would treat a terminator following an escaped character as
		// if it had been escaped itself.
		if (hb_proc_accept_if_matches_line_terminator(proc)) {
			if (!escaping) {
				hb_proc_error(proc,
					      HB_ERR_PARSE_EXPECTED_NOT_FOUND,
					      "Unterminated CSS string");
			}
			// An escaped line terminator continues the string.
			escaping = false;
			continue;
		}

		hb_rune c = hb_proc_accept(proc);

		if (c == '\\') {
			// A second backslash cancels the first.
			escaping = !escaping;
			continue;
		}

		if (c == delim && !escaping) {
			break;
		}

		escaping = false;
	}
}
|
||||
|
||||
// Accept the content of a <style> element until "</" is next. Comments and
// strings are handed to dedicated parsers so that a "</" inside them does not
// end the content; every other character is accepted as is.
void hb_unit_content_style(hb_proc* proc)
{
	while (!hb_proc_matches(proc, "</")) {
		if (hb_proc_matches(proc, "/*")) {
			_parse_comment(proc);
			continue;
		}

		switch (hb_proc_peek(proc)) {
		case '\'':
		case '"':
			_parse_string(proc);
			break;

		default:
			hb_proc_accept(proc);
			break;
		}
	}
}
|
|
@ -1,180 +1,221 @@
|
|||
#include <hb/unit.h>
|
||||
#include <hb/proc.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/unit.h>
|
||||
|
||||
// The minimum length of any entity is 3, which is a character entity reference with a single character name.
|
||||
// The longest UTF-8 representation of a Unicode code point is 4 bytes.
|
||||
// Because there are no character entity references with a name of length 1, it's always better to decode entities for minification purposes.
|
||||
// The minimum length of any entity is 3, which is a character entity reference
|
||||
// with a single character name. The longest UTF-8 representation of a Unicode
|
||||
// code point is 4 bytes. Because there are no character entity references with
|
||||
// a name of length 1, it's always better to decode entities for minification
|
||||
// purposes.
|
||||
|
||||
// Based on the data sourced from https://www.w3.org/TR/html5/entities.json as of 2019-04-20T04:00:00.000Z:
|
||||
// Based on the data sourced from https://www.w3.org/TR/html5/entities.json as
|
||||
// of 2019-04-20T04:00:00.000Z:
|
||||
// - Entity names can have [A-Za-z0-9] characters, and are case sensitive.
|
||||
// - Some character entity references do not need to end with a semicolon.
|
||||
// - The longest name is "CounterClockwiseContourIntegral", with length 31 (excluding leading ampersand and trailing semicolon).
|
||||
// - The longest name is "CounterClockwiseContourIntegral", with length 31
|
||||
// (excluding leading ampersand and trailing semicolon).
|
||||
// - All entity names are at least 2 characters long.
|
||||
|
||||
// Browser implementation behaviour to consider:
|
||||
// - It is unclear what happens if an entity name does not match case sensitively but matches two or more case insensitively.
|
||||
// - For example, given "AlphA" or "aLpha", does the browser choose "alpha" or "Alpha"?
|
||||
// - It is unclear what happens if an entity name does not match case
|
||||
// sensitively but matches two or more case insensitively.
|
||||
// - For example, given "AlphA" or "aLpha", does the browser choose "alpha" or
|
||||
// "Alpha"?
|
||||
// - Do browsers render valid entities without trailing semicolons?
|
||||
// - For example, how do browsers interpret "Chuck-&-Cheese", "1&1", and "&e;"?
|
||||
// - For example, how do browsers interpret "Chuck-&-Cheese", "1&1", and
|
||||
// "&e;"?
|
||||
|
||||
// hyperbuild implementation:
|
||||
// - Entities must start with an ampersand and end with a semicolon.
|
||||
// - Once an ampersand is encountered, it and the sequence of characters following must match the following ECMAScript regular expression to be considered a well formed entity:
|
||||
// - Once an ampersand is encountered, it and the sequence of characters
|
||||
// following must match the following ECMAScript regular expression to be
|
||||
// considered a well formed entity:
|
||||
//
|
||||
//   /&(#(x[0-9a-f]{1,6}|[0-9]{1,7})|[a-z0-9]{2,31});/i
|
||||
//
|
||||
// - If the sequence of characters following an ampersand do not combine to form a well formed entity, the ampersand is considered a bare ampersand.
|
||||
// - A bare ampersand is an ampersand that is interpreted literally and not as the start of an entity.
|
||||
// - hyperbuild looks ahead without consuming to check if the following characters would form a well formed entity. If they don't, only the longest subsequence that could form a well formed entity is consumed.
|
||||
// - An entity is considered invalid if it is well formed but represents a non-existent Unicode code point or reference name.
|
||||
// - If the sequence of characters following an ampersand do not combine to form
|
||||
// a well formed entity, the ampersand is considered a bare ampersand.
|
||||
// - A bare ampersand is an ampersand that is interpreted literally and not as
|
||||
// the start of an entity.
|
||||
// - hyperbuild looks ahead without consuming to check if the following
|
||||
// characters would form a well formed entity. If they don't, only the longest
|
||||
// subsequence that could form a well formed entity is consumed.
|
||||
// - An entity is considered invalid if it is well formed but represents a
|
||||
// non-existent Unicode code point or reference name.
|
||||
|
||||
#define _MAX_UNICODE_CODE_POINT 0x10FFFF
|
||||
|
||||
// Syntactic category of an entity, decided from the characters that follow the
// ampersand.
typedef enum {
	// Not a well formed entity.
	_TYPE_MALFORMED,
	// Character entity reference.
	_TYPE_NAME,
	// Numeric reference introduced by "&#".
	_TYPE_DECIMAL,
	// Numeric reference introduced by "&#x".
	_TYPE_HEXADECIMAL
} _type;
|
||||
|
||||
typedef bool _valid_char_predicate(hb_rune c);
|
||||
|
||||
// Interpret every character of $view as a decimal digit and return the
// resulting value, or -1 if it is greater than _MAX_UNICODE_CODE_POINT.
// The characters are not validated here; the caller is expected to pass only
// ASCII digits.
static int32_t _parse_decimal(nh_view_str* view)
{
	int32_t val = 0;
	nh_view_for(view, i, _, len)
	{
		char c = nh_view_str_get(view, i);
		val = val * 10 + (c - '0');
	}
	return val > _MAX_UNICODE_CODE_POINT ? -1 : val;
}
|
||||
|
||||
// Interpret every character of $view as a hexadecimal digit (either case) and
// return the resulting value, or -1 if it is greater than
// _MAX_UNICODE_CODE_POINT.
// The characters are not validated here: anything that is neither an ASCII
// digit nor an ASCII uppercase letter is treated as a lowercase hex letter.
static int32_t _parse_hexadecimal(nh_view_str* view)
{
	int32_t val = 0;
	nh_view_for(view, i, _, len)
	{
		char c = nh_view_str_get(view, i);
		int32_t digit = hb_rule_ascii_digit_check(c)
					? c - '0'
					: hb_rule_ascii_uppercase_check(c)
						  ? c - 'A' + 10
						  : c - 'a' + 10;
		val = val * 16 + digit;
	}
	return val > _MAX_UNICODE_CODE_POINT ? -1 : val;
}
|
||||
|
||||
/**
|
||||
* Process an HTML entity.
|
||||
*
|
||||
* @return Unicode code point of the entity, or HB_UNIT_ENTITY_NONE if the entity is malformed or invalid
|
||||
* @return Unicode code point of the entity, or HB_UNIT_ENTITY_NONE if the
|
||||
* entity is malformed or invalid
|
||||
*/
|
||||
int32_t hb_unit_entity(hb_proc* proc) {
|
||||
// View of the entire entity, including leading ampersand and any trailing semicolon.
|
||||
hb_proc_view_init_src(entity, proc);
|
||||
hb_proc_view_start_with_src_next(&entity, proc);
|
||||
hb_proc_require_skip(proc, '&');
|
||||
int32_t hb_unit_entity(hb_proc* proc)
|
||||
{
|
||||
// View of the entire entity, including leading ampersand and any
|
||||
// trailing semicolon.
|
||||
hb_proc_view_init_src(entity, proc);
|
||||
hb_proc_view_start_with_src_next(&entity, proc);
|
||||
hb_proc_require_skip(proc, '&');
|
||||
|
||||
// The input can end at any time after initial ampersand.
|
||||
// Examples of valid complete source code: "&", "&a", "&#", "	", "&".
|
||||
// The input can end at any time after initial ampersand.
|
||||
// Examples of valid complete source code: "&", "&a", "&#", "	",
|
||||
// "&".
|
||||
|
||||
// There are three stages to this function:
|
||||
//
|
||||
// 1. Determine the type of entity, so we can know how to parse and validate the following characters.
|
||||
// - This can be done by simply looking at the first and second characters after the initial ampersand, e.g. "&#", "&#x", "&a".
|
||||
// 2. Parse the entity data, i.e. the characters between the ampersand and semicolon.
|
||||
// - To avoid parsing forever on malformed entities without semicolons, there is an upper bound on the amount of possible characters, based on the type of entity detected from the first stage.
|
||||
// 3. Interpret and validate the data.
|
||||
// - This simply checks if it refers to a valid Unicode code point or entity reference name.
|
||||
// There are three stages to this function:
|
||||
//
|
||||
// 1. Determine the type of entity, so we can know how to parse and
|
||||
// validate the following characters.
|
||||
// - This can be done by simply looking at the first and second
|
||||
// characters after the initial ampersand, e.g. "&#", "&#x", "&a".
|
||||
// 2. Parse the entity data, i.e. the characters between the ampersand
|
||||
// and semicolon.
|
||||
// - To avoid parsing forever on malformed entities without
|
||||
// semicolons, there is an upper bound on the amount of possible
|
||||
// characters, based on the type of entity detected from the first
|
||||
// stage.
|
||||
// 3. Interpret and validate the data.
|
||||
// - This simply checks if it refers to a valid Unicode code point or
|
||||
// entity reference name.
|
||||
|
||||
// First stage: determine the type of entity.
|
||||
_valid_char_predicate* predicate;
|
||||
_type type;
|
||||
size_t min_len;
|
||||
size_t max_len;
|
||||
// First stage: determine the type of entity.
|
||||
_valid_char_predicate* predicate;
|
||||
_type type;
|
||||
size_t min_len;
|
||||
size_t max_len;
|
||||
|
||||
if (hb_proc_skip_if_matches(proc, "#x")) {
|
||||
predicate = &hb_rule_ascii_hex_check;
|
||||
type = _TYPE_HEXADECIMAL;
|
||||
min_len = 1;
|
||||
max_len = 6;
|
||||
if (hb_proc_skip_if_matches(proc, "#x")) {
|
||||
predicate = &hb_rule_ascii_hex_check;
|
||||
type = _TYPE_HEXADECIMAL;
|
||||
min_len = 1;
|
||||
max_len = 6;
|
||||
|
||||
} else if (hb_proc_skip_if(proc, '#')) {
|
||||
predicate = &hb_rule_ascii_digit_check;
|
||||
type = _TYPE_DECIMAL;
|
||||
min_len = 1;
|
||||
max_len = 7;
|
||||
} else if (hb_proc_skip_if(proc, '#')) {
|
||||
predicate = &hb_rule_ascii_digit_check;
|
||||
type = _TYPE_DECIMAL;
|
||||
min_len = 1;
|
||||
max_len = 7;
|
||||
|
||||
} else if (hb_rule_entity_reference_valid_name_char(hb_proc_peek_eof(proc))) {
|
||||
predicate = &hb_rule_entity_reference_valid_name_char;
|
||||
type = _TYPE_NAME;
|
||||
min_len = 2;
|
||||
max_len = 31;
|
||||
} else if (hb_rule_entity_reference_valid_name_char(
|
||||
hb_proc_peek_eof(proc))) {
|
||||
predicate = &hb_rule_entity_reference_valid_name_char;
|
||||
type = _TYPE_NAME;
|
||||
min_len = 2;
|
||||
max_len = 31;
|
||||
|
||||
} else {
|
||||
hb_proc_error_if_not_suppressed(proc, HB_ERR_PARSE_MALFORMED_ENTITY, "Malformed entity");
|
||||
// Output bare ampersand.
|
||||
hb_proc_write(proc, '&');
|
||||
return HB_UNIT_ENTITY_NONE;
|
||||
}
|
||||
} else {
|
||||
hb_proc_error_if_not_suppressed(proc,
|
||||
HB_ERR_PARSE_MALFORMED_ENTITY,
|
||||
"Malformed entity");
|
||||
// Output bare ampersand.
|
||||
hb_proc_write(proc, '&');
|
||||
return HB_UNIT_ENTITY_NONE;
|
||||
}
|
||||
|
||||
// Second stage: try to parse a well formed entity.
|
||||
// If the entity is not well formed, either throw an error or interpret literally (depending on configuration).
|
||||
hb_proc_view_init_src(data, proc);
|
||||
hb_proc_view_start_with_src_next(&data, proc);
|
||||
for (size_t i = 0; i < max_len; i++) {
|
||||
hb_eof_rune c = hb_proc_peek_eof(proc);
|
||||
// Character ends entity.
|
||||
if (c == ';') {
|
||||
break;
|
||||
}
|
||||
// Character would not form well formed entity.
|
||||
if (!(*predicate)(c)) {
|
||||
type = _TYPE_MALFORMED;
|
||||
break;
|
||||
}
|
||||
// Character is valid.
|
||||
hb_proc_skip(proc);
|
||||
}
|
||||
hb_proc_view_end_with_src_prev(&data, proc);
|
||||
if (nh_view_str_length(&data) < min_len) type = _TYPE_MALFORMED;
|
||||
// Don't try to consume semicolon if entity is not well formed already.
|
||||
if (type != _TYPE_MALFORMED && !hb_proc_skip_if(proc, ';')) type = _TYPE_MALFORMED;
|
||||
hb_proc_view_end_with_src_prev(&entity, proc);
|
||||
// Second stage: try to parse a well formed entity.
|
||||
// If the entity is not well formed, either throw an error or interpret
|
||||
// literally (depending on configuration).
|
||||
hb_proc_view_init_src(data, proc);
|
||||
hb_proc_view_start_with_src_next(&data, proc);
|
||||
for (size_t i = 0; i < max_len; i++) {
|
||||
hb_eof_rune c = hb_proc_peek_eof(proc);
|
||||
// Character ends entity.
|
||||
if (c == ';') {
|
||||
break;
|
||||
}
|
||||
// Character would not form well formed entity.
|
||||
if (!(*predicate)(c)) {
|
||||
type = _TYPE_MALFORMED;
|
||||
break;
|
||||
}
|
||||
// Character is valid.
|
||||
hb_proc_skip(proc);
|
||||
}
|
||||
hb_proc_view_end_with_src_prev(&data, proc);
|
||||
if (nh_view_str_length(&data) < min_len)
|
||||
type = _TYPE_MALFORMED;
|
||||
// Don't try to consume semicolon if entity is not well formed already.
|
||||
if (type != _TYPE_MALFORMED && !hb_proc_skip_if(proc, ';'))
|
||||
type = _TYPE_MALFORMED;
|
||||
hb_proc_view_end_with_src_prev(&entity, proc);
|
||||
|
||||
if (type == _TYPE_MALFORMED) {
|
||||
hb_proc_error_if_not_suppressed(proc, HB_ERR_PARSE_MALFORMED_ENTITY, "Malformed entity");
|
||||
// Write longest subsequence of characters that could form a well formed entity.
|
||||
hb_proc_write_view(proc, &entity);
|
||||
return HB_UNIT_ENTITY_NONE;
|
||||
}
|
||||
if (type == _TYPE_MALFORMED) {
|
||||
hb_proc_error_if_not_suppressed(proc,
|
||||
HB_ERR_PARSE_MALFORMED_ENTITY,
|
||||
"Malformed entity");
|
||||
// Write longest subsequence of characters that could form a
|
||||
// well formed entity.
|
||||
hb_proc_write_view(proc, &entity);
|
||||
return HB_UNIT_ENTITY_NONE;
|
||||
}
|
||||
|
||||
// Third stage: validate entity and decode if configured to do so.
|
||||
int32_t uchar = -1;
|
||||
switch (type) {
|
||||
case _TYPE_NAME:
|
||||
uchar = hb_rule_entity_reference_get_code_point(&data);
|
||||
break;
|
||||
// Third stage: validate entity and decode if configured to do so.
|
||||
int32_t uchar = -1;
|
||||
switch (type) {
|
||||
case _TYPE_NAME:
|
||||
uchar = hb_rule_entity_reference_get_code_point(&data);
|
||||
break;
|
||||
|
||||
case _TYPE_DECIMAL:
|
||||
uchar = _parse_decimal(&data);
|
||||
break;
|
||||
case _TYPE_DECIMAL:
|
||||
uchar = _parse_decimal(&data);
|
||||
break;
|
||||
|
||||
case _TYPE_HEXADECIMAL:
|
||||
uchar = _parse_hexadecimal(&data);
|
||||
break;
|
||||
case _TYPE_HEXADECIMAL:
|
||||
uchar = _parse_hexadecimal(&data);
|
||||
break;
|
||||
|
||||
default:
|
||||
// Defensive coding.
|
||||
hb_proc_error(proc, HB_ERR_INTERR_UNKNOWN_ENTITY_TYPE, "Unknown entity type");
|
||||
}
|
||||
if (uchar == -1) {
|
||||
hb_proc_error(proc, HB_ERR_PARSE_INVALID_ENTITY, "Invalid entity");
|
||||
}
|
||||
default:
|
||||
// Defensive coding.
|
||||
hb_proc_error(proc, HB_ERR_INTERR_UNKNOWN_ENTITY_TYPE,
|
||||
"Unknown entity type");
|
||||
}
|
||||
if (uchar == -1) {
|
||||
hb_proc_error(proc, HB_ERR_PARSE_INVALID_ENTITY,
|
||||
"Invalid entity");
|
||||
}
|
||||
|
||||
if (proc->cfg->decode_entities) {
|
||||
hb_proc_write_utf_8(proc, uchar);
|
||||
} else {
|
||||
hb_proc_write_view(proc, &entity);
|
||||
}
|
||||
if (proc->cfg->decode_entities) {
|
||||
hb_proc_write_utf_8(proc, uchar);
|
||||
} else {
|
||||
hb_proc_write_view(proc, &entity);
|
||||
}
|
||||
|
||||
return uchar;
|
||||
return uchar;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
#include <hb/proc.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/unit.h>
|
||||
|
||||
// Process one element: its opening tag with attributes, then, unless the tag
// is self-closing or a void tag, its content and the matching closing tag.
//
// $parent is the name of the enclosing element; it is only used to check
// whether this tag may appear directly under it.
void hb_unit_tag(hb_proc* proc, nh_view_str* parent)
{
	hb_proc_require(proc, '<');
	nh_view_str name = hb_unit_tag_name(proc);

	// This tag must pass every parent/child whitelist and blacklist rule to
	// be allowed directly under its parent.
	bool allowed_here =
		hb_rule_tag_parent_whitelist_allowed(&name, parent)
		&& hb_rule_tag_child_whitelist_allowed(parent, &name)
		&& hb_rule_tag_parent_blacklist_allowed(&name, parent)
		&& hb_rule_tag_child_blacklist_allowed(parent, &name);
	if (!allowed_here) {
		hb_proc_error(proc, HB_ERR_PARSE_ILLEGAL_CHILD,
			      "Tag can't be a child here");
	}

	hb_unit_attr_type prev_attr_type = HB_UNIT_ATTR_NONE;
	bool self_closing = false;

	for (;;) {
		// Each iteration starts right after the tag name or after a
		// complete attribute (with its value, if it had one).
		// Whitespace here is dropped or kept depending on
		// configuration; either way its length is remembered.
		size_t ws_count =
			proc->cfg->remove_tag_whitespace
				? hb_proc_skip_while_predicate(
					  proc, &hb_rule_ascii_whitespace_check)
				: hb_proc_accept_while_predicate(
					  proc,
					  &hb_rule_ascii_whitespace_check);

		// End of tag.
		if (hb_proc_accept_if(proc, '>')) {
			break;
		}

		self_closing = hb_proc_accept_if_matches(proc, "/>");
		if (self_closing) {
			hb_proc_error_if_not_suppressed(
				proc, HB_ERR_PARSE_SELF_CLOSING_TAG,
				"Self-closing tag");
			break;
		}

		// An attribute follows. HB_ERR_PARSE_NO_SPACE_BEFORE_ATTR is
		// not suppressible as otherwise there would be difficulty in
		// determining what is the end of a tag/attribute
		// name/attribute value.
		if (ws_count == 0) {
			hb_proc_error(proc, HB_ERR_PARSE_NO_SPACE_BEFORE_ATTR,
				      "No whitespace before attribute");
		}

		// The original whitespace was dropped above, so put back a
		// single space unless the previous attribute was quoted.
		if (proc->cfg->remove_tag_whitespace
		    && prev_attr_type != HB_UNIT_ATTR_QUOTED) {
			hb_proc_write(proc, ' ');
		}

		prev_attr_type = hb_unit_attr(proc);
	}

	// Self-closing and void tags have neither content nor a closing tag.
	if (self_closing || hb_rule_tag_void_check(&name)) {
		return;
	}

	// Pick the content parser from the tag name.
	if (nh_view_str_equals_literal_i(&name, "script")) {
		hb_unit_content_script(proc);
	} else if (nh_view_str_equals_literal_i(&name, "style")) {
		hb_unit_content_style(proc);
	} else {
		// Standard HTML.
		hb_unit_content_html(proc, &name);
	}

	// Require closing tag for non-void.
	hb_proc_require_match(proc, "</");
	nh_view_str closing_name = hb_unit_tag_name(proc);
	if (!nh_view_str_equals(&name, &closing_name)) {
		// TODO Find a way to cleanly provide opening and closing tag
		// names (which are views) into error message without leaking
		// memory.
		hb_proc_error(proc, HB_ERR_PARSE_UNCLOSED_TAG,
			      "Tag not closed");
	}
	hb_proc_require(proc, '>');
}
|
|
@ -2,23 +2,28 @@
|
|||
#include <hb/proc.h>
|
||||
#include <hb/rule.h>
|
||||
|
||||
// Parse a tag name of one or more tag name characters and return a view of
// it. Uppercase letters and non-standard tag names are reported unless those
// errors are suppressed.
nh_view_str hb_unit_tag_name(hb_proc* proc)
{
	hb_proc_view_init_src(name, proc);

	hb_proc_view_start_with_src_next(&name, proc);
	do {
		// Require at least one character.
		hb_rune c = hb_proc_require_predicate(
			proc, &hb_rule_tag_name_check, "tag name");

		if (hb_rule_ascii_uppercase_check(c)) {
			hb_proc_error_if_not_suppressed(
				proc, HB_ERR_PARSE_UCASE_TAG,
				"Uppercase letter in tag name");
		}
	} while (hb_rule_tag_name_check(hb_proc_peek(proc)));
	hb_proc_view_end_with_src_prev(&name, proc);

	if (!hb_rule_tag_valid_check(&name)) {
		hb_proc_error_if_not_suppressed(
			proc, HB_ERR_PARSE_NONSTANDARD_TAG, "Non-standard tag");
	}

	return name;
}
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
// Initialise the library: rules are set up first, then the config.
void hb_init(void)
{
	hb_rule_init();
	hb_config_init();
}
|
|
@ -0,0 +1,22 @@
|
|||
#!/usr/bin/env bash

# Compile every C test under ./test together with all library and source files,
# run it, and count the tests whose executable exits with a non-zero status.
# Exits with 1 if compilation fails or any test fails, and 0 otherwise.

# Needed so that ** matches files in nested directories.
shopt -s globstar

pushd "$(dirname "$0")" > /dev/null || exit 1

tmp_out="$(mktemp)" || exit 1
# Remove the temporary test executable however the script exits.
trap 'rm -f "$tmp_out"' EXIT

fail_count=0
for test_file in ./test/**/*.c; do
  # The lib/ and src/ globs are intentionally unquoted so that they expand.
  gcc --std=c11 -Wall -Wextra -Werror -O0 -g -Ilib -Isrc -Itest -o "$tmp_out" "$test_file" lib/**/*.c src/**/*.c || exit 1
  if ! "$tmp_out"; then
    fail_count=$((fail_count + 1))
  fi
done

if [[ $fail_count == 0 ]]; then
  echo "All tests passed successfully"
else
  echo "$fail_count test(s) failed" >&2
fi

popd > /dev/null

# Report failures to the caller (e.g. CI) through the exit status.
exit $((fail_count > 0))
|
|
@ -1,57 +1,65 @@
|
|||
#include <stdio.h>
|
||||
#include <hb/rule.h>
|
||||
#include <hb/hyperbuild.h>
|
||||
#include <hb/unit.h>
|
||||
#include <hbtest.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// An attribute value:
|
||||
// - delimited by double quotes
|
||||
// - containing one single quote literal
|
||||
// - containing one single quote encoded
|
||||
// - containing three double quotes encoded
|
||||
// - with multiple whitespace sequences of length 2 and higher, including at the start and end
|
||||
// - with multiple whitespace sequences of length 2 and higher, including at
|
||||
// the start and end
|
||||
#define INPUT "\" abc''" "" a \" 1"
|
||||
|
||||
int main(void) {
|
||||
printf("Test started\n");
|
||||
hb_rule_init();
|
||||
int main(void)
|
||||
{
|
||||
hyperbuild_init();
|
||||
|
||||
hb_err_set* suppressed = hb_err_set_create();
|
||||
hb_err_set* suppressed = hb_err_set_create();
|
||||
|
||||
hb_rune* out = calloc(sizeof(INPUT) - 1, sizeof(hb_rune));
|
||||
hb_rune* src = malloc(sizeof(INPUT) + 1);
|
||||
memcpy(src, INPUT "\xFF", sizeof(INPUT) + 1);
|
||||
|
||||
hb_cfg cfg = {
|
||||
.collapse_whitespace = {
|
||||
.mode = HB_CFG_SET_MODE_ALL,
|
||||
.set = NULL,
|
||||
},
|
||||
.destroy_whole_whitespace = {
|
||||
.mode = HB_CFG_SET_MODE_ALL,
|
||||
.set = NULL,
|
||||
},
|
||||
.trim_whitespace = {
|
||||
.mode = HB_CFG_SET_MODE_ALL,
|
||||
.set = NULL,
|
||||
},
|
||||
.suppressed_errors = *suppressed,
|
||||
.trim_class_attributes = true,
|
||||
.decode_entities = true,
|
||||
.remove_attr_quotes = true,
|
||||
.remove_comments = true,
|
||||
.remove_optional_tags = true,
|
||||
.remove_tag_whitespace = true,
|
||||
};
|
||||
hb_cfg cfg = {
|
||||
.collapse_whitespace =
|
||||
{
|
||||
.mode = HB_CFG_SET_MODE_ALL,
|
||||
.set = NULL,
|
||||
},
|
||||
.destroy_whole_whitespace =
|
||||
{
|
||||
.mode = HB_CFG_SET_MODE_ALL,
|
||||
.set = NULL,
|
||||
},
|
||||
.trim_whitespace =
|
||||
{
|
||||
.mode = HB_CFG_SET_MODE_ALL,
|
||||
.set = NULL,
|
||||
},
|
||||
.suppressed_errors = *suppressed,
|
||||
.trim_class_attributes = true,
|
||||
.decode_entities = true,
|
||||
.remove_attr_quotes = true,
|
||||
.remove_comments = true,
|
||||
.remove_optional_tags = true,
|
||||
.remove_tag_whitespace = true,
|
||||
};
|
||||
|
||||
hb_proc proc = {
|
||||
.cfg = &cfg,
|
||||
.name = "test",
|
||||
.src = INPUT "\xFF",
|
||||
.src_len = sizeof(INPUT) - 1,
|
||||
.src_next = 0,
|
||||
.out = out,
|
||||
.out_next = 0,
|
||||
};
|
||||
hb_proc proc = {
|
||||
.cfg = &cfg,
|
||||
.src = src,
|
||||
.src_len = sizeof(INPUT) - 1,
|
||||
.src_next = 0,
|
||||
.out = src,
|
||||
.out_next = 0,
|
||||
};
|
||||
|
||||
hb_unit_attr_type type = hb_unit_attr_val_quoted(&proc, true);
|
||||
printf("%s\n", out);
|
||||
hb_unit_attr_val_quoted(&proc, true);
|
||||
|
||||
hb_err_set_destroy(suppressed);
|
||||
src[proc.out_next] = 0;
|
||||
printf("%s\n", src);
|
||||
|
||||
hb_err_set_destroy(suppressed);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// expect(cond, msg): when `cond` evaluates to false, writes a "Test failed"
// line to stderr containing `msg` plus the current file, function and line.
//
// - `msg` must be a string literal: it is concatenated into the format string.
// - `cond` is parenthesised so that the negation applies to the whole
//   expression (e.g. `expect(a == b, "...")` tests `!(a == b)`, not
//   `(!a) == b`).
// - The do/while (0) wrapper makes the expansion a single statement, so the
//   macro can be followed by `;` and used inside an unbraced if/else without
//   capturing the caller's `else`.
#define expect(cond, msg)                                                      \
	do {                                                                   \
		if (!(cond)) {                                                 \
			fprintf(stderr,                                        \
				"Test failed: " msg " [%s %s() line %d]",      \
				__FILE__, __func__, __LINE__);                 \
		}                                                              \
	} while (0)
|
Loading…
Reference in New Issue