diff --git a/.gitignore b/.gitignore
index 5a27e7b..1b72444 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,2 @@
-/out/
-/docs/
-/cmake-build-*
 /Cargo.lock
+/target
diff --git a/Cargo.toml b/Cargo.toml
index 572ff2e..310de98 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,4 +5,4 @@ authors = ["Wilson Lin <code@wilsonl.in>"]
 edition = "2018"
 
 [dependencies]
-phf = "0.8.0"
+phf = { version = "0.8.0", features = ["macros"] }
diff --git a/README.md b/README.md
index 7aa4318..802332c 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # hyperbuild
 
-A fast one-pass in-place HTML minifier written in C with advanced whitespace handling.
+A fast one-pass in-place HTML minifier written in Rust with advanced whitespace handling.
 
 Currently in beta, working on documentation and tests. Issues and pull requests welcome!
 
@@ -12,15 +12,7 @@ Currently in beta, working on documentation and tests. Issues and pull requests
 
 ## Usage
 
-This is the library. To use hyperbuild, you'll probably need one of these:
-
-- [hyperbuild CLI](https://github.com/wilsonzlin/hyperbuild-cli)
-
-Documentation for the library itself is currently WIP.
-
-hyperbuild uses the following dependencies, which are included as submodules:
-
-- [nicehash](https://github.com/wilsonzlin/nicehash)
+TODO
 
 ## Minification
 
diff --git a/archive/quoted.rs b/archive/quoted.rs
new file mode 100644
index 0000000..62c7137
--- /dev/null
+++ b/archive/quoted.rs
@@ -0,0 +1,130 @@
+fn tmp() -> () {
+    // TODO
+    loop {
+        let is_whitespace = is_whitespace(c);
+        if should_collapse_and_trim_ws && is_whitespace {
+            // Character, after any entity decoding, is whitespace.
+            // Don't write whitespace.
+            // In order to collapse whitespace, only write one space
+            // character once the first non-whitespace character
+            // after a sequence of whitespace characters is reached.
+            last_char_was_whitespace = true;
+            proc.skip();
+        } else {
+            // Character, after any entity decoding, is not whitespace.
+            if last_char_was_whitespace {
+                // This is the first non-whitespace character after one or more whitespace
+                // character(s), so collapse whitespace by writing only one space.
+                proc.write(b' ');
+                has_whitespace_after_processing = true;
+                last_char_was_whitespace = false;
+            };
+
+            if c == b'"' {
+                count_double_quotation += 1;
+            } else if c == b'\'' {
+                count_single_quotation += 1;
+            } else if is_whitespace {
+                // `should_collapse_and_trim_ws` is false, so
+                // whitespace is written.
+                has_whitespace_after_processing = true;
+            };
+
+            increment_count(c);
+            if !processed_entity {
+                // Don't need to accept if hb_unit_entity has
+                // already been called.
+                proc.accept();
+            };
+        };
+    }
+
+    // Since it's not possible to optimise the delimiter quotes without
+    // knowing the complete value, mark the processed value in the output
+    // for post-processing later.
+    let proc_value_start = proc.data.get_out_pos();
+    let mut is_first_char = true;
+
+    loop {
+        let processed_entity = c == b'&';
+        if processed_entity {
+            // Characters will be consumed by hb_unit_entity, but they will never be '\'', '"', or
+            // whitespace, as the function only consumes characters that could form a well-formed
+            // entity. See the function for more details.
+            // TODO Handle bad char
+            let decoded = process_entity(proc)?;
+            match decoded {
+                Some(e) => if e <= 0x7f { c = e as u8; } else { c = 0xff; },
+                None => c = 0xff,
+            };
+        }
+
+
+        is_first_char = false;
+    };
+    let proc_length = proc.data.get_out_pos() + 1 - proc_value_start;
+    proc.match_char(delimiter).require()?.discard();
+
+    // Technically, the specification states that values may only be
+    // unquoted if they don't contain ["'`=<>]. However, browsers seem to
+    // interpret characters after `=` and before the nearest whitespace as
+    // an unquoted value, so long as no quote immediately follows `=`. If a
+    // value cannot be unquoted, use the one that appears the least and
+    // therefore requires the least amount of encoding. Prefer double quotes
+    // to single quotes if it's a tie.
+    let quote_to_encode;
+    let quote_encoded;
+    let amount_of_quotes_to_encode;
+
+    if proc_length > 0 && !has_whitespace_after_processing && !starts_with_quote {
+        // No need to do any further processing; processed value is
+        // already in unquoted form.
+        return Ok(AttrType::Unquoted);
+    } else if count_single_quotation < count_double_quotation {
+        quote_to_encode = b'\'';
+        quote_encoded = ENCODED_SINGLE_QUOTE;
+        amount_of_quotes_to_encode = count_single_quotation;
+    } else {
+        quote_to_encode = b'"';
+        quote_encoded = ENCODED_DOUBLE_QUOTE;
+        amount_of_quotes_to_encode = count_double_quotation;
+    }
+
+    // TODO Improve; avoid direct memory access; clean API.
+    let post_length = 2 + proc_length - amount_of_quotes_to_encode + (amount_of_quotes_to_encode * quote_encoded.len());
+    // Where the post-processed output should start in the output array.
+    let out_start = proc_value_start;
+    let proc_end = out_start + proc_length - 1;
+    let post_end = out_start + post_length - 1;
+
+    let mut reader = proc_end;
+    let mut writer = post_end;
+    proc.data.set_out_char_at(writer, quote_to_encode);
+    writer -= 1;
+    // To prevent overwriting data when encoding quotes, post-process output
+    // in reverse. Loop condition is checked at end of loop instead of
+    // before to prevent underflow. WARNING: This code directly uses and
+    // manipulates struct members of `proc`, which in general should be
+    // avoided.
+    loop {
+        let c = proc.data.get_src_char_at(reader);
+        if c == quote_to_encode {
+            writer -= quote_encoded.len();
+            proc.data.replace_out_slice(writer + 1, quote_encoded);
+        } else {
+            proc.data.set_out_char_at(writer, c);
+            writer -= 1;
+        }
+
+        // Break before decrementing to prevent underflow.
+        if reader == out_start {
+            break;
+        }
+        reader -= 1;
+    }
+    // This must be done after previous loop to prevent overwriting data.
+    proc.data.set_out_char_at(writer, quote_to_encode);
+    proc.data.set_out_pos(post_end + 1);
+
+    Ok(AttrType::Quoted)
+}
diff --git a/cli/CMakeLists.txt b/cli/CMakeLists.txt
deleted file mode 100644
index 7484e61..0000000
--- a/cli/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-cmake_minimum_required(VERSION 3.14)
-project(hyperbuild-cli C)
-
-set(CMAKE_C_STANDARD 11)
-
-# TODO Include submodule config, don't hardcode submodule's dependencies
-include_directories(lib src ext/hyperbuild/lib)
-
-add_executable(hyperbuild-cli
-        src/hbcli/err.c
-        src/hbcli/opt.c
-        src/hbcli/arg/suppress.c
-        src/hbcli/main.c src/hbcli/arg/tags.c)
diff --git a/notes/Processing.md b/notes/Processing.md
new file mode 100644
index 0000000..e7ab5f9
--- /dev/null
+++ b/notes/Processing.md
@@ -0,0 +1,17 @@
+# Processing
+
+## Redundant requires
+
+Sometimes the code will look like it duplicates matching logic. For example:
+
+```rust
+fn process_comment(proc: &mut Proc) -> () {
+	proc.matches("<!--").require_reason("comment").skip();
+
+	proc.while_not_matches("-->").skip();
+
+	proc.matches("-->").require_reason("comment end").skip();
+}
+```
+
+At first glance, it might appear that the `while_not_matches` call makes it redundant to require `-->` again immediately afterwards. However, it's possible that `while_not_matches` actually stops for some other reason, such as reaching EOF. Even if the match is guaranteed, it's still nice to have a declared invariant, like an assertion statement.
diff --git a/notes/parsing/tag-omission.md b/notes/Tag omission.md
similarity index 100%
rename from notes/parsing/tag-omission.md
rename to notes/Tag omission.md
diff --git a/notes/code/error-handling.md b/notes/code/error-handling.md
deleted file mode 100644
index e6f881f..0000000
--- a/notes/code/error-handling.md
+++ /dev/null
@@ -1,135 +0,0 @@
-# Error handling
-
-## Error structs
-
-Errors are represented using `hbe_err_s` structs (type `hbe_err_t`). It has two fields:
-
-- `code`: A value from the enum `hbe_errcode` (type `hbe_errcode_t`).
-- `message`: A character array (`hb_char_t *`) describing the error and providing context.
-
-## Error-prone functions
-
-Every function that may result in errors should declare `hbe_err_t *hbe_err` as its first parameter.
-
-Functions can result in errors if:
-
-- it calls any function that may result in an error
-- it sets the variable pointed to by `hbe_err`
-
-If the function needs to do cleanup operations, it should declare a `finally:` label at the end of the function and put the cleanup code there. If the function returns a value, the function should start with a `rv_t rv = 0;` declaration (where `rv_t` is the return type), and the `finally` section should end with a `return rv;`.
-
-`rv` should be initialised because technically an error can occur at any time after it, including immediately afterwards.
-
-## Creating errors
-
-To create an error, use the `hbe_err_t hbe_error(hbe_errcode_t code, hb_char_t *message)` function.
-The result should be set to `*hbe_err`, and then the function should return.
-
-When an error occurs, the function should return some arbitrary return value such as `0`.
-Return values from a function call are not considered reliable if errors occurred during their execution.
-
-```c
-int error_prone(hbe_err_t *hbe_err, char *msg) {
-  if (some_error_condition) {
-    *hbe_err = hbe_error(1, "Bad!");
-    return 0;
-  }
-
-  printf("%s\n", msg);
-
-  return 42;
-}
-```
-
-To simplify this code, a macro is available:
-
-```c
-int error_prone(hbe_err_t *hbe_err, char *msg) {
-  if (some_error_condition) {
-    HBE_THROW(1, "Bad!");
-    /* Translates to:
-    *hbe_err = hbe_error(1, "Bad!");
-    return 0;
-    */
-  }
-
-  printf("%s\n", msg);
-
-  return 42;
-}
-```
-
-If the return type is `void`, use `HBE_THROW_V` instead of `HBE_THROW`.
-If there is a cleanup section, use `HBE_THROW_F`.
-
-## Handling errors
-
-When a function call may result in an error, pass `hbe_err` to the function and check if the value dereferenced is not `NULL`. If it isn't, an error occurred and the callee should return.
-
-The return value should not be used if an error occurred.
-
-```c
-int callee(hbe_err_t *hbe_err, int a, int b) {
-  int meaning_of_life = error_prone(hbe_err, "Yes");
-  if (*hbe_err != NULL) {
-    // An error occurred, $meaning_of_life is unreliable
-    return 0;
-  }
-
-  return 3;
-}
-```
-
-To simplify this code, a macro is available:
-
-```c
-int callee(hbe_err_t *hbe_err, int a, int b) {
-  int meaning_of_life = HBE_CATCH(error_prone, hbe_err, "Yes");
-  /* Translates to:
-  int meaning_of_life = error_prone(hbe_err, "Yes");
-  if (*hbe_err != NULL) {
-    return 0;
-  }
-  */
-
-  return 3;
-}
-```
-
-If the return type is `void`, use `HBE_CATCH_V` instead.
-If there is a cleanup section, use `HBE_CATCH_F`.
-
-## Returning with cleanup
-
-Use the macro `HBE_RETURN_F` to set the return value and go to the cleanup section:
-
-```c
-int fn(hbe_err_t *hbe_err) {
-  int rv = 0;
-
-  HBE_RETURN_F(1);
-  /* Translates to:
-  rv = 1;
-  goto finally;
-  */
-
-  finally:
-    return rv;
-}
-```
-
-## Top-level error handler
-
-At the very root, where the call to the first error-prone function resides, create a variable with type `hbe_err_t` set to `NULL` on the stack, and pass a reference to it:
-
-After the call, if an error occurred, the variable will be set to a value other than `NULL`.
-
-```c
-int main(void) {
-  hbe_err_t err = NULL;
-  fn(&err);
-  if (err != NULL) {
-    // An error occurred
-  }
-}
-```
diff --git a/notes/code/scope-naming.md b/notes/code/scope-naming.md
deleted file mode 100644
index 7eaaf3b..0000000
--- a/notes/code/scope-naming.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Scope naming
-
-## Public
-
-```c
-int hb_sub_function_name(int a, int b);
-```
-
-## Internal use only
-
-Used across multiple files but should only be used by this project's code.
-
-```c
-int _hb_sub_function_name(int a, int b);
-```
-
-## Within same file only
-
-```c
-// Don't declare in header file
-static int _function_name(int a, int b) {}
-```
diff --git a/notes/jmptest/test.c b/notes/jmptest/test.c
deleted file mode 100644
index d243444..0000000
--- a/notes/jmptest/test.c
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <setjmp.h>
-#include <string.h>
-
-typedef void destructor_t(void*);
-
-typedef struct runtime_s {
-  char* error;
-  void** instances;
-  destructor_t** destructors;
-} *runtime_t;
-
-static runtime_t runtime;
-
-void runtime_init(void) {
-  runtime = calloc(1, sizeof(struct runtime_s));
-  runtime->instances = calloc(10, sizeof(void*));
-  runtime->destructors = calloc(10, sizeof(destructor_t));
-}
-
-typedef struct buffer_s {
-  size_t length;
-  size_t size;
-  char* data;
-} *buffer_t;
-
-buffer_t buffer_create(void) {
-  buffer_t buffer = calloc(1, sizeof(struct buffer_s));
-  char* data = calloc(10, sizeof(char));
-  buffer->size = 10;
-  buffer->data = data;
-  return buffer;
-}
-
-void buffer_destroy(buffer_t buffer) {
-  free(buffer->data);
-  free(buffer);
-  printf("Buffer destroyed\n");
-}
-
-static jmp_buf env;
-
-void failing_function(void) {
-  printf("Entered failing_function\n");
-  longjmp(env, 1);
-}
-
-int main(void) {
-  runtime_init();
-
-  if (setjmp(env) == 0) {
-    buffer_t buffer = buffer_create();
-    runtime->instances[0] = buffer;
-    runtime->destructors[0] = (destructor_t *) &buffer_destroy;
-    memcpy(buffer->data, "Hello", 5);
-    failing_function();
-    printf("End of setjmp == 0\n");
-  } else {
-    // Error handling code
-    printf("%p: %s\n", &runtime->instances[0], ((buffer_t) runtime->instances[0])->data);
-    runtime->destructors[0](runtime->instances[0]);
-    printf("End of error handling code\n");
-  }
-
-  return EXIT_SUCCESS;
-}
diff --git a/notes/util/pipe.c.md b/notes/util/pipe.c.md
deleted file mode 100644
index 6100a63..0000000
--- a/notes/util/pipe.c.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# `pipe.c`
-
-|Name|Source|Destination|Updates position|Returns read|Fatal on EOI|
-|---|---|---|---|---|---|
-|`accept`|Buffer, then Input|Output|Yes|Yes|Yes|
-|`skip`|Buffer, then Input|-|Yes|N|Yes|
-|`peek`|Buffer, then Input|Buffer|N|Yes|Yes|
-|`write`|Parameter|Output|N|N|-|
diff --git a/src/cfg.c b/src/cfg.c
deleted file mode 100644
index cf55b55..0000000
--- a/src/cfg.c
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <hb/cfg.h>
-
-bool hb_cfg_should_min(hb_cfg_tags_set* set, nh_view_str* view)
-{
-	switch (set->mode) {
-	case HB_CFG_TAGS_SET_MODE_NONE:
-		return false;
-	case HB_CFG_TAGS_SET_MODE_ALL:
-		return true;
-	case HB_CFG_TAGS_SET_MODE_ALLOW:
-		return view != NULL && hb_set_tag_names_has(set->set, view);
-	default: /* case HB_CFG_TAGS_SET_MODE_DENY: */
-		return view == NULL || !hb_set_tag_names_has(set->set, view);
-	}
-}
diff --git a/src/cfg.h b/src/cfg.h
deleted file mode 100644
index 748eb60..0000000
--- a/src/cfg.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#pragma once
-
-#include <hb/collection.h>
-#include <hb/err.h>
-#include <stdbool.h>
-
-typedef enum {
-	HB_CFG_TAGS_SET_MODE_NONE, // i.e. don't minify ever
-	HB_CFG_TAGS_SET_MODE_ALLOW,
-	HB_CFG_TAGS_SET_MODE_DENY,
-	HB_CFG_TAGS_SET_MODE_ALL, // i.e. minify all without exception
-} hb_cfg_tags_set_mode;
-
-typedef struct {
-	hb_cfg_tags_set_mode mode;
-	hb_set_tag_names* set;
-} hb_cfg_tags_set;
-
-typedef struct {
-	hb_cfg_tags_set collapse_whitespace;
-	hb_cfg_tags_set destroy_whole_whitespace;
-	hb_cfg_tags_set trim_whitespace;
-	hb_err_set suppressed_errors;
-	bool trim_class_attributes;
-	bool decode_entities;
-	bool remove_attr_quotes;
-	bool remove_comments;
-	bool remove_tag_whitespace;
-} hb_cfg;
-
-bool hb_cfg_should_min(hb_cfg_tags_set* set, nh_view_str* view);
diff --git a/src/code/inplace.rs b/src/code/inplace.rs
new file mode 100644
index 0000000..cb4f66e
--- /dev/null
+++ b/src/code/inplace.rs
@@ -0,0 +1,10 @@
+/// `Code` backed by a single buffer (`data`) with separate read and write offsets.
+pub struct CodeInPlace<'data> {
+    data: &'data mut [u8],
+    read_next: usize,
+    // Offset of the next unwritten space.
+    write_next: usize,
+}
+
+// TODO Implement the required `Code` methods.
+impl Code for CodeInPlace<'_> {}
diff --git a/src/code/mod.rs b/src/code/mod.rs
new file mode 100644
index 0000000..9ed32fc
--- /dev/null
+++ b/src/code/mod.rs
@@ -0,0 +1,57 @@
+use std::ops::Range;
+
+/// Access to the source being read and the output being written during processing.
+pub trait Code {
+    // Unsafe direct memory access. TODO Pos refers to index of next readable.
+    unsafe fn get_src_pos(&self) -> usize;
+    /// Does NOT check bounds (assumes already checked).
+    unsafe fn set_src_pos(&self, pos: usize) -> ();
+    unsafe fn get_src_char_at(&self, pos: usize) -> u8;
+    /// Get a slice from `range.start` (inclusive) to `range.end` (exclusive).
+    unsafe fn get_src_slice(&self, range: Range<usize>) -> &[u8];
+
+    // TODO Pos refers to index of next writable.
+    unsafe fn get_out_pos(&self) -> usize;
+    /// Does NOT check bounds (assumes already checked).
+    unsafe fn set_out_pos(&self, pos: usize) -> usize;
+    unsafe fn set_out_char_at(&self, pos: usize, c: u8) -> ();
+    unsafe fn get_out_mut_slice(&self, range: Range<usize>) -> &mut [u8];
+    unsafe fn replace_out_at(&self, pos: usize, s: &[u8]) -> ();
+
+    // Checking bounds.
+    fn in_bounds(&self, offset: usize) -> bool;
+    fn at_end(&self) -> bool {
+        !self.in_bounds(0)
+    }
+
+    // Reading.
+    /// Get the character `offset` positions past the next one (0 returns the next character).
+    /// Panics. Does not check bounds for performance (e.g. already checked).
+    fn read(&self, offset: usize) -> u8 {
+        // SAFETY: relies on the caller having bounds-checked `offset`, as documented above.
+        unsafe { self.get_src_char_at(self.get_src_pos() + offset) }
+    }
+    fn maybe_read(&self, offset: usize) -> Option<u8> {
+        if self.in_bounds(offset) {
+            Some(self.read(offset))
+        } else {
+            None
+        }
+    }
+    /// Get a slice of the next `count` characters. Panics; does not check bounds (already checked).
+    fn read_slice(&self, count: usize) -> &[u8] {
+        // SAFETY: relies on the caller having bounds-checked `count`, as documented above.
+        unsafe { self.get_src_slice(self.get_src_pos()..self.get_src_pos() + count) }
+    }
+
+    // Writing.
+    /// Move next `amount` characters to output.
+    /// Panics. Does not check bounds for performance (e.g. already checked).
+    fn shift(&self, amount: usize) -> ();
+    fn write(&self, c: u8) -> ();
+    fn write_slice(&self, s: &[u8]) -> ();
+
+    // Skipping.
+    /// Panics. Does not check bounds for performance (e.g. already checked).
+    fn consume(&self, amount: usize) -> ();
+}
diff --git a/src/code/outofplace.rs b/src/code/outofplace.rs
new file mode 100644
index 0000000..e58fb63
--- /dev/null
+++ b/src/code/outofplace.rs
@@ -0,0 +1,11 @@
+/// `Code` that reads from one buffer (`src`) and writes to a separate one (`out`).
+pub struct CodeOutOfPlace<'src, 'out> {
+    src: &'src [u8],
+    src_next: usize,
+
+    out: &'out mut [u8],
+    out_next: usize,
+}
+
+// TODO Implement the required `Code` methods.
+impl Code for CodeOutOfPlace<'_, '_> {}
diff --git a/src/collection.c b/src/collection.c
deleted file mode 100644
index 403a19a..0000000
--- a/src/collection.c
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <hb/collection.h>
-
-// Data structure for mapping entity references to Unicode code points.
-NH_MAP_VIEW_STR_IMPL(hb_map_entity_references, int32_t, -1);
-
-// Data structure for a set of tag names.
-NH_SET_VIEW_ISTR_IMPL(hb_set_tag_names);
-#define hb_set_tag_names_add_whole_literal(set, str)                           \
-	hb_set_tag_names_add_whole_array(set, nh_litarr(str))
-
-// Data structure for mapping tag names to sets of tag names.
-NH_MAP_VIEW_ISTR_IMPL(hb_map_tag_relations, hb_set_tag_names*, NULL);
-#define hb_map_tag_relations_set_whole_literal(map, str, v)                    \
-	hb_map_tag_relations_set_whole_array(map, nh_litarr(str), v)
diff --git a/src/collection.h b/src/collection.h
deleted file mode 100644
index 9cfe877..0000000
--- a/src/collection.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#pragma once
-
-#include <nicehash/bitfield-ascii.h>
-#include <nicehash/bitfield.h>
-#include <nicehash/map-str.h>
-#include <nicehash/map-view-str.h>
-#include <nicehash/set-int32.h>
-#include <nicehash/set-str.h>
-#include <nicehash/set-view-str.h>
-#include <nicehash/util.h>
-#include <nicehash/view-str.h>
-#include <stdint.h>
-
-// Data structure for mapping entity references to Unicode code points.
-NH_MAP_VIEW_STR_PROTO(hb_map_entity_references, int32_t);
-
-// Data structure for a set of tag names.
-NH_SET_VIEW_ISTR_PROTO(hb_set_tag_names);
-#define hb_set_tag_names_add_whole_literal(set, str)                           \
-	hb_set_tag_names_add_whole_array(set, nh_litarr(str))
-
-// Data structure for mapping tag names to sets of tag names.
-NH_MAP_VIEW_ISTR_PROTO(hb_map_tag_relations, hb_set_tag_names*);
-#define hb_map_tag_relations_set_whole_literal(map, str, v)                    \
-	hb_map_tag_relations_set_whole_array(map, nh_litarr(str), v)
diff --git a/src/err.c b/src/err.c
deleted file mode 100644
index 0b7220e..0000000
--- a/src/err.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#include <hb/err.h>
-
-// Set of error codes. Used for suppressing errors.
-NH_BITFIELD_IMPL(hb_err_set, hb_err, __HB_ERR_COUNT)
diff --git a/src/err.h b/src/err.h
deleted file mode 100644
index ba0dbd8..0000000
--- a/src/err.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#pragma once
-
-#include <hb/collection.h>
-
-typedef enum {
-	// WARNING: The __HB_ERR_COUNT value only works if the first value of
-	// this enum is set to zero.
-	HB_ERR_OK = 0,
-
-	HB_ERR_INTERR_UNKNOWN_ENTITY_TYPE,
-	HB_ERR_INTERR_UNKNOWN_CONTENT_NEXT_STATE,
-
-	HB_ERR_IO_FREAD_FAIL,
-
-	HB_ERR_PARSE_MALFORMED_ENTITY,
-	HB_ERR_PARSE_INVALID_ENTITY,
-	HB_ERR_PARSE_NONSTANDARD_TAG,
-	HB_ERR_PARSE_UCASE_TAG,
-	HB_ERR_PARSE_UCASE_ATTR,
-	HB_ERR_PARSE_UNQUOTED_ATTR,
-	HB_ERR_PARSE_ILLEGAL_CHILD,
-	HB_ERR_PARSE_UNCLOSED_TAG,
-	HB_ERR_PARSE_SELF_CLOSING_TAG,
-	HB_ERR_PARSE_NO_SPACE_BEFORE_ATTR,
-
-	HB_ERR_PARSE_UNEXPECTED_END,
-	HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-
-	// Special value to represent the amount of values above in this enum.
-	// WARNING: This only works if the first value is set to zero.
-	__HB_ERR_COUNT,
-} hb_err;
-
-// Set of error codes. Used for suppressing errors.
-NH_BITFIELD_PROTO(hb_err_set, hb_err, __HB_ERR_COUNT)
diff --git a/src/err.rs b/src/err.rs
new file mode 100644
index 0000000..ed5c308
--- /dev/null
+++ b/src/err.rs
@@ -0,0 +1,11 @@
+pub enum HbErr {
+    ExpectedCharNotFound { expected: u8, got: u8 },
+    ExpectedMatchNotFound(&'static [u8]),
+    ExpectedNotFound(&'static str),
+    NoSpaceBeforeAttr,
+    UnclosedTag,
+    UnexpectedCharFound(u8),
+    UnexpectedEnd,
+}
+
+pub type HbRes<T> = Result<T, HbErr>;
diff --git a/src/hyperbuild.c b/src/hyperbuild.c
deleted file mode 100644
index cac6982..0000000
--- a/src/hyperbuild.c
+++ /dev/null
@@ -1,179 +0,0 @@
-#include <errno.h>
-#include <fcntl.h>
-#include <hb/cfg.h>
-#include <hb/hyperbuild.h>
-#include <hb/proc.h>
-#include <hb/rule.h>
-#include <hb/rune.h>
-#include <hb/unit.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <sys/stat.h>
-#include <sys/unistd.h>
-
-void hyperbuild_init(void)
-{
-	hb_rule_init();
-}
-
-// Rate to read from file, set to 4 KiB.
-#define READ_RATE 4096
-// Rate to resize buffer containing file contents, set to 768 KiB.
-#define GROWTH_RATE 786432
-
-static void _read_file(char const* file, hb_rune** out, size_t* out_len)
-{
-	int fd = -1;
-	bool success = false;
-	hb_rune* output = NULL;
-
-	// Open file.
-	fd = open(file, O_RDONLY);
-	if (fd < 0) {
-		// Failed to open file.
-		goto finally;
-	}
-
-	// Get file size.
-	struct stat stats;
-	if (fstat(fd, &stats) != 0) {
-		// Failed to get file size.
-		goto finally;
-	}
-	off_t size = stats.st_size;
-
-	// Allocate memory for buffer.
-	output = malloc((size + 1) * sizeof(hb_rune));
-	size_t output_capacity = size;
-	size_t output_next = 0;
-	// Read into buffer.
-	while (true) {
-		// Check if there's enough room to read READ_RATE and reallocate
-		// if necessary.
-		if (output_next + READ_RATE >= output_capacity) {
-			output_capacity += GROWTH_RATE;
-			// Make room for terminator.
-			hb_rune* new_output =
-				realloc(output, output_capacity + 1);
-			if (new_output == NULL) {
-				// Failed to reallocate memory.
-				goto finally;
-			}
-			output = new_output;
-		}
-
-		// Attempt to read READ_RATE.
-		ssize_t read_amount = read(fd, output + output_next, READ_RATE);
-		if (read_amount < 0) {
-			// Failed to read.
-			goto finally;
-		}
-
-		if (read_amount == 0) {
-			// Reached EOF.
-			break;
-		}
-		output_next += read_amount;
-	}
-
-	output[output_next] = '\xFF';
-	*out_len = output_next;
-	success = true;
-
-finally:
-	if (fd >= 0) {
-		// File descriptor is valid (success or not), close it.
-		if (close(fd) != 0) {
-			// Failed to close file descriptor.
-			success = false;
-		}
-	}
-	if (!success && output != NULL) {
-		// Failed to read file, free memory and return NULL.
-		free(output);
-		output = NULL;
-	}
-	*out = output;
-}
-
-static void _set_file_read_error(hb_proc_result* result)
-{
-	char* msg = malloc(HB_PROC_ERROR_CUSTOM_SIZE * sizeof(char));
-	snprintf(msg, HB_PROC_ERROR_CUSTOM_SIZE,
-		 "Failed to read file with system error %d", errno);
-	result->code = HB_ERR_IO_FREAD_FAIL;
-	result->msg = msg;
-	result->pos = 0;
-}
-
-hb_rune* hyperbuild_from_file(char const* file, hb_cfg* cfg,
-			      hb_proc_result* result)
-{
-	hb_rune* input;
-	size_t input_size;
-	_read_file(file, &input, &input_size);
-	if (input == NULL) {
-		_set_file_read_error(result);
-	}
-
-	hyperbuild(input, input_size, input, cfg, result);
-	return input;
-}
-
-void hyperbuild_from_file_custom_output(char const* file, hb_rune* output,
-					hb_cfg* cfg, hb_proc_result* result)
-{
-	hb_rune* input;
-	size_t input_size;
-	_read_file(file, &input, &input_size);
-	if (input == NULL) {
-		_set_file_read_error(result);
-	}
-
-	hyperbuild(input, input_size, output, cfg, result);
-	free(input);
-}
-
-hb_rune* hyperbuild_from_input(hb_rune* input, size_t input_size, hb_cfg* cfg,
-			       hb_proc_result* result)
-{
-	hb_rune* output = malloc((input_size + 1) * sizeof(hb_rune));
-	// This function will ensure output is null terminated.
-	hyperbuild(input, input_size, output, cfg, result);
-	return output;
-}
-
-void hyperbuild_in_place(hb_rune* input, size_t input_size, hb_cfg* cfg,
-			 hb_proc_result* result)
-{
-	hyperbuild(input, input_size, input, cfg, result);
-}
-
-void hyperbuild(hb_rune* input, size_t input_size, hb_rune* output, hb_cfg* cfg,
-		hb_proc_result* result)
-{
-	input[input_size] = '\xFF';
-
-	hb_proc proc = {
-		.cfg = cfg,
-		.src = input,
-		.src_len = input_size,
-		.src_next = 0,
-		.out = output,
-		.out_next = 0,
-		.result = result,
-	};
-
-	if (!setjmp(proc.start)) {
-		hb_unit_content_html(&proc, NULL);
-		// No errors occurred.
-		result->code = HB_ERR_OK;
-		result->pos = proc.out_next;
-		result->msg = NULL;
-
-		// Null terminate output.
-		output[proc.out_next] = '\0';
-	} else {
-		// An error occurred.
-	}
-}
diff --git a/src/hyperbuild.h b/src/hyperbuild.h
deleted file mode 100644
index f22db98..0000000
--- a/src/hyperbuild.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#pragma once
-
-#include <hb/cfg.h>
-#include <hb/proc.h>
-#include <hb/rune.h>
-#include <stddef.h>
-
-/**
- * Initialise internal structures and data used in processing.
- * This function must be called before using any other hyperbuild function.
- */
-void hyperbuild_init(void);
-
-/**
- * Read a file and run hyperbuild on the contents. Output will be null
- * terminated if no error occurs.
- *
- * @param file path to the file
- * @param cfg configuration to use
- * @param[out] result where to write any resulting error information
- * @return pointer to a heap-allocated array containing processed output that
- * needs to be freed
- */
-hb_rune* hyperbuild_from_file(char const* file, hb_cfg* cfg,
-			      hb_proc_result* result);
-
-/**
- * Read a file and run hyperbuild on the contents, writing to {@param output}.
- * Output will be null terminated if no error occurs. WARNING: Does not check if
- * {@param output} is large enough. It should at least match the size of the
- * file.
- *
- * @param file path to the file
- * @param output output array to write to
- * @param cfg configuration to use
- * @param[out] result where to write any resulting error information
- */
-void hyperbuild_from_file_custom_output(char const* file, hb_rune* output,
-					hb_cfg* cfg, hb_proc_result* result);
-
-/**
- * Run hyperbuild on an input array and write to a heap-allocated array. Output
- * will be null terminated if no error occurs. WARNING: Input must end with
- * '\xFF' or '\0', and {@param input_size} must not include the terminator.
- *
- * @param input input array to process
- * @param cfg configuration to use
- * @param[out] result where to write any resulting error information
- * @return pointer to a heap-allocated array containing processed output that
- * needs to be freed
- */
-hb_rune* hyperbuild_from_input(hb_rune* input, size_t input_size, hb_cfg* cfg,
-			       hb_proc_result* result);
-
-/**
- * Run hyperbuild in place on an input array. Output will be null terminated if
- * no error occurs. WARNING: Input must end with '\xFF' or '\0', and {@param
- * input_size} must not include the terminator.
- *
- * @param input input array to process
- * @param cfg configuration to use
- * @param[out] result where to write any resulting error information
- */
-void hyperbuild_in_place(hb_rune* input, size_t input_size, hb_cfg* cfg,
-			 hb_proc_result* result);
-
-/**
- * Run hyperbuild on an input array and write to {@param output}. Output will be
- * null terminated if no error occurs. WARNING: Input must end with '\xFF' or
- * '\0', and {@param input_size} must not include the terminator. WARNING: Does
- * not check if {@param output} is large enough. It should at least match the
- * size of the input.
- *
- * @param input input array to process
- * @param output output array to write to
- * @param cfg configuration to use
- * @param[out] result where to write any resulting error information
- */
-void hyperbuild(hb_rune* input, size_t input_size, hb_rune* output, hb_cfg* cfg,
-		hb_proc_result* result);
diff --git a/src/lib.rs b/src/lib.rs
index e69de29..9a363f5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -0,0 +1,25 @@
+mod code;
+mod err;
+mod proc;
+mod spec;
+
+use err::HbRes;
+use crate::code::Code;
+use crate::proc::content::process_content;
+use crate::proc::Processor;
+
+/// Run hyperbuild on `code`.
+///
+/// `code` provides both the source to read and the output to write (see the
+/// `Code` trait). It is wrapped in a `Processor` and passed to
+/// `process_content` together with `None`.
+///
+/// NOTE(review): the previous comment was carried over from the C API and
+/// described `input`, `output`, `input_size` and `cfg` parameters that this
+/// function does not take. Whether its terminator and output-size warnings
+/// still apply depends on the `Code` implementation — TODO confirm.
+///
+/// Returns the result of `process_content`: `Ok(())`, or an `HbErr` on failure.
+fn hyperbuild<T: Code>(code: &mut T) -> HbRes<()> {
+    process_content(&Processor { data: code }, None)
+}
diff --git a/src/proc.h b/src/proc.h
deleted file mode 100644
index 0723069..0000000
--- a/src/proc.h
+++ /dev/null
@@ -1,148 +0,0 @@
-#pragma once
-
-#include <hb/cfg.h>
-#include <hb/collection.h>
-#include <hb/err.h>
-#include <hb/rune.h>
-#include <setjmp.h>
-#include <stdbool.h>
-#include <stddef.h>
-
-// Memory to allocate for a custom error message.
-#define HB_PROC_ERROR_CUSTOM_SIZE 512
-
-// Result of processing.
-typedef struct {
-	// The error code, which could be HB_ERR_OK if no errors occurred (i.e.
-	// processing completed successfully).
-	hb_err code;
-	// Error message if an error occurred. Allocated on heap and must be
-	// freed.
-	char* msg;
-	// The value of src_next at the time of error.
-	size_t pos;
-} hb_proc_result;
-
-// Processing state of a file. Most fields are used internally and set during
-// processing. Single use only; create one per processing.
-typedef struct {
-	// Settings for this run.
-	hb_cfg* cfg;
-	// This will be set just before starting to process so that when an
-	// error occurs, the processor will jump back to where this was set.
-	// This is known as a long jump and saves having to check if an error
-	// occurred at every stage of processing.
-	jmp_buf start;
-
-	// Source data, represented as an array of bytes (see hb_rune).
-	// To avoid having repeated checks and a dedicated marker/struct field
-	// for EOF, the src array will terminate with HB_EOF, an invalid Unicode
-	// byte.
-	hb_rune* src;
-	// Length of the source data.
-	size_t src_len;
-	// Offset of the next unconsumed character.
-	// This means that when src_next == src_len, there are no more
-	// unconsumed characters, the end has been reached, and the input has
-	// been processed.
-	size_t src_next;
-
-	// Where to write the output.
-	hb_rune* out;
-	// Offset of the next unwritten space.
-	size_t out_next;
-	// Result of processing, set on completion or error.
-	// There's no point in embedding it inside hb_proc, as it needs to be
-	// passed back to caller anyway.
-	hb_proc_result* result;
-} hb_proc;
-
-// Signature for a predicate function that returns true or false given a
-// character.
-typedef bool hb_proc_pred(hb_rune);
-
-// Method declarations for implementations in source files under hb/proc, sorted
-// by declaration order, grouped by file name in alphabetical order.
-
-hb_rune hb_proc_accept(hb_proc* proc);
-void hb_proc_accept_count(hb_proc* proc, size_t count);
-bool hb_proc_accept_if(hb_proc* proc, hb_rune c);
-bool hb_proc_accept_if_not(hb_proc* proc, hb_rune c);
-#define hb_proc_accept_if_matches(proc, match)                                 \
-	hb_proc_accept_if_matches_len(proc, match,                             \
-				      hb_string_literal_length(match))
-size_t hb_proc_accept_if_matches_len(hb_proc* proc, char const* match,
-				     size_t match_len);
-size_t hb_proc_accept_if_matches_line_terminator(hb_proc* proc);
-bool hb_proc_accept_if_predicate(hb_proc* proc, hb_proc_pred* pred);
-size_t hb_proc_accept_while_predicate(hb_proc* proc, hb_proc_pred* pred);
-
-void hb_proc_bounds_assert_not_eof(hb_proc* proc);
-bool hb_proc_bounds_check_offset(hb_proc* proc, size_t offset);
-void hb_proc_bounds_assert_offset(hb_proc* proc, size_t offset);
-
-#define hb_proc_matches(proc, match)                                           \
-	hb_proc_matches_len(proc, match, hb_string_literal_length(match))
-size_t hb_proc_matches_len(hb_proc* proc, char const* match, size_t match_len);
-#define hb_proc_matches_i(proc, match)                                         \
-	hb_proc_matches_len_i(proc, match, hb_string_literal_length(match))
-size_t hb_proc_matches_len_i(hb_proc* proc, char const* match,
-			     size_t match_len);
-size_t hb_proc_matches_line_terminator(hb_proc* proc);
-
-#define hb_proc_error_if_not_suppressed(proc, code, msg)                       \
-	if (!hb_err_set_has(&(proc)->cfg->suppressed_errors, code))            \
-		hb_proc_error(proc, code, msg);
-#define hb_proc_error(proc, code, msg)                                         \
-	hb_proc_error_pos_len(proc, code, (proc)->src_next, msg,               \
-			      hb_string_literal_length(msg))
-void hb_proc_error_pos_len(hb_proc* proc, hb_err code, size_t pos,
-			   char const* msg, size_t msg_len);
-#define hb_proc_error_custom(proc, code, format, ...)                          \
-	hb_proc_error_custom_pos(proc, code, (proc)->src_next, format,         \
-				 __VA_ARGS__)
-void hb_proc_error_custom_pos(hb_proc* proc, hb_err code, size_t pos,
-			      char const* format, ...);
-
-hb_eof_rune hb_proc_peek_eof(hb_proc* proc);
-hb_rune hb_proc_peek(hb_proc* proc);
-hb_eof_rune hb_proc_peek_eof_offset(hb_proc* proc, size_t offset);
-hb_rune hb_proc_peek_offset(hb_proc* proc, size_t offset);
-
-void hb_proc_require(hb_proc* proc, hb_rune c);
-hb_rune hb_proc_require_skip(hb_proc* proc, hb_rune c);
-hb_rune hb_proc_require_predicate(hb_proc* proc, hb_proc_pred* pred,
-				  char const* name);
-hb_rune hb_proc_require_skip_predicate(hb_proc* proc, hb_proc_pred* pred,
-				       char const* name);
-#define hb_proc_require_match(proc, match)                                     \
-	hb_proc_require_match_len(proc, match, hb_string_literal_length(match))
-void hb_proc_require_match_len(hb_proc* proc, char const* match,
-			       size_t match_len);
-#define hb_proc_require_skip_match(proc, match)                                \
-	hb_proc_require_skip_match_len(proc, match,                            \
-				       hb_string_literal_length(match))
-void hb_proc_require_skip_match_len(hb_proc* proc, char const* match,
-				    size_t match_len);
-
-hb_rune hb_proc_skip(hb_proc* proc);
-size_t hb_proc_skip_amount(hb_proc* proc, size_t amount);
-size_t hb_proc_skip_if(hb_proc* proc, hb_rune c);
-size_t hb_proc_skip_while_predicate(hb_proc* proc, hb_proc_pred* pred);
-#define hb_proc_skip_if_matches(proc, match)                                   \
-	hb_proc_skip_amount(proc, hb_proc_matches(proc, match))
-
-#define hb_proc_view_init_src(name, proc)                                      \
-	nh_view_str name;                                                      \
-	nh_view_str_init(&name, (proc)->src, 0, 0)
-#define hb_proc_view_init_out(name, proc)                                      \
-	nh_view_str name;                                                      \
-	nh_view_str_init(&name, (proc)->out, 0, 0)
-void hb_proc_view_start_with_src_next(nh_view_str* view, hb_proc* proc);
-void hb_proc_view_end_with_src_prev(nh_view_str* view, hb_proc* proc);
-void hb_proc_view_start_with_out_next(nh_view_str* view, hb_proc* proc);
-void hb_proc_view_end_with_out_prev(nh_view_str* view, hb_proc* proc);
-
-void hb_proc_write(hb_proc* proc, hb_rune c);
-void hb_proc_write_view(hb_proc* proc, nh_view_str* view);
-size_t hb_proc_write_utf_8(hb_proc* proc, uint32_t c);
diff --git a/src/proc/accept.c b/src/proc/accept.c
deleted file mode 100644
index cefa41c..0000000
--- a/src/proc/accept.c
+++ /dev/null
@@ -1,168 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rune.h>
-#include <stdbool.h>
-#include <string.h>
-
-/**
- * Accept the next character.
- * Will cause an error if already at end.
- *
- * @param proc proc
- * @return next character
- * @throws on HB_ERR_PARSE_UNEXPECTED_END
- */
-hb_rune hb_proc_accept(hb_proc* proc)
-{
-	// Get the next character, throwing if EOF.
-	hb_rune c = hb_proc_peek(proc);
-
-	// Append to output.
-	hb_proc_write(proc, c);
-
-	// Mark character as consumed.
-	proc->src_next++;
-
-	return c;
-}
-
-/**
- * Accept the next `count` characters.
- * Requires at least `count` characters remaining.
- *
- * @param proc proc
- * @param count amount of characters
- * @throws on HB_ERR_PARSE_UNEXPECTED_END
- */
-void hb_proc_accept_count(hb_proc* proc, size_t count)
-{
-	hb_proc_bounds_assert_offset(proc, count);
-
-	memcpy(&proc->out[proc->out_next], &proc->src[proc->src_next], count);
-
-	proc->src_next += count;
-	proc->out_next += count;
-}
-
-/**
- * Accept the following character if it is `c`.
- * Won't match or cause an error if there are no characters remaining.
- * Undefined behaviour if `c == HB_EOF`.
- *
- * @param proc proc
- * @param c character to match
- * @return false if nothing was accepted, true otherwise
- */
-bool hb_proc_accept_if(hb_proc* proc, hb_rune c)
-{
-	hb_eof_rune n = hb_proc_peek_eof(proc);
-
-	// n != c takes care of n == HB_EOF
-	if (n != c) {
-		return false;
-	}
-
-	hb_proc_accept(proc);
-
-	return true;
-}
-
-/**
- * Accept the following character if it is not `c`.
- * Won't match or cause an error if there are no characters remaining.
- * Undefined behaviour if `c == HB_EOF`.
- *
- * @param proc proc
- * @param c character to not match
- * @return false if nothing was accepted, true otherwise
- */
-bool hb_proc_accept_if_not(hb_proc* proc, hb_rune c)
-{
-	hb_eof_rune n = hb_proc_peek_eof(proc);
-
-	// n == c takes care of n != HB_EOF
-	if (n == c) {
-		return false;
-	}
-
-	hb_proc_accept(proc);
-
-	return true;
-}
-
-/**
- * Accept the following characters if they match `match`.
- * Won't match or cause an error if there are not enough characters remaining.
- * If `match` has a length of zero, behaviour is undefined.
- *
- * @param proc proc
- * @param match characters to match
- * @param match_len length of {@arg match}
- * @return 0 if nothing was accepted, length of `match` otherwise
- */
-size_t hb_proc_accept_if_matches_len(hb_proc* proc, char const* match,
-				     size_t match_len)
-{
-	if (hb_proc_matches_len(proc, match, match_len)) {
-		hb_proc_accept_count(proc, match_len);
-	}
-
-	return match_len;
-}
-
-/**
- * Accept the following characters if they are either "\r", "\r\n", or "\n".
- * Won't cause an error if insufficient amount of characters left.
- *
- * @param proc proc
- * @return amount of characters matched
- */
-size_t hb_proc_accept_if_matches_line_terminator(hb_proc* proc)
-{
-	size_t match_len = hb_proc_matches_line_terminator(proc);
-
-	if (match_len) {
-		hb_proc_accept_count(proc, match_len);
-	}
-
-	return match_len;
-}
-
-/**
- * Accept the following character if it satisfies the predicate `pred`.
- * Won't do anything if already at the end.
- *
- * @param proc proc
- * @param pred predicate
- * @return false if nothing was accepted, true otherwise
- */
-bool hb_proc_accept_if_predicate(hb_proc* proc, hb_proc_pred* pred)
-{
-	hb_eof_rune c = hb_proc_peek_eof(proc);
-
-	if (c == HB_EOF || !(*pred)((hb_rune) c)) {
-		return false;
-	}
-
-	hb_proc_accept(proc);
-
-	return true;
-}
-
-/**
- * Accept every following character until one dissatisfies the predicate `pred`,
- * or the end is reached.
- *
- * @param proc proc
- * @param pred predicate
- * @return amount of characters accepted
- */
-size_t hb_proc_accept_while_predicate(hb_proc* proc, hb_proc_pred* pred)
-{
-	size_t count = 0;
-
-	while (hb_proc_accept_if_predicate(proc, pred)) {
-		count++;
-	}
-
-	return count;
-}
diff --git a/src/proc/attr/mod.rs b/src/proc/attr/mod.rs
new file mode 100644
index 0000000..fec31f9
--- /dev/null
+++ b/src/proc/attr/mod.rs
@@ -0,0 +1,48 @@
+use crate::proc::Processor;
+use crate::err::HbRes;
+use crate::spec::codepoint::is_control;
+use crate::code::Code;
+use crate::proc::attr::quoted::{is_attr_quote, process_quoted_val};
+use crate::proc::attr::unquoted::process_attr_unquoted_val;
+
+mod quoted;
+mod unquoted;
+
+pub enum AttrType { // Outcome of processing one attribute's value.
+    // Special value for hb_unit_tag.
+    None,
+    // The remaining variants are returned by process_attr / process_quoted_val.
+    Quoted,
+    Unquoted,
+    NoValue, // Returned by process_attr when no `=` follows the attribute name.
+}
+
+// Whether `c` may appear in an attribute name: anything except space, `"`,
+// `'`, `>`, `/`, `=`, and control characters (as judged by `is_control`).
+// NOTE: Unicode noncharacters not tested.
+// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name for spec.
+fn is_name_char(c: u8) -> bool {
+    let is_excluded_punctuation =
+        c == b' ' || c == b'"' || c == b'\'' || c == b'>' || c == b'/' || c == b'=';
+    !is_excluded_punctuation && !is_control(c)
+}
+
+// Processes one attribute: its name, an optional `=`, and the value if present.
+// Returns the AttrType describing how the value (if any) was handled.
+pub fn process_attr<D: Code>(proc: &Processor<D>) -> HbRes<AttrType> {
+    let attr_name = proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().slice();
+    // Whitespace collapsing/trimming is only requested for `class` values.
+    let collapse_and_trim_ws = attr_name.eq_ignore_ascii_case(b"class");
+
+    // No `=` after the name: the attribute has no value.
+    if !proc.match_char(b'=').keep().matched() {
+        return Ok(AttrType::NoValue);
+    };
+    if !proc.match_pred(is_attr_quote).matched() {
+        // Unquoted attribute value.
+        process_attr_unquoted_val(proc)?;
+        return Ok(AttrType::Unquoted);
+    };
+    // Quoted attribute value.
+    process_quoted_val(proc, collapse_and_trim_ws)
+}
diff --git a/src/proc/attr/quoted.rs b/src/proc/attr/quoted.rs
new file mode 100644
index 0000000..017b5ff
--- /dev/null
+++ b/src/proc/attr/quoted.rs
@@ -0,0 +1,322 @@
+use crate::code::Code;
+use crate::err::HbRes;
+use crate::proc::attr::AttrType;
+use crate::proc::entity::{parse_entity, process_entity};
+use crate::proc::{Match, Processor};
+use crate::spec::codepoint::is_whitespace;
+use phf::{phf_map, Map};
+use std::thread::current;
+
+// Predicate matching only the double quote byte (`"`); process_quoted_val uses
+// it as the end-of-value test for a value that was opened with `"`.
+pub fn is_double_quote(c: u8) -> bool { b'"' == c }
+
+// Predicate matching only the single quote byte (`'`); process_quoted_val uses
+// it as the end-of-value test for a value that was opened with `'`.
+pub fn is_single_quote(c: u8) -> bool { b'\'' == c }
+
+// Whether `c` is one of the two quote bytes that may delimit an attribute value.
+// See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example for spec.
+pub fn is_attr_quote(c: u8) -> bool {
+    // Backtick is deliberately absent: it is not a valid quote character according to spec.
+    c == b'"' || c == b'\''
+}
+
+// End-of-value predicate for a value with no opening quote: true for `>` and for
+// anything `is_whitespace` accepts.
+pub fn is_unquoted_delimiter(c: u8) -> bool { c == b'>' || is_whitespace(c) }
+
+static ENCODED: Map<u8, &'static [u8]> = phf_map! { // Byte -> entity text written when that byte has to be encoded.
+    b'\'' => b"&#39;",
+    b'"' => b"&#34;",
+    b'>' => b"&gt;",
+    // Whitespace characters as defined by spec in crate::spec::codepoint::is_whitespace.
+    b'\t' => b"&#9;", // 0x09; byte literals keep every key a typed `u8` literal for phf_map!.
+    b'\n' => b"&#10;", // 0x0a
+    b'\x0c' => b"&#12;", // 0x0c
+    b'\r' => b"&#13;", // 0x0d
+    b' ' => b"&#32;", // 0x20
+};
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] // PartialEq: compared with `==`/`!=` against `CharType::End` and inside `Option`.
+enum CharType {
+    End, // The value's delimiter was matched; not a value character.
+    MalformedEntity, // An `&` for which the entity handler returned `None`.
+    DecodedNonAscii, // An entity that decoded to a code point above 0x7f.
+    // Normal needs associated character to be able to write it.
+    Normal(u8),
+    // Whitespace needs associated character to determine cost of encoding it.
+    Whitespace(u8),
+    SingleQuote,
+    DoubleQuote,
+    RightChevron,
+}
+
+impl CharType {
+    fn from_char(c: u8) -> CharType { // Classifies one byte of an attribute value.
+        match c {
+            b'"' => CharType::DoubleQuote,
+            b'\'' => CharType::SingleQuote,
+            b'>' => CharType::RightChevron,
+            c => if is_whitespace(c) { CharType::Whitespace(c) } else { CharType::Normal(c) }, // Both carry the byte.
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] // PartialEq: compared with `==`/`!=` against `DelimiterType::Unquoted`.
+enum DelimiterType {
+    Double, // Value is wrapped in `"`.
+    Single, // Value is wrapped in `'`.
+    Unquoted, // Value is written without surrounding quotes.
+}
+
+struct Metrics { // Statistics about one attribute value, updated through `collect_char_type`.
+    count_double_quotation: usize, // Number of `CharType::DoubleQuote` characters collected.
+    count_single_quotation: usize, // Number of `CharType::SingleQuote` characters collected.
+    // NOTE: This count is amount after any trimming and collapsing of whitespace.
+    count_whitespace: usize,
+    // Since whitespace characters have varying encoded lengths, also calculate total length if all of them had to be encoded.
+    total_whitespace_encoded_length: usize,
+    // First and last character value types after any trimming and collapsing of whitespace.
+    // NOTE: First/last value characters, not quotes/delimiters.
+    first_char_type: Option<CharType>,
+    last_char_type: Option<CharType>,
+    // How many times `collect_char_type` is called. Used to determine first and last characters when writing.
+    collected_count: usize,
+}
+
+impl Metrics {
+    // Update metrics with next character type.
+    fn collect_char_type(&mut self, char_type: CharType) {
+        match char_type {
+            CharType::Whitespace(c) => {
+                self.count_whitespace += 1;
+                self.total_whitespace_encoded_length += ENCODED[&c].len();
+            }
+            CharType::SingleQuote => self.count_single_quotation += 1,
+            CharType::DoubleQuote => self.count_double_quotation += 1,
+            _ => (),
+        };
+        // The first collected type is recorded once; the last is overwritten on every call.
+        if self.first_char_type.is_none() {
+            self.first_char_type = Some(char_type);
+        };
+        self.last_char_type = Some(char_type);
+        self.collected_count += 1;
+    }
+
+    fn unquoted_cost(&self) -> usize {
+        // Costs for encoding first and last characters if going with unquoted attribute value.
+        // NOTE: Don't need to consider whitespace for either as all whitespace will be encoded and counts as part of `total_whitespace_encoded_length`.
+        let first_char_encoding_cost = match self.first_char_type {
+            // WARNING: Change `first_char_is_quote_encoded` if changing here.
+            Some(CharType::DoubleQuote) => ENCODED[&b'"'].len(),
+            Some(CharType::SingleQuote) => ENCODED[&b'\''].len(),
+            _ => 0,
+        };
+        let first_char_is_quote_encoded = first_char_encoding_cost > 0;
+        let last_char_encoding_cost = match self.last_char_type {
+            Some(CharType::RightChevron) => ENCODED[&b'>'].len(),
+            _ => 0,
+        };
+
+        first_char_encoding_cost
+            + self.count_double_quotation
+            + self.count_single_quotation
+            + self.total_whitespace_encoded_length
+            + last_char_encoding_cost
+            // If first char is quote and is encoded, it will be counted twice as it'll also be part of `metrics.count_*_quotation`.
+            // Subtract last to prevent underflow.
+            - first_char_is_quote_encoded as usize
+    }
+
+    fn single_quoted_cost(&self) -> usize { // Cost with `'` delimiters: only single quotes are encoded.
+        self.count_single_quotation * ENCODED[&b'\''].len() + self.count_double_quotation + self.count_whitespace
+    }
+
+    fn double_quoted_cost(&self) -> usize { // Cost with `"` delimiters: only double quotes are encoded.
+        self.count_double_quotation * ENCODED[&b'"'].len() + self.count_single_quotation + self.count_whitespace
+    }
+
+    fn get_optimal_delimiter_type(&self) -> DelimiterType {
+        // When all equal, prefer double quotes to all and single quotes to unquoted.
+        let mut min = (DelimiterType::Double, self.double_quoted_cost());
+        // NOTE(review): the quoted costs do not include the two delimiter bytes themselves — confirm intended.
+        let single = (DelimiterType::Single, self.single_quoted_cost());
+        if single.1 < min.1 {
+            min = single;
+        };
+
+        let unquoted = (DelimiterType::Unquoted, self.unquoted_cost());
+        if unquoted.1 < min.1 {
+            min = unquoted;
+        };
+
+        min.0
+    }
+}
+
+// Walks an attribute value until `delimiter_pred` matches, classifying every character and passing it
+// to `on_char` with a running index; `on_entity` is invoked for each `&`, and errors are propagated.
+fn consume_attr_value<D: Code>(
+    proc: &Processor<D>,
+    should_collapse_and_trim_ws: bool,
+    delimiter_pred: fn(u8) -> bool,
+    on_entity: fn(&Processor<D>) -> HbRes<Option<u32>>,
+    mut on_char: impl FnMut(CharType, usize), // FnMut rather than `fn`: callers pass closures that capture state.
+) -> HbRes<()> {
+    // Set to true when one or more immediately previous characters were whitespace and deferred for processing after the contiguous whitespace.
+    // NOTE: Only used if `should_collapse_and_trim_ws`.
+    let mut currently_in_whitespace = false;
+    let mut char_no = 0;
+    loop {
+        let char_type = if proc.match_pred(delimiter_pred).matched() {
+            // DO NOT BREAK HERE. More processing is done afterwards upon reaching end.
+            CharType::End
+        } else if proc.match_char(b'&').matched() {
+            match on_entity(proc)? {
+                Some(e) => if e <= 0x7f { CharType::from_char(e as u8) } else { CharType::DecodedNonAscii },
+                None => CharType::MalformedEntity,
+            }
+        } else {
+            CharType::from_char(proc.skip()?)
+        };
+        let at_end = if let CharType::End = char_type { true } else { false };
+        if should_collapse_and_trim_ws {
+            if let CharType::Whitespace(_) = char_type {
+                // Ignore this whitespace character, but mark the fact that we are currently in contiguous whitespace.
+                currently_in_whitespace = true;
+                continue;
+            } else {
+                // Now past whitespace (e.g. moved to non-whitespace char or end of attribute value). Either:
+                // - ignore contiguous whitespace (i.e. do nothing) if we are currently at beginning (`char_no == 0`) or end of value; or
+                // - collapse contiguous whitespace (i.e. count as one whitespace char) otherwise.
+                if currently_in_whitespace && char_no > 0 && !at_end {
+                    // Collect current collapsed contiguous whitespace that was ignored previously.
+                    on_char(CharType::Whitespace(b' '), char_no);
+                    char_no += 1;
+                };
+                currently_in_whitespace = false;
+            };
+        };
+        if at_end {
+            break;
+        };
+        on_char(char_type, char_no);
+        char_no += 1;
+    };
+
+    Ok(())
+}
+
+// TODO Might encounter danger if Unicode whitespace is considered as whitespace.
+pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim_ws: bool) -> HbRes<AttrType> {
+    // Processing a quoted attribute value is tricky, due to the fact that
+    // it's not possible to know whether or not to unquote the value until
+    // the value has been processed. For example, decoding an entity could
+    // create whitespace in a value which might otherwise be unquotable. How
+    // this function works is:
+    //
+    // 1. Read the whole value once (stage 1), parsing entities as necessary
+    // and collecting metrics on the types of characters in the value.
+    // 2. Restore the checkpoint taken at the start of the value and, based
+    // on the metrics, pick the delimiter (double quote, single quote or
+    // none) with the lowest encoding cost.
+    // 3. Read the value a second time (stage 2), writing it out with the
+    // chosen delimiter and encoding whatever that delimiter requires. This
+    // does mean that the value is read twice.
+    // Returns AttrType::Quoted if a delimiter was written, else Unquoted.
+
+    let src_delimiter = proc.match_pred(is_attr_quote).discard().maybe_char();
+    let src_delimiter_pred = match src_delimiter {
+        Some(b'"') => is_double_quote,
+        Some(b'\'') => is_single_quote,
+        None => is_unquoted_delimiter,
+        _ => unreachable!(),
+    };
+
+    // Stage 1: read and collect metrics on attribute value characters.
+    let value_start_checkpoint = proc.checkpoint();
+    let mut metrics = Metrics {
+        count_double_quotation: 0,
+        count_single_quotation: 0,
+        count_whitespace: 0,
+        total_whitespace_encoded_length: 0,
+        first_char_type: None,
+        last_char_type: None,
+        collected_count: 0,
+    };
+    consume_attr_value(
+        proc,
+        should_collapse_and_trim_ws,
+        src_delimiter_pred,
+        parse_entity,
+        |char_type, _| metrics.collect_char_type(char_type),
+    )?;
+
+    // Stage 2: optimally minify attribute value using metrics.
+    value_start_checkpoint.restore();
+    let optimal_delimiter = metrics.get_optimal_delimiter_type();
+    let optimal_delimiter_char = match optimal_delimiter {
+        DelimiterType::Double => Some(b'"'),
+        DelimiterType::Single => Some(b'\''),
+        DelimiterType::Unquoted => None,
+    };
+    // Write opening delimiter, if any.
+    if let Some(c) = optimal_delimiter_char {
+        proc.write(c);
+    }
+    consume_attr_value(
+        proc,
+        should_collapse_and_trim_ws,
+        src_delimiter_pred,
+        process_entity,
+        |char_type, char_no| match char_type {
+            // This should never happen.
+            CharType::End => unreachable!(),
+
+            // Ignore these; already written by process_entity.
+            CharType::MalformedEntity => {}
+            CharType::DecodedNonAscii => {}
+
+            CharType::Normal(c) => { proc.write(c); }
+            // If unquoted, encode any whitespace anywhere.
+            CharType::Whitespace(c) => match optimal_delimiter {
+                DelimiterType::Unquoted => { proc.write_slice(ENCODED[&c]); }
+                _ => { proc.write(c); }
+            },
+            // If single quoted, encode any single quote anywhere.
+            // If unquoted, encode single quote if first character.
+            CharType::SingleQuote => match (optimal_delimiter, char_no) {
+                (DelimiterType::Single, _) | (DelimiterType::Unquoted, 0) => { proc.write_slice(ENCODED[&b'\'']); }
+                _ => { proc.write(b'\''); }
+            },
+            // If double quoted, encode any double quote anywhere.
+            // If unquoted, encode double quote if first character.
+            CharType::DoubleQuote => match (optimal_delimiter, char_no) {
+                (DelimiterType::Double, _) | (DelimiterType::Unquoted, 0) => { proc.write_slice(ENCODED[&b'"']); }
+                _ => { proc.write(b'"'); }
+            },
+            // If unquoted (no delimiter char), encode right chevron if last character.
+            CharType::RightChevron => if optimal_delimiter_char.is_none() && char_no == metrics.collected_count - 1 {
+                proc.write_slice(ENCODED[&b'>']);
+            } else {
+                proc.write(b'>');
+            },
+        },
+    )?;
+    // Ensure closing delimiter in src has been matched and discarded, if any.
+    if let Some(c) = src_delimiter {
+        proc.match_char(c).expect().discard();
+    }
+    // Write closing delimiter, if any.
+    if let Some(c) = optimal_delimiter_char {
+        proc.write(c);
+    }
+    // A delimiter character exists exactly when the value was written quoted.
+    if optimal_delimiter_char.is_some() {
+        Ok(AttrType::Quoted)
+    } else {
+        Ok(AttrType::Unquoted)
+    }
+}
diff --git a/src/proc/attr/unquoted.rs b/src/proc/attr/unquoted.rs
new file mode 100644
index 0000000..26dd160
--- /dev/null
+++ b/src/proc/attr/unquoted.rs
@@ -0,0 +1,36 @@
+use crate::proc::Processor;
+use crate::err::{HbRes, HbErr};
+use crate::spec::codepoint::is_whitespace;
+use crate::code::Code;
+use crate::proc::entity::process_entity;
+
+// Whether `c` may appear in an unquoted value: not whitespace, `"`, `'`, `` ` ``, `=`, `<` or `>`.
+// See https://html.spec.whatwg.org/multipage/syntax.html#unquoted for spec.
+fn is_valid_unquoted_value_char(c: u8) -> bool {
+    match c {
+        b'"' | b'\'' | b'`' | b'=' | b'<' | b'>' => false,
+        c => !is_whitespace(c),
+    }
+}
+
+// Processes an unquoted attribute value; errors if it contains no characters at all.
+// TODO Unquoted could be optimised to quoted if used entities to encode illegal chars.
+pub fn process_attr_unquoted_val<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    let mut at_least_one_char = false;
+    loop {
+        if proc.match_char(b'&').matched() {
+            // Process entity, propagating any error instead of silently dropping it.
+            // TODO Entity could decode to illegal character.
+            process_entity(proc)?;
+        } else if !proc.match_pred(is_valid_unquoted_value_char).keep().matched() {
+            break;
+        }
+        at_least_one_char = true;
+    }
+
+    if !at_least_one_char {
+        Err(HbErr::ExpectedNotFound("Expected unquoted attribute value"))
+    } else {
+        Ok(())
+    }
+}
diff --git a/src/proc/bang.rs b/src/proc/bang.rs
new file mode 100644
index 0000000..66ca0c2
--- /dev/null
+++ b/src/proc/bang.rs
@@ -0,0 +1,13 @@
+use crate::proc::Processor;
+use crate::code::Code;
+use crate::err::HbRes;
+
+pub fn process_bang<D: Code>(proc: &Processor<D>) -> HbRes<()> { // Processes a `<!...>` construct.
+    proc.match_seq(b"<!").require()?.keep(); // The opening `<!` is required; its absence is an error.
+    // Everything up to the next `>` is passed to `keep()`.
+    proc.match_while_not_char(b'>').keep();
+    // The closing `>` is required as well.
+    proc.match_char(b'>').require()?.keep();
+
+    Ok(())
+}
diff --git a/src/proc/bounds.c b/src/proc/bounds.c
deleted file mode 100644
index 331f96c..0000000
--- a/src/proc/bounds.c
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rune.h>
-#include <stdbool.h>
-
-/**
- * Assert that there are still unconsumed source characters remaining.
- *
- * @param proc proc
- * @throws HB_ERR_PARSE_UNEXPECTED_END if the end of the source has been reached
- */
-void hb_proc_bounds_assert_not_eof(hb_proc* proc)
-{
-	if (proc->src_next == proc->src_len) {
-		hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END,
-			      "Unexpected end of input");
-	}
-}
-
-/**
- * Check that `offset` characters from next does not exceed the end of the
- * source. When `offset` is 0, it represents the next unconsumed character.
- *
- * @param proc proc
- * @param offset
- * @return true if src_next + offset <= src_len
- */
-bool hb_proc_bounds_check_offset(hb_proc* proc, size_t offset)
-{
-	return proc->src_next + offset <= proc->src_len;
-}
-
-/**
- * Assert that `offset` characters from next does not exceed the end of the
- * source. When `offset` is 0, it represents the next unconsumed character.
- *
- * @param proc proc
- * @param offset
- * @throws HB_ERR_PARSE_UNEXPECTED_END if `offset` exceeds end
- */
-void hb_proc_bounds_assert_offset(hb_proc* proc, size_t offset)
-{
-	if (!hb_proc_bounds_check_offset(proc, offset)) {
-		hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END,
-			      "Unexpected end of input");
-	}
-}
diff --git a/src/proc/comment.rs b/src/proc/comment.rs
new file mode 100644
index 0000000..a09e47a
--- /dev/null
+++ b/src/proc/comment.rs
@@ -0,0 +1,14 @@
+use crate::proc::Processor;
+use crate::code::Code;
+use crate::err::HbRes;
+
+pub fn process_comment<D: Code>(proc: &Processor<D>) -> HbRes<()> { // Handles `<!-- ... -->`; every match is passed to `discard()`.
+    proc.match_seq(b"<!--").expect().discard();
+    // Discard everything before the `-->` terminator.
+    // TODO Cannot use this pattern
+    proc.match_while_not_seq(b"-->").discard();
+    // A missing terminator is reported as an error with the reason "comment end".
+    proc.match_seq(b"-->").require_with_reason("comment end")?.discard();
+
+    Ok(())
+}
diff --git a/src/proc/content.rs b/src/proc/content.rs
new file mode 100644
index 0000000..df18074
--- /dev/null
+++ b/src/proc/content.rs
@@ -0,0 +1,156 @@
+use crate::code::Code;
+use crate::proc::Processor;
+use crate::spec::codepoint::is_whitespace;
+use crate::proc::comment::process_comment;
+use crate::proc::bang::process_bang;
+use crate::proc::entity::process_entity;
+use crate::proc::tag::process_tag;
+use crate::err::HbRes;
+use crate::spec::tag::wss::WSS_TAGS;
+use crate::spec::tag::content::CONTENT_TAGS;
+use crate::spec::tag::formatting::FORMATTING_TAGS;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] // Copy: process_content assigns `next_state` to `last_state` and then compares it again.
+enum State { // What comes next in the content being processed.
+	Comment,
+	Bang,
+	OpeningTag,
+	// The three states above are the ones `is_comment_bang_opening_tag` accepts.
+	Start, // Initial `last_state`, before anything has been processed.
+	End, // End of input, or `</` comes next.
+	Entity,
+	Whitespace,
+	Text,
+}
+
+impl State {
+	// Whether this state is one of the three markup states: comment, bang or
+	// opening tag.
+	fn is_comment_bang_opening_tag(&self) -> bool {
+		let is_comment = *self == State::Comment;
+		let is_bang = *self == State::Bang;
+		let is_opening_tag = *self == State::OpeningTag;
+		is_comment || is_bang || is_opening_tag
+	}
+
+	// Works out which state the upcoming input puts the content processor in.
+	// The checks run in a fixed order and the first one that matches wins, so
+	// longer prefixes (`<!--`, `<!`) are tested before the bare `<`.
+	fn next_state<D: Code>(proc: &Processor<D>) -> State {
+		// TODO Optimise to trie.
+
+		if proc.data.at_end() || proc.match_seq(b"</").matched() {
+			// Nothing left, or a closing tag starts here.
+			State::End
+		} else if proc.match_pred(is_whitespace).matched() {
+			// A whitespace character comes next.
+			State::Whitespace
+		} else if proc.match_seq(b"<!--").matched() {
+			// Start of a comment.
+			State::Comment
+		} else if proc.match_seq(b"<!").matched() {
+			// Check after comment
+			State::Bang
+		} else if proc.match_char(b'<').matched() {
+			// Check after comment and bang
+			State::OpeningTag
+		} else if proc.match_char(b'&').matched() {
+			// Possible start of an entity.
+			State::Entity
+		} else {
+			// Anything else is plain text.
+			State::Text
+		}
+	}
+}
+
+/*
+ * Processes content (text, entities, comments, bangs and child tags) until the
+ * end of input or a closing tag (`</`) is reached. `parent` is presumably the
+ * enclosing tag's name; it selects which whitespace minifications apply.
+ *
+ * Whitespace handling is the trickiest part of this function. Three settings
+ * affect it: collapse, destroy whole, and trim. Contiguous whitespace is
+ * buffered and only written (or dropped) once the following state is known,
+ * because the decision depends on what comes next.
+ * Errors from the sub-processors are propagated to the caller.
+ */
+pub fn process_content<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes<()> {
+	let should_collapse_whitespace = parent.filter(|p| !WSS_TAGS.contains(p)).is_some();
+	let should_destroy_whole_whitespace = parent.filter(|p| !WSS_TAGS.contains(p) && !CONTENT_TAGS.contains(p) && !FORMATTING_TAGS.contains(p)).is_some();
+	let should_trim_whitespace = parent.filter(|p| !WSS_TAGS.contains(p) && !FORMATTING_TAGS.contains(p)).is_some();
+	// NOTE(review): with `parent == None` all three flags are false, so no whitespace is minified — confirm intended.
+	// Trim leading whitespace if configured to do so.
+	if should_trim_whitespace {
+		proc.match_while_pred(is_whitespace).discard();
+	};
+
+	let mut last_state = State::Start;
+	// Whether or not currently in whitespace.
+	let mut whitespace_start = None;
+	// If currently in whitespace, whether or not current contiguous
+	// whitespace started after a bang, comment, or tag.
+	let mut whitespace_started_after_cbot = false;
+
+	loop {
+		let next_state = State::next_state(proc);
+
+		if next_state == State::Whitespace {
+			// Whitespace is always buffered and then processed
+			// afterwards, even if not minifying.
+			proc.skip()?;
+
+			if last_state != State::Whitespace {
+				// This is the start of one or more whitespace
+				// characters, so start a view of this
+				// contiguous whitespace and don't write any
+				// characters that are part of it yet.
+				whitespace_start = Some(proc.start_read_slice());
+				whitespace_started_after_cbot = last_state.is_comment_bang_opening_tag();
+			} else {
+				// This is part of a contiguous whitespace, but
+				// not the start of, so simply ignore.
+			}
+		} else {
+			// Next character is not whitespace, so handle any
+			// previously buffered whitespace.
+			if let Some(whitespace_buffered) = whitespace_start {
+				if should_destroy_whole_whitespace && whitespace_started_after_cbot && next_state.is_comment_bang_opening_tag() {
+					// Whitespace is between two tags, comments, or bangs.
+					// destroy_whole_whitespace is on, so don't write it.
+				} else if should_trim_whitespace && next_state == State::End {
+					// Whitespace is trailing.
+					// should_trim_whitespace is on, so don't write it.
+				} else if should_collapse_whitespace {
+					// Current contiguous whitespace needs to be reduced to a single space character.
+					proc.write(b' ');
+				} else {
+					// Whitespace cannot be minified, so
+					// write in entirety.
+					proc.write_slice(proc.get_slice(whitespace_buffered));
+				}
+
+				// Reset whitespace buffer.
+				whitespace_start = None;
+			};
+
+			// Process and consume next character(s), propagating any error.
+			match next_state {
+				State::Comment => { process_comment(proc)?; }
+				State::Bang => { process_bang(proc)?; }
+				State::OpeningTag => { process_tag(proc, parent)?; } // NOTE(review): assumes an HbRes return — confirm.
+				State::End => (),
+				State::Entity => { process_entity(proc)?; }
+				State::Text => { proc.accept()?; } // NOTE(review): assumes an HbRes return like `skip` — confirm.
+				_ => unreachable!(),
+			};
+		};
+
+		if next_state == State::End {
+			break;
+		};
+		last_state = next_state; // Done after the End check so `next_state` is not used once moved.
+	};
+
+	Ok(())
+}
diff --git a/src/proc/entity.rs b/src/proc/entity.rs
new file mode 100644
index 0000000..ee4bfef
--- /dev/null
+++ b/src/proc/entity.rs
@@ -0,0 +1,177 @@
+// The minimum length of any entity is 3, which is a character entity reference
+// with a single character name. The longest UTF-8 representation of a Unicode
+// code point is 4 bytes. Because there are no character entity references with
+// a name of length 1, it's always better to decode entities for minification
+// purposes.
+
+// Based on the data sourced from https://www.w3.org/TR/html5/entities.json as
+// of 2019-04-20T04:00:00.000Z:
+// - Entity names can have [A-Za-z0-9] characters, and are case sensitive.
+// - Some character entity references do not need to end with a semicolon.
+// - The longest name is "CounterClockwiseContourIntegral", with length 31
+// (excluding leading ampersand and trailing semicolon).
+// - All entity names are at least 2 characters long.
+
+// Browser implementation behaviour to consider:
+// - It is unclear what happens if an entity name does not match case
+// sensitively but matches two or more case insensitively.
+//   - For example, given "AlphA" or "aLpha", does the browser choose "alpha" or
+//   "Alpha"?
+// - Do browsers render valid entities without trailing semicolons?
+//   - For example, how do browsers interpret "Chuck-&amp-Cheese", "1&amp1", and
+//   "&ampe;"?
+
+// hyperbuild implementation:
+// - Entities must start with an ampersand and end with a semicolon.
+// - Once an ampersand is encountered, it and the sequence of characters
+// following must match the following ECMAScript regular expression to be
+// considered a well formed entity:
+//
+//   /&(#(x[0-9a-f]{1,6}|[0-9]{1,7})|[a-z0-9]{2,31});/i
+//
+// - If the sequence of characters following an ampersand do not combine to form
+// a well formed entity, the ampersand is considered a bare ampersand.
+//   - A bare ampersand is an ampersand that is interpreted literally and not as
+//   the start of an entity.
+//   - hyperbuild looks ahead without consuming to check if the following
+//   characters would form a well formed entity. If they don't, only the longest
+//   subsequence that could form a well formed entity is consumed.
+// - An entity is considered invalid if it is well formed but represents a
+// non-existent Unicode code point or reference name.
+
+use crate::proc::Processor;
+use crate::spec::codepoint::{is_digit, is_upper_hex_digit, is_lower_hex_digit, is_hex_digit};
+use crate::spec::entity::{ENTITY_REFERENCES, is_valid_entity_reference_name_char};
+use crate::err::HbRes;
+use crate::code::Code;
+
+const MAX_UNICODE_CODE_POINT: u32 = 0x10FFFF;
+
+enum Type { // Syntactic form of an entity, decided from the characters right after the ampersand.
+    Malformed, // Not well formed: data length out of range or no terminating semicolon.
+    Name, // Character entity reference, looked up by name in ENTITY_REFERENCES.
+    Decimal, // Numeric reference introduced by "#", with decimal digits.
+    Hexadecimal, // Numeric reference introduced by "#x", with hexadecimal digits.
+}
+
+/// Parses ASCII decimal digits into a code point. Returns `None` if a byte is
+/// not a decimal digit or if the value exceeds `MAX_UNICODE_CODE_POINT`.
+fn parse_decimal(slice: &[u8]) -> Option<u32> {
+    let mut val = 0u32;
+    for &c in slice {
+        // Saturate so that over-long input cannot wrap around into the valid range.
+        val = val.saturating_mul(10).saturating_add((c as char).to_digit(10)?);
+    }
+    // Keep the value only if it is within the Unicode range.
+    Some(val).filter(|&v| v <= MAX_UNICODE_CODE_POINT)
+}
+
+/// Parses ASCII hexadecimal digits (either case) into a code point.
+///
+/// Returns `None` if `slice` contains a byte that is not a hexadecimal digit
+/// or if the value exceeds `MAX_UNICODE_CODE_POINT`.
+fn parse_hexadecimal(slice: &[u8]) -> Option<u32> {
+    let mut val = 0u32;
+    for &c in slice {
+        // `to_digit(16)` accepts 0-9, a-f and A-F and yields the digit's
+        // value already widened to `u32`.
+        let digit = (c as char).to_digit(16)?;
+        // Saturate so that over-long input cannot wrap around into the valid
+        // range; a saturated value is rejected by the range check below.
+        val = val.saturating_mul(16).saturating_add(digit);
+    }
+    if val > MAX_UNICODE_CODE_POINT {
+        None
+    } else {
+        Some(val)
+    }
+}
+
+/// Parse an entity, skipping its characters. Returns its code point, or `None` if it is malformed or invalid.
+/// Set a checkpoint to later write skipped, or to ignore results and reset to previous position.
+pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
+    proc.match_char(b'&').expect().discard();
+    // The input can end at any time after initial ampersand.
+    // Examples of valid complete source code: "&", "&a", "&#", "&#09",
+    // "&amp".
+
+    // There are three stages to this function:
+    //
+    // 1. Determine the type of entity, so we can know how to parse and
+    // validate the following characters.
+    //    - This can be done by simply looking at the first and second
+    //    characters after the initial ampersand, e.g. "&#", "&#x", "&a".
+    // 2. Parse the entity data, i.e. the characters between the ampersand
+    // and semicolon.
+    //    - To avoid parsing forever on malformed entities without
+    //    semicolons, there is an upper bound on the amount of possible
+    //    characters, based on the type of entity detected from the first
+    //    stage.
+    // 3. Interpret and validate the data.
+    //    - This simply checks if it refers to a valid Unicode code point or
+    //    entity reference name.
+
+    // First stage: determine the type of entity.
+    let predicate: fn(u8) -> bool;
+    let entity_type: Type;
+    let min_len: usize;
+    let max_len: usize;
+
+    if proc.match_seq(b"#x").discard().matched() {
+        predicate = is_hex_digit;
+        entity_type = Type::Hexadecimal;
+        min_len = 1;
+        max_len = 6;
+    } else if proc.match_char(b'#').discard().matched() {
+        predicate = is_digit;
+        entity_type = Type::Decimal;
+        min_len = 1;
+        max_len = 7;
+    } else if proc.match_pred(is_valid_entity_reference_name_char).matched() {
+        predicate = is_valid_entity_reference_name_char;
+        entity_type = Type::Name;
+        min_len = 2;
+        max_len = 31;
+    } else {
+        return Ok(None);
+    }
+
+    // Second stage: try to parse a well formed entity.
+    // Malformed entity could be last few characters in code, so allow EOF during entity.
+    // Keep the match in a local so that the slice borrowed from it stays valid.
+    let data_match = proc.match_while_pred(predicate);
+    let data = data_match.discard().slice();
+    // Don't try to consume semicolon if entity is not well formed already (`&&` short-circuits).
+    let well_formed = data.len() >= min_len && data.len() <= max_len
+        && proc.match_char(b';').discard().matched();
+    // Shadow instead of reassigning, as `entity_type` is not declared `mut`.
+    let entity_type = if well_formed { entity_type } else { Type::Malformed };
+
+    // Third stage: validate entity and decode if configured to do so.
+    Ok(match entity_type {
+        Type::Name => ENTITY_REFERENCES.get(data).copied(),
+        Type::Decimal => parse_decimal(data),
+        Type::Hexadecimal => parse_hexadecimal(data),
+        Type::Malformed => None,
+    })
+}
+
+/// Process an HTML entity at the current position.
+///
+/// A valid entity is written to the output as the UTF-8 encoding of its code
+/// point; otherwise the characters skipped while parsing are written as is.
+///
+/// Returns the code point, or `None` if the entity is malformed or invalid.
+pub fn process_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
+    // Remember where the entity starts so that it can be copied verbatim.
+    let start = proc.checkpoint();
+    let code_point = parse_entity(proc)?;
+
+    match code_point {
+        Some(cp) => proc.write_utf8(cp),
+        // Write discarded characters that could not form a well formed entity.
+        None => start.write_skipped(),
+    };
+
+    Ok(code_point)
+}
diff --git a/src/proc/error.c b/src/proc/error.c
deleted file mode 100644
index 6934dbd..0000000
--- a/src/proc/error.c
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <hb/proc.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-static void hb_proc_error_setandjmp(hb_proc* proc, hb_err code, size_t pos,
-				    char* msg)
-{
-	proc->result->code = code;
-	proc->result->pos = pos;
-	proc->result->msg = msg;
-	longjmp(proc->start, 1);
-}
-
-void hb_proc_error_pos_len(hb_proc* proc, hb_err code, size_t pos,
-			   char const* msg, size_t msg_len)
-{
-	char* dup = malloc((msg_len + 1) * sizeof(char));
-	memcpy(dup, msg, msg_len);
-	dup[msg_len] = '\0';
-	hb_proc_error_setandjmp(proc, code, pos, dup);
-}
-
-void hb_proc_error_custom_pos(hb_proc* proc, hb_err code, size_t pos,
-			      char const* format, ...)
-{
-	va_list args;
-	va_start(args, format);
-
-	char* msg = malloc(HB_PROC_ERROR_CUSTOM_SIZE * sizeof(char));
-	vsnprintf(msg, HB_PROC_ERROR_CUSTOM_SIZE, format, args);
-
-	va_end(args);
-
-	hb_proc_error_setandjmp(proc, code, pos, msg);
-}
diff --git a/src/proc/matches.c b/src/proc/matches.c
deleted file mode 100644
index 074b8d6..0000000
--- a/src/proc/matches.c
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <hb/proc.h>
-#include <string.h>
-
-/**
- * Checks if the next sequence of characters matches the character array
- * `match`. Won't cause an error if insufficient amount of characters left.
- *
- * @param proc proc
- * @param characters to check against
- * @return amount of characters matched, which should be equal to
- * `strlen(match)`
- */
-size_t hb_proc_matches_len(hb_proc* proc, char const* match, size_t match_len)
-{
-	// Check that there are enough characters left.
-	if (!hb_proc_bounds_check_offset(proc, match_len))
-		return 0;
-
-	// Compare characters with fast memcmp.
-	if (memcmp(&proc->src[proc->src_next], match, match_len) != 0)
-		return 0;
-
-	// Return amount of characters matched.
-	return match_len;
-}
-
-/**
- * Checks if the next sequence of characters matches the character array `match`
- * of lowercase characters ignoring case. Won't cause an error if insufficient
- * amount of characters left.
- *
- * @param proc proc
- * @param characters to check against ignoring case
- * @return amount of characters matched, which should be equal to
- * `strlen(match)`
- */
-size_t hb_proc_matches_len_i(hb_proc* proc, char const* match, size_t match_len)
-{
-	// Check that there are enough characters left.
-	if (!hb_proc_bounds_check_offset(proc, match_len))
-		return 0;
-
-	// Compare characters ignoring case using strncasecmp.
-	if (strncasecmp(&proc->src[proc->src_next], match, match_len) != 0)
-		return 0;
-
-	return match_len;
-}
-
-/**
- * Checks if the next sequence of characters is "\r", "\n", or "\r\n".
- * Won't cause an error if insufficient amount of characters left.
- *
- * @param proc proc
- * @return amount of characters matched
- */
-size_t hb_proc_matches_line_terminator(hb_proc* proc)
-{
-	// Comparing against `\r\n` must be done before `\r`.
-	return hb_proc_matches(proc, "\r\n")
-		       ? 2
-		       : hb_proc_matches(proc, "\r")
-				 ? 1
-				 : hb_proc_matches(proc, "\n");
-}
diff --git a/src/proc/mod.rs b/src/proc/mod.rs
new file mode 100644
index 0000000..cf8c259
--- /dev/null
+++ b/src/proc/mod.rs
@@ -0,0 +1,368 @@
+use crate::err::{HbErr, HbRes};
+use phf::Set;
+use crate::code::Code;
+
+pub mod attr;
+pub mod bang;
+pub mod comment;
+pub mod content;
+pub mod entity;
+pub mod script;
+pub mod style;
+pub mod tag;
+
+pub enum RequireReason { // Why a match was attempted; selects the error reported when a required match is missing.
+    Custom, // The message is supplied by the caller through `require_with_reason`.
+    ExpectedNotChar(u8), // Set by `match_not_char`: any character other than this one was wanted.
+    ExpectedMatch(&'static [u8]), // Reported as `HbErr::ExpectedMatchNotFound` with this sequence.
+    ExpectedChar(u8), // Set by `match_char`: exactly this character was wanted.
+}
+
+struct Match<'d, D: Code> {
+    data: &'d mut D,
+    // Need to record start as we might get slice after keeping or skipping.
+    start: usize,
+    // Guaranteed amount of characters that exist from `start` at time of creation of this struct.
+    count: usize,
+    // Character matched, if any. Only exists for single-character matches and if matched.
+    char: Option<u8>,
+    reason: RequireReason, // Selects the error returned by `require` when nothing matched.
+}
+
+impl<D: Code> Match<'_, D> {
+    // Query
+    pub fn matched(&self) -> bool {
+        self.count > 0
+    }
+    pub fn length(&self) -> usize {
+        self.count
+    }
+    pub fn char(&self) -> u8 {
+        self.char.unwrap()
+    }
+    pub fn maybe_char(&self) -> Option<u8> {
+        self.char
+    }
+    pub fn slice(&self) -> &[u8] {
+        self.data.get_src_slice(self.start..self.start + self.count)
+    }
+
+    // Assert
+    fn _require(&self, custom_reason: Option<&'static str>) -> HbRes<&Self> {
+        if self.count > 0 {
+            return Ok(self);
+        };
+        // `self.char` is only set on success, so read the offending character directly.
+        // Nothing was matched, so committing moved nothing and offset 0 is where the match was attempted.
+        Err(match (&self.reason, self.data.maybe_read(0)) {
+            (RequireReason::Custom, _) => HbErr::ExpectedNotFound(custom_reason.unwrap_or("Expected match not found")),
+            (RequireReason::ExpectedMatch(m), _) => HbErr::ExpectedMatchNotFound(*m),
+            // A single-character match that failed with no character to inspect failed because input ended.
+            (_, None) => HbErr::UnexpectedEnd,
+            (RequireReason::ExpectedChar(c), Some(got)) => HbErr::ExpectedCharNotFound { expected: *c, got },
+            (RequireReason::ExpectedNotChar(c), Some(_)) => HbErr::UnexpectedCharFound(*c),
+        })
+    }
+    pub fn require(&self) -> HbRes<&Self> {
+        self._require(None)
+    }
+    pub fn require_with_reason(&self, reason: &'static str) -> HbRes<&Self> {
+        self._require(Some(reason))
+    }
+    /// Assert that something matched, for when the caller has already checked; panics otherwise.
+    pub fn expect(&self) -> &Self {
+        // TODO Maybe debug_assert?
+        assert!(self.count > 0);
+        self
+    }
+
+    // Commit: `keep` shifts the matched characters like `Processor::accept_amount`, `discard` consumes them like `Processor::skip_amount`.
+    // Both are relative to the current position; self.count has already been verified to be valid, so don't need to bounds check again.
+    pub fn keep(&self) -> &Self {
+        self.data.shift(self.count);
+        self
+    }
+    pub fn discard(&self) -> &Self {
+        self.data.consume(self.count);
+        self
+    }
+}
+
+struct Checkpoint<'d, D: Code> {
+    data: &'d mut D,
+    src_pos: usize, // Source position when the checkpoint was taken.
+    out_pos: usize, // Output position when the checkpoint was taken.
+}
+
+impl<D: Code> Checkpoint<'_, D> {
+    /// Move both the source and output positions back to where they were at checkpoint.
+    pub fn restore(&self) {
+        self.data.set_src_pos(self.src_pos);
+        self.data.set_out_pos(self.out_pos);
+    }
+
+    /// Write characters skipped from source since checkpoint. Must not have written anything since checkpoint (it would be lost).
+    pub fn write_skipped(&self) {
+        debug_assert_eq!(self.data.get_out_pos(), self.out_pos);
+        let skipped_range = self.src_pos..self.data.get_src_pos();
+        self.data.write_slice(self.data.get_src_slice(skipped_range));
+    }
+
+    /// Discard characters written since checkpoint but keep source position.
+    pub fn erase_written(&self) {
+        self.data.set_out_pos(self.out_pos);
+    }
+
+    /// Amount of source characters consumed since checkpoint.
+    pub fn consumed_count(&self) -> usize {
+        self.data.get_src_pos() - self.src_pos
+    }
+    /// Amount of characters written to output since checkpoint.
+    pub fn written_count(&self) -> usize {
+        self.data.get_out_pos() - self.out_pos
+    }
+}
+
+/// Processing state of a file. Most fields are used internally and set during
+/// processing. Single use only; create one per processing.
+pub struct Processor<'data, D: Code> {
+    pub data: &'data mut D, // The code being processed; Processor methods read, consume and write through it.
+}
+
+/// Index of the first occurrence of `c` in `s` at or after `from`, if any.
+fn index_of(s: &'static [u8], c: u8, from: usize) -> Option<usize> {
+    // Skip instead of slicing so that a `from` beyond the end gives `None`.
+    s.iter()
+        .skip(from)
+        .position(|&b| b == c)
+        .map(|offset| from + offset)
+}
+
+// Asserts (in debug builds) that a pattern is non-empty and that its first character does not
+// occur again later in the pattern. Such a pattern allows fast substring searching: after a
+// partial match fails, no occurrence can start inside the characters already matched, so the
+// search can resume at the character that failed instead of backtracking. For example, given
+// string "abcdabc" and pattern "abcde", "abcd" matches, 'a' fails, and searching resumes from that 'a' at index 4.
+macro_rules! debug_assert_fast_pattern {
+    ($pat:expr) => {
+        debug_assert!(!$pat.is_empty() && index_of($pat, $pat[0], 1).is_none());
+    }
+}
+
+// For consistency and improvement of underlying API, only write methods in terms of the underlying API (Code methods). Do not call other Proc methods.
+// TODO Return refs for matches.
+impl<D: Code> Processor<'_, D> {
+    // Helper internal functions for match_* API.
+    fn _new_match(&self, count: usize, char: Option<u8>, reason: RequireReason) -> Match<D> {
+        Match {
+            data: self.data,
+            start: self.data.get_src_pos(),
+            count,
+            char,
+            reason,
+        }
+    }
+    fn _match_one<C: FnOnce(u8) -> bool>(&self, cond: C, reason: RequireReason) -> Match<D> {
+        let m = self.data.maybe_read(0).filter(|n| cond(*n));
+        self._new_match(m.is_some() as usize, m, reason)
+    }
+    fn _match_greedy<C: Fn(u8) -> bool>(&self, cond: C) -> Match<D> {
+        let mut count = 0usize;
+        while self.data.in_bounds(count) && cond(self.data.read(count)) {
+            count += 1;
+        };
+        self._new_match(count, None, RequireReason::Custom)
+    }
+
+    // Single-char matching API.
+    pub fn match_char(&self, c: u8) -> Match<D> {
+        self._match_one(|n| n == c, RequireReason::ExpectedChar(c))
+    }
+    pub fn match_not_char(&self, c: u8) -> Match<D> {
+        self._match_one(|n| n != c, RequireReason::ExpectedNotChar(c))
+    }
+    pub fn match_member(&self, set: Set<u8>) -> Match<D> {
+        self._match_one(|n| set.contains(&n), RequireReason::Custom)
+    }
+    pub fn match_not_member(&self, set: Set<u8>) -> Match<D> {
+        self._match_one(|n| !set.contains(&n), RequireReason::Custom)
+    }
+    pub fn match_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
+        self._match_one(|n| pred(n), RequireReason::Custom)
+    }
+    pub fn match_not_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
+        self._match_one(|n| !pred(n), RequireReason::Custom)
+    }
+
+    // Match a sequence of characters.
+    pub fn match_seq(&self, pat: &'static [u8]) -> Match<D> {
+        // No fast pattern requirement here: this only compares a prefix, so patterns like "//" are fine.
+        // For faster short-circuiting matching, compare char-by-char instead of slices.
+        let len = pat.len();
+        let mut count = 0;
+        if len > 0 && self.data.in_bounds(len - 1) {
+            for i in 0..len {
+                if self.data.read(i) != pat[i] {
+                    count = 0;
+                    break;
+                };
+                count += 1;
+            };
+        };
+        self._new_match(count, None, RequireReason::Custom)
+    }
+    pub fn match_line_terminator(&self) -> Match<D> {
+        self._new_match(match self.data.maybe_read(0) {
+            Some(b'\n') => 1,
+            Some(b'\r') => 1 + self.data.maybe_read(1).filter(|c| *c == b'\n').is_some() as usize,
+            _ => 0,
+        }, None, RequireReason::Custom)
+    }
+
+    // Multi-char matching API.
+    pub fn match_while_char(&self, c: u8) -> Match<D> {
+        self._match_greedy(|n| n == c)
+    }
+    pub fn match_while_not_char(&self, c: u8) -> Match<D> {
+        self._match_greedy(|n| n != c)
+    }
+    pub fn match_while_member(&self, set: Set<u8>) -> Match<D> {
+        self._match_greedy(|n| set.contains(&n))
+    }
+    pub fn match_while_not_member(&self, set: Set<u8>) -> Match<D> {
+        self._match_greedy(|n| !set.contains(&n))
+    }
+    pub fn match_while_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
+        self._match_greedy(pred)
+    }
+    /// Match all characters before the first occurrence of `s`, or every remaining
+    /// character if `s` does not occur. `s` must satisfy `debug_assert_fast_pattern`.
+    pub fn match_while_not_seq(&self, s: &'static [u8]) -> Match<D> {
+        debug_assert_fast_pattern!(s);
+        // TODO Test
+        // Offset from the current position of the next character in code to examine.
+        let mut srcpos = 0usize;
+        // Next character in pattern to match.
+        // For example, if `patpos` is 2, we've matched 2 characters so far and need to match character at index 2 in pattern with character `srcpos` in code.
+        let mut patpos = 0usize;
+        while self.data.in_bounds(srcpos) {
+            if self.data.read(srcpos) == s[patpos] {
+                if patpos == s.len() - 1 {
+                    // Matched last character in pattern i.e. whole pattern,
+                    // which began `patpos` characters before this one.
+                    return self._new_match(srcpos - patpos, None, RequireReason::Custom);
+                };
+                srcpos += 1;
+                patpos += 1;
+            } else if patpos == 0 {
+                srcpos += 1;
+            } else {
+                // Partial match failed; retry this character against the start of the pattern.
+                patpos = 0;
+            };
+        };
+        // Reached the end without finding the pattern, so all remaining characters match,
+        // including any that formed an unfinished partial match of the pattern.
+        self._new_match(srcpos, None, RequireReason::Custom)
+    }
+
+    pub fn checkpoint(&self) -> Checkpoint<D> {
+        Checkpoint {
+            data: self.data,
+            src_pos: self.data.get_src_pos(),
+            out_pos: self.data.get_out_pos(),
+        }
+    }
+
+    /// Get the `offset` character from next.
+    /// When `offset` is 0, the next character is returned.
+    pub fn peek_offset_eof(&self, offset: usize) -> Option<u8> {
+        self.data.maybe_read(offset)
+    }
+    pub fn peek_offset(&self, offset: usize) -> HbRes<u8> {
+        self.data.maybe_read(offset).ok_or(HbErr::UnexpectedEnd)
+    }
+    pub fn peek_eof(&self) -> Option<u8> {
+        self.data.maybe_read(0)
+    }
+    pub fn peek(&self) -> HbRes<u8> {
+        self.data.maybe_read(0).ok_or(HbErr::UnexpectedEnd)
+    }
+
+    /// Skip the next `count` characters (can be zero).
+    /// Will result in an error if exceeds bounds.
+    pub fn skip_amount(&self, count: usize) -> HbRes<()> {
+        // Check for zero to prevent underflow as type is usize.
+        if count == 0 || self.data.in_bounds(count - 1) {
+            self.data.consume(count);
+            Ok(())
+        } else {
+            Err(HbErr::UnexpectedEnd)
+        }
+    }
+    /// Skip and return the next character.
+    /// Will result in an error if exceeds bounds.
+    pub fn skip(&self) -> HbRes<u8> {
+        if !self.data.at_end() {
+            let c = self.data.read(0);
+            self.data.consume(1);
+            Ok(c)
+        } else {
+            Err(HbErr::UnexpectedEnd)
+        }
+    }
+
+    /// Write `c` to output. Will panic if exceeds bounds.
+    pub fn write(&self, c: u8) -> () {
+        self.data.write(c)
+    }
+    /// Write `s` to output. Will panic if exceeds bounds.
+    pub fn write_slice(&self, s: &[u8]) -> () {
+        self.data.write_slice(s)
+    }
+    /// Does not check if `c` is a valid Unicode code point, other than panicking if it is above U+10FFFF.
+    pub fn write_utf8(&self, c: u32) -> () {
+        // Don't use char::encode_utf8 as it requires a valid code point (a `char`)
+        // as well as a `[u8; 4]` scratch buffer to encode into.
+        if c <= 0x7F {
+            // Plain ASCII.
+            self.data.write(c as u8);
+        } else if c <= 0x07FF {
+            // 2-byte UTF-8.
+            self.data.write((((c >> 6) & 0x1F) | 0xC0) as u8);
+            self.data.write((((c >> 0) & 0x3F) | 0x80) as u8);
+        } else if c <= 0xFFFF {
+            // 3-byte UTF-8.
+            self.data.write((((c >> 12) & 0x0F) | 0xE0) as u8);
+            self.data.write((((c >> 6) & 0x3F) | 0x80) as u8);
+            self.data.write((((c >> 0) & 0x3F) | 0x80) as u8);
+        } else if c <= 0x10FFFF {
+            // 4-byte UTF-8.
+            self.data.write((((c >> 18) & 0x07) | 0xF0) as u8);
+            self.data.write((((c >> 12) & 0x3F) | 0x80) as u8);
+            self.data.write((((c >> 6) & 0x3F) | 0x80) as u8);
+            self.data.write((((c >> 0) & 0x3F) | 0x80) as u8);
+        } else {
+            unreachable!();
+        }
+    }
+
+    pub fn accept(&self) -> HbRes<u8> {
+        if !self.data.at_end() {
+            let c = self.data.read(0);
+            self.data.shift(1);
+            Ok(c)
+        } else {
+            Err(HbErr::UnexpectedEnd)
+        }
+    }
+    pub fn accept_amount(&self, count: usize) -> HbRes<()> {
+        // Check for zero to prevent underflow as type is usize.
+        if count == 0 || self.data.in_bounds(count - 1) {
+            self.data.shift(count);
+            Ok(())
+        } else {
+            Err(HbErr::UnexpectedEnd)
+        }
+    }
+}
diff --git a/src/proc/peek.c b/src/proc/peek.c
deleted file mode 100644
index c55467d..0000000
--- a/src/proc/peek.c
+++ /dev/null
@@ -1,73 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rune.h>
-#include <stddef.h>
-
-/**
- * Get the next character.
- * If all characters have already been consumed, {@link HB_EOF} is returned.
- *
- * @param proc proc
- * @return character or {@link HB_EOF}
- */
-hb_eof_rune hb_proc_peek_eof(hb_proc* proc)
-{
-	return proc->src[proc->src_next];
-}
-
-/**
- * Get the next character.
- * Will cause an error if it's the end and there is no next character.
- *
- * @param proc proc
- * @return character
- * @throws on HB_ERR_PARSE_UNEXPECTED_END
- */
-hb_rune hb_proc_peek(hb_proc* proc)
-{
-	hb_proc_bounds_assert_not_eof(proc);
-
-	hb_eof_rune c = hb_proc_peek_eof(proc);
-
-	return c;
-}
-
-/**
- * Get the `offset` character from next.
- * When `offset` is 0, the next character is returned (equivalent to {@link
- * hb_proc_peek_eof}). If `offset` represents after the last character, {@link
- * HB_EOF} is returned.
- *
- * @param proc proc
- * @param offset position of character to get
- * @return character or {@link HB_EOF}
- */
-hb_eof_rune hb_proc_peek_eof_offset(hb_proc* proc, size_t offset)
-{
-	if (!hb_proc_bounds_check_offset(proc, offset))
-		return HB_EOF;
-
-	return proc->src[proc->src_next + offset];
-}
-
-/**
- * Get the `offset` character from next.
- * When `offset` is 0, the next character is returned (equivalent to {@link
- * hb_proc_peek_eof}). An error will be caused if `offset` represents after the
- * last character.
- *
- * @param proc proc
- * @param offset position of character to get
- * @return character
- * @throws on HB_ERR_PARSE_UNEXPECTED_END
- */
-hb_rune hb_proc_peek_offset(hb_proc* proc, size_t offset)
-{
-	hb_eof_rune c = hb_proc_peek_eof_offset(proc, offset);
-
-	if (c == HB_EOF) {
-		hb_proc_error(proc, HB_ERR_PARSE_UNEXPECTED_END,
-			      "Unexpected end of input");
-	}
-
-	return c;
-}
diff --git a/src/proc/require.c b/src/proc/require.c
deleted file mode 100644
index 4bde047..0000000
--- a/src/proc/require.c
+++ /dev/null
@@ -1,136 +0,0 @@
-#include <hb/err.h>
-#include <hb/proc.h>
-#include <hb/rune.h>
-
-/**
- * Require the next character to be `c`.
- * The matched character is written to output.
- *
- * @param proc proc
- * @param c character to match
- * @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
- */
-void hb_proc_require(hb_proc* proc, hb_rune c)
-{
-	hb_rune n = hb_proc_accept(proc);
-
-	if (c != n) {
-		hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-				     "Expected `%c` (U+%x), got `%c` (U+%x)", c,
-				     c, n, n);
-	}
-}
-
-/**
- * Require the next character to be `c`.
- * The matched character is skipped over and NOT written to output, and also
- * returned.
- *
- * @param proc proc
- * @param c character to match
- * @return matched character
- * @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
- */
-hb_rune hb_proc_require_skip(hb_proc* proc, hb_rune c)
-{
-	hb_rune n = hb_proc_skip(proc);
-
-	if (c != n) {
-		hb_proc_error_custom(
-			proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-			"Expected `%c` (U+%x), got `%c` (U+%x) at %s", c, c, n,
-			n);
-	}
-
-	return n;
-}
-
-/**
- * Require the next character to satisfy the predicate `pred`.
- * The matched character is written to output.
- * If not matched, the error message will describe the expected output using
- * `name`.
- *
- * @param proc proc
- * @param pred predicate
- * @param name what to output in the error message to describe the requirement
- * @return required character
- * @throws HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
- */
-hb_rune hb_proc_require_predicate(hb_proc* proc, hb_proc_pred* pred,
-				  char const* name)
-{
-	hb_rune n = hb_proc_accept(proc);
-
-	if (!(*pred)(n)) {
-		hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-				     "Expected %s, got `%c` (U+%x)", name, n,
-				     n);
-	}
-
-	return n;
-}
-
-/**
- * Require the next character to satisfy the predicate `pred`.
- * The matched character is skipped over and NOT written to output.
- * If not matched, the error message will describe the expected output using
- * `name`.
- *
- * @param proc proc
- * @param pred predicate
- * @param name what to output in the error message to describe the requirement
- * @return required character
- * @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
- */
-hb_rune hb_proc_require_skip_predicate(hb_proc* proc, hb_proc_pred* pred,
-				       char const* name)
-{
-	hb_rune n = hb_proc_skip(proc);
-
-	if (!(*pred)(n)) {
-		hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-				     "Expected %s, got `%c` (U+%x)", name, n,
-				     n);
-	}
-
-	return n;
-}
-
-/**
- * Require the next sequence of characters to be equal to `match`.
- * Matched characters are written to output.
- *
- * @param proc proc
- * @param match sequence of characters to require
- * @param match_len length of {@arg match}
- * @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
- */
-void hb_proc_require_match_len(hb_proc* proc, char const* match,
-			       size_t match_len)
-{
-	if (!hb_proc_accept_if_matches_len(proc, match, match_len)) {
-		hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-				     "Expected `%s`", match);
-	}
-}
-
-/**
- * Require the next sequence of characters to be equal to `match`.
- * Matched characters are skipped over and NOT written to output.
- *
- * @param proc proc
- * @param match sequence of characters to require
- * @param match_len length of {@arg match}
- * @throws on HB_ERR_PARSE_UNEXPECTED_END or HB_ERR_PARSE_EXPECTED_NOT_FOUND
- */
-void hb_proc_require_skip_match_len(hb_proc* proc, char const* match,
-				    size_t match_len)
-{
-	if (!hb_proc_matches_len(proc, match, match_len)) {
-		hb_proc_error_custom(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-				     "Expected `%s`", match);
-	}
-
-	hb_proc_skip_amount(proc, match_len);
-}
diff --git a/src/proc/script.rs b/src/proc/script.rs
new file mode 100644
index 0000000..b72d8e7
--- /dev/null
+++ b/src/proc/script.rs
@@ -0,0 +1,110 @@
+use crate::err::{HbRes, HbErr};
+use crate::proc::{Processor};
+use crate::code::Code;
+
+fn is_string_delimiter(c: u8) -> bool { // Whether `c` is a double or single quote.
+    match c { b'"' | b'\'' => true, _ => false }
+}
+
+/// Accept a JavaScript single-line comment, starting at its "//".
+fn parse_comment_single<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    proc.match_seq(b"//").expect().keep();
+
+    // TODO Optimise
+    loop {
+        // The comment ends at, and includes, the next line terminator.
+        // It can also end just before a closing </script>, which must not contain whitespace.
+        let at_end = proc.match_line_terminator().keep().matched()
+            || proc.match_seq_i(b"</script>").matched();
+        if at_end {
+            return Ok(());
+        };
+        proc.accept()?;
+    }
+}
+
+/// Accept a JavaScript multi-line comment, starting at its "/*".
+fn parse_comment_multi<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    proc.match_seq(b"/*").expect().keep();
+
+    // TODO Optimise
+    loop {
+        // The comment ends at, and includes, the next "*/".
+        // It can also end just before a closing </script>, which must not contain whitespace.
+        let at_end = proc.match_seq(b"*/").keep().matched()
+            || proc.match_seq_i(b"</script>").matched();
+        if at_end {
+            return Ok(());
+        };
+        proc.accept()?;
+    }
+}
+
+/// Accept a JavaScript string literal delimited by `"` or `'`, including both delimiters.
+/// Errors if the input ends, or an unescaped line terminator occurs, before the closing delimiter.
+fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    let delim = proc.match_pred(is_string_delimiter).expect().keep().char();
+    // Whether the previous character was an unescaped backslash.
+    let mut escaping = false;
+    loop {
+        // Check for a line terminator before accepting, so it is judged with the escape state that applies to it.
+        let line_terminator = proc.match_line_terminator();
+        if line_terminator.matched() {
+            if !escaping {
+                return Err(HbErr::ExpectedNotFound("Unterminated JavaScript string"));
+            };
+            // Escaped line terminator, i.e. a line continuation.
+            line_terminator.keep();
+            escaping = false;
+            continue;
+        };
+        let c = proc.accept()?;
+        if c == b'\\' {
+            escaping = !escaping;
+        } else if c == delim && !escaping {
+            return Ok(());
+        } else {
+            escaping = false;
+        };
+    }
+}
+
+/// Accept a JavaScript template literal, including both backticks.
+///
+/// Substitutions (`${...}`) are not treated specially.
+///
+/// Returns an error if the input ends before the closing backtick.
+fn parse_template<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    proc.match_char(b'`').expect().keep();
+
+    // Whether the previous character was an unescaped backslash.
+    let mut escaped = false;
+
+    loop {
+        escaped = match proc.accept()? {
+            // A backslash escapes the next character, unless it is itself escaped.
+            b'\\' => !escaped,
+            // An unescaped backtick closes the literal.
+            b'`' if !escaped => return Ok(()),
+            // Any other character ends an escape.
+            _ => false,
+        };
+    }
+}
+
+/// Accept the content of a script element, stopping just before the next "</".
+/// Comments, string literals, and template literals are each accepted as a
+/// whole by their own parsers; anything else is accepted one character at a time.
+pub fn process_script<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    while !proc.match_seq(b"</").matched() {
+        // Guards are tried in order and stop at the first that matches.
+        match () {
+            _ if proc.match_seq(b"//").matched() => parse_comment_single(proc)?,
+            _ if proc.match_seq(b"/*").matched() => parse_comment_multi(proc)?,
+            _ if proc.match_pred(is_string_delimiter).matched() => parse_string(proc)?,
+            _ if proc.match_char(b'`').matched() => parse_template(proc)?,
+            _ => drop(proc.accept()?),
+        };
+    }
+    Ok(())
+}
diff --git a/src/proc/skip.c b/src/proc/skip.c
deleted file mode 100644
index c322997..0000000
--- a/src/proc/skip.c
+++ /dev/null
@@ -1,90 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rune.h>
-
-/**
- * Skip over the next character.
- * Requires that the file has at least one character remaining.
- *
- * @param proc proc
- * @return skipped character
- * @throws on HB_ERR_PARSE_UNEXPECTED_END
- */
-hb_rune hb_proc_skip(hb_proc* proc)
-{
-	hb_proc_bounds_assert_not_eof(proc);
-
-	hb_rune c = proc->src[proc->src_next];
-
-	proc->src_next++;
-
-	return c;
-}
-
-/**
- * Skip over the next `amount` characters.
- * Requires that the file has at least `amount` characters remaining.
- *
- * @param proc proc
- * @param amount amount of characters to skip
- * @return amount of characters skipped
- * @throws on HB_ERR_PARSE_UNEXPECTED_END
- */
-size_t hb_proc_skip_amount(hb_proc* proc, size_t amount)
-{
-	hb_proc_bounds_assert_offset(proc, amount);
-
-	proc->src_next += amount;
-
-	return amount;
-}
-
-/**
- * Skip over the following character if it is `c`.
- * Won't cause an error if the end is reached.
- * Returns the amount of characters skipped.
- * Undefined behaviour if `c == HB_EOF`.
- *
- * @param proc proc
- * @param c character to skip if next
- * @return 1 if skipped, 0 otherwise
- */
-size_t hb_proc_skip_if(hb_proc* proc, hb_rune c)
-{
-	hb_eof_rune n = hb_proc_peek_eof(proc);
-
-	// n != c takes care of n == HB_EOF
-	if (n != c) {
-		return 0;
-	}
-
-	proc->src_next++;
-
-	return 1;
-}
-
-/**
- * Skip over every following character until one dissatisfies the predicate
- * `pred`, or the end is reached.
- *
- * @param proc proc
- * @param pred predicate
- * @return amount of characters skipped
- */
-size_t hb_proc_skip_while_predicate(hb_proc* proc, hb_proc_pred* pred)
-{
-	size_t count = 0;
-
-	while (true) {
-		hb_eof_rune c = hb_proc_peek_eof_offset(proc, count);
-
-		if (c == HB_EOF || !(*pred)(c)) {
-			break;
-		}
-
-		count++;
-	}
-
-	proc->src_next += count;
-
-	return count;
-}
diff --git a/src/proc/style.rs b/src/proc/style.rs
new file mode 100644
index 0000000..7f6918d
--- /dev/null
+++ b/src/proc/style.rs
@@ -0,0 +1,65 @@
+use crate::proc::Processor;
+use crate::err::{HbRes, HbErr};
+use crate::code::Code;
+
+// Returns true when `c` is one of the two quote bytes that can delimit a string
+// here: the double quote (`"`) or the single quote (`'`).
+// Every other byte yields false.
+fn is_string_delimiter(c: u8) -> bool {
+    c == b'"' || c == b'\''
+}
+
+// Consumes a CSS comment: the opening `/*` and everything up to and including the next `*/`.
+fn parse_comment<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    proc.match_seq(b"/*").expect().keep();
+    // Unlike script tags, style comments do NOT end at closing tag.
+    while !proc.match_seq(b"*/").keep().matched() {
+        // Propagate a failed accept (presumably end of input) instead of discarding it and looping on.
+        proc.accept()?;
+    }
+    Ok(())
+}
+
+// Consumes a quoted CSS string, from its opening delimiter through the matching
+// closing delimiter that is not escaped by a backslash.
+fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    let quote = proc.match_pred(is_string_delimiter).expect().keep().char();
+    // Set while the previously accepted character was a backslash that is itself unescaped.
+    let mut after_backslash = false;
+
+    loop {
+        match proc.accept()? {
+            b'\\' => {
+                // Toggling (rather than setting) makes `\\` read as one escaped backslash.
+                after_backslash = !after_backslash;
+                continue;
+            }
+            // An unescaped delimiter closes the string.
+            c if c == quote && !after_backslash => return Ok(()),
+            _ => {}
+        }
+
+        // A line terminator matched here is kept either way, but it is reported as an
+        // error only when the escape flag is clear.
+        if proc.match_line_terminator().keep().matched() && !after_backslash {
+            // TODO Use better error type.
+            return Err(HbErr::ExpectedNotFound("Unterminated CSS string"));
+        }
+
+        after_backslash = false;
+    }
+}
+
+// Scans `style` element content until `</` is matched (the match is not kept here).
+pub fn process_style<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+    while !proc.match_seq(b"</").matched() {
+        if proc.match_seq(b"/*").matched() { // Comment: parse_comment consumes it through `*/`.
+            parse_comment(proc)?;
+        } else if proc.match_pred(is_string_delimiter).matched() { // Quoted string.
+            parse_string(proc)?;
+        } else { // Anything else: accept a single character.
+            proc.accept()?;
+        }
+    };
+    Ok(())
+}
diff --git a/src/proc/tag.rs b/src/proc/tag.rs
new file mode 100644
index 0000000..d07216b
--- /dev/null
+++ b/src/proc/tag.rs
@@ -0,0 +1,79 @@
+use crate::proc::attr::{AttrType, process_attr};
+use crate::err::{HbRes, HbErr};
+use crate::proc::Processor;
+use crate::spec::codepoint::{is_alphanumeric, is_whitespace};
+use crate::proc::content::process_content;
+use crate::proc::script::process_script;
+use crate::proc::style::process_style;
+use crate::spec::tag::void::VOID_TAGS;
+use crate::code::Code;
+
+// Tag names may only use ASCII alphanumerics, but `:` and `-` are accepted too as some people use them.
+// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name for spec.
+fn is_valid_tag_name_char(c: u8) -> bool {
+    c == b'-' || c == b':' || is_alphanumeric(c)
+}
+// Reads the valid tag name characters at the current position and returns them as a slice; a failed "tag name" requirement is propagated.
+fn process_tag_name<'d, D: Code>(proc: &Processor<'d, D>) -> HbRes<&'d [u8]> {
+    Ok(proc.while_pred(is_valid_tag_name_char).require_reason("tag name")?.accept().slice())
+}
+// Processes one element starting at `<`: its name and attributes, then, unless it is self-closing
+// or listed in VOID_TAGS, its content and the closing tag, whose name must match the opening one.
+// Errors from the helpers are propagated; NoSpaceBeforeAttr and UnclosedTag are raised here.
+// NOTE(review): `parent` is not used by this function yet.
+pub fn process_tag<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes<()> {
+    proc.is('<').require().accept();
+    let name = process_tag_name(proc)?;
+    let mut last_attr_type = AttrType::None;
+    let self_closing = loop {
+        // At the beginning of this loop, the last parsed unit was
+        // either the tag name or an attribute (including its value, if
+        // it had one).
+        let ws_accepted = proc.match_while_pred(is_whitespace).discard().count();
+
+        if proc.match_char(b'>').keep().matched() {
+            // End of tag.
+            break false;
+        }
+
+        if proc.match_seq(b"/>").keep().matched() {
+            break true; // Self-closing tag; the loop's value records this.
+        }
+
+        // HbErr::NoSpaceBeforeAttr is not suppressible as
+        // otherwise there would be difficulty in determining what is
+        // the end of a tag/attribute name/attribute value.
+        if ws_accepted == 0 {
+            return Err(HbErr::NoSpaceBeforeAttr);
+        }
+
+        if last_attr_type != AttrType::Quoted {
+            proc.write(b' '); // Separator; omitted right after a quoted attribute.
+        }
+
+        last_attr_type = process_attr(proc)?;
+    };
+
+    if self_closing || VOID_TAGS.contains(&name) {
+        return Ok(());
+    }
+
+    // TODO WARNING: Tags must be case sensitive.
+    match name {
+        b"script" => process_script(proc)?,
+        b"style" => process_style(proc)?,
+        _ => process_content(proc, Some(name))?,
+    }
+
+    // Require closing tag for non-void.
+    proc.match_seq(b"</").require_with_reason("closing tag")?.keep();
+    let closing_name = process_tag_name(proc)?;
+    if name != closing_name {
+        // TODO Find a way to cleanly provide opening and closing tag
+        // names (which are views) into error message without leaking
+        // memory.
+        return Err(HbErr::UnclosedTag);
+    }
+    proc.match_char(b'>').require_with_reason("closing tag")?.keep();
+    Ok(())
+}
diff --git a/src/proc/view.c b/src/proc/view.c
deleted file mode 100644
index 47f4f8d..0000000
--- a/src/proc/view.c
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <hb/collection.h>
-#include <hb/proc.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <string.h>
-
-// A view represents a substring of the source. Faster, easier, safer, and more
-// efficient than making a copy. If the end is before the start, it's invalid,
-// like NaN. Can be used for special meaning. See lib/nicehash/view-str.h for
-// more details.
-
-// To avoid underflow, there are no hb_proc_view_start_with_*_prev functions.
-
-// Start a view at the position of the next character to consume.
-void hb_proc_view_start_with_src_next(nh_view_str* view, hb_proc* proc)
-{
-	nh_view_str_set_start(view, proc->src_next);
-}
-
-// End a view at the position of the last character consumed (inclusive).
-void hb_proc_view_end_with_src_prev(nh_view_str* view, hb_proc* proc)
-{
-	nh_view_str_set_length(view, proc->src_next <= view->start
-					     ? 0
-					     : proc->src_next - view->start);
-}
-
-// Start a view at the position of the next character that will have been
-// processed.
-void hb_proc_view_start_with_out_next(nh_view_str* view, hb_proc* proc)
-{
-	nh_view_str_set_start(view, proc->out_next);
-}
-
-// End a view at the position of the last character processed (inclusive).
-void hb_proc_view_end_with_out_prev(nh_view_str* view, hb_proc* proc)
-{
-	nh_view_str_set_length(view, proc->out_next <= view->start
-					     ? 0
-					     : proc->out_next - view->start);
-}
diff --git a/src/proc/write.c b/src/proc/write.c
deleted file mode 100644
index 29b01cf..0000000
--- a/src/proc/write.c
+++ /dev/null
@@ -1,53 +0,0 @@
-#include <hb/proc.h>
-
-void hb_proc_write(hb_proc* proc, hb_rune c)
-{
-	// WARNING: Does not check if out_next exceeds bounds.
-	proc->out[proc->out_next] = c;
-	proc->out_next++;
-}
-
-void hb_proc_write_view(hb_proc* proc, nh_view_str* view)
-{
-	// WARNING: Does not check boundaries.
-	// WARNING: This works because nh_view_str and proc->out have the same
-	// element types. Be aware should this change.
-	memcpy(&proc->out[proc->out_next], &view->array[view->start],
-	       view->length * sizeof(hb_rune));
-	proc->out_next += view->length;
-}
-
-size_t hb_proc_write_utf_8(hb_proc* proc, uint32_t c)
-{
-	if (c <= 0x7F) {
-		// Plain ASCII.
-		hb_proc_write(proc, (hb_rune) c);
-		return 1;
-	}
-
-	if (c <= 0x07FF) {
-		// 2-byte UTF-8.
-		hb_proc_write(proc, (hb_rune)(((c >> 6) & 0x1F) | 0xC0));
-		hb_proc_write(proc, (hb_rune)(((c >> 0) & 0x3F) | 0x80));
-		return 2;
-	}
-
-	if (c <= 0xFFFF) {
-		// 3-byte UTF-8.
-		hb_proc_write(proc, (hb_rune)(((c >> 12) & 0x0F) | 0xE0));
-		hb_proc_write(proc, (hb_rune)(((c >> 6) & 0x3F) | 0x80));
-		hb_proc_write(proc, (hb_rune)(((c >> 0) & 0x3F) | 0x80));
-		return 3;
-	}
-
-	if (c <= 0x10FFFF) {
-		// 4-byte UTF-8.
-		hb_proc_write(proc, (hb_rune)(((c >> 18) & 0x07) | 0xF0));
-		hb_proc_write(proc, (hb_rune)(((c >> 12) & 0x3F) | 0x80));
-		hb_proc_write(proc, (hb_rune)(((c >> 6) & 0x3F) | 0x80));
-		hb_proc_write(proc, (hb_rune)(((c >> 0) & 0x3F) | 0x80));
-		return 4;
-	}
-
-	return 0;
-}
diff --git a/src/rule.h b/src/rule.h
deleted file mode 100644
index 11f2466..0000000
--- a/src/rule.h
+++ /dev/null
@@ -1,121 +0,0 @@
-#pragma once
-
-#include <hb/collection.h>
-#include <hb/rune.h>
-
-void hb_rule_init(void);
-
-void hb_rule_ascii_control_add_elems(nh_bitfield_ascii* set);
-void hb_rule_ascii_control_init(void);
-bool hb_rule_ascii_control_check(hb_rune c);
-
-void hb_rule_ascii_digit_add_elems(nh_bitfield_ascii* set);
-void hb_rule_ascii_digit_init(void);
-bool hb_rule_ascii_digit_check(hb_rune c);
-
-void hb_rule_ascii_hex_add_elems(nh_bitfield_ascii* set);
-void hb_rule_ascii_hex_init(void);
-bool hb_rule_ascii_hex_check(hb_rune c);
-
-void hb_rule_ascii_lowercase_add_elems(nh_bitfield_ascii* set);
-void hb_rule_ascii_lowercase_init(void);
-bool hb_rule_ascii_lowercase_check(hb_rune c);
-
-void hb_rule_ascii_uppercase_add_elems(nh_bitfield_ascii* set);
-void hb_rule_ascii_uppercase_init(void);
-bool hb_rule_ascii_uppercase_check(hb_rune c);
-
-void hb_rule_ascii_whitespace_add_elems(nh_bitfield_ascii* set);
-void hb_rule_ascii_whitespace_init(void);
-bool hb_rule_ascii_whitespace_check(hb_rune c);
-
-void hb_rule_attr_name_add_exceptions(nh_bitfield_ascii* set);
-void hb_rule_attr_name_init(void);
-bool hb_rule_attr_name_check(hb_rune c);
-
-void hb_rule_attr_quote_add_elems(nh_bitfield_ascii* set);
-void hb_rule_attr_quote_init(void);
-bool hb_rule_attr_quote_check(hb_rune c);
-
-void hb_rule_attr_unquotedvalue_add_exceptions(nh_bitfield_ascii* set);
-void hb_rule_attr_unquotedvalue_init(void);
-bool hb_rule_attr_unquotedvalue_check(hb_rune c);
-
-void hb_rule_entity_reference_map_add_entries(hb_map_entity_references* map);
-void hb_rule_entity_reference_init(void);
-bool hb_rule_entity_reference_valid_name_char(hb_rune c);
-bool hb_rule_entity_reference_exists(nh_view_str* ref);
-int32_t hb_rule_entity_reference_get_code_point(nh_view_str* ref);
-
-void hb_rule_tag_content_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_content_init(void);
-bool hb_rule_tag_content_check(nh_view_str* tag);
-
-void hb_rule_tag_contentfirst_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_contentfirst_init(void);
-bool hb_rule_tag_contentfirst_check(nh_view_str* tag);
-
-void hb_rule_tag_formatting_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_formatting_init(void);
-bool hb_rule_tag_formatting_check(nh_view_str* tag);
-
-void hb_rule_tag_heading_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_heading_init(void);
-bool hb_rule_tag_heading_check(nh_view_str* tag);
-
-void hb_rule_tag_html_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_html_init(void);
-bool hb_rule_tag_html_check(nh_view_str* tag);
-
-void hb_rule_tag_layout_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_layout_init(void);
-bool hb_rule_tag_layout_check(nh_view_str* tag);
-
-void hb_rule_tag_media_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_media_init(void);
-bool hb_rule_tag_media_check(nh_view_str* tag);
-
-void hb_rule_tag_name_add_elems(nh_bitfield_ascii* set);
-void hb_rule_tag_name_init(void);
-bool hb_rule_tag_name_check(hb_rune c);
-
-void hb_rule_tag_sectioning_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_sectioning_init(void);
-bool hb_rule_tag_sectioning_check(nh_view_str* tag);
-
-void hb_rule_tag_specific_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_specific_init(void);
-bool hb_rule_tag_specific_check(nh_view_str* tag);
-
-void hb_rule_tag_svg_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_svg_init(void);
-bool hb_rule_tag_svg_check(nh_view_str* tag);
-
-bool hb_rule_tag_valid_check(nh_view_str* tag);
-
-void hb_rule_tag_void_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_void_init(void);
-bool hb_rule_tag_void_check(nh_view_str* tag);
-
-void hb_rule_tag_wss_add_elems(hb_set_tag_names* set);
-void hb_rule_tag_wss_init(void);
-bool hb_rule_tag_wss_check(nh_view_str* tag);
-
-void hb_rule_tag_child_blacklist_map_add_entries(hb_map_tag_relations* map);
-void hb_rule_tag_child_blacklist_init(void);
-bool hb_rule_tag_child_blacklist_allowed(nh_view_str* parent,
-					 nh_view_str* child);
-
-void hb_rule_tag_child_whitelist_map_add_entries(hb_map_tag_relations* map);
-void hb_rule_tag_child_whitelist_init(void);
-bool hb_rule_tag_child_whitelist_allowed(nh_view_str* parent,
-					 nh_view_str* child);
-
-void hb_rule_tag_parent_blacklist_init(void);
-bool hb_rule_tag_parent_blacklist_allowed(nh_view_str* child,
-					  nh_view_str* parent);
-
-void hb_rule_tag_parent_whitelist_map_add_entries(hb_map_tag_relations* map);
-void hb_rule_tag_parent_whitelist_init(void);
-bool hb_rule_tag_parent_whitelist_allowed(nh_view_str* child,
-					  nh_view_str* parent);
diff --git a/src/rule/attr/name.rs b/src/rule/attr/name.rs
deleted file mode 100644
index 0a38349..0000000
--- a/src/rule/attr/name.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-use ::phf::{phf_set, Set};
-
-// Does not include control characters, which are also not allowed.
-static ATTR_NAME_NON_CONTROL_DISALLOWED: Set<char> = phf_set! {
-	' ',
-	'"',
-	'\'',
-	'>',
-	'/',
-	'=',
-	// NOTE: Unicode noncharacters not tested.
-	// (https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name)
-};
-
-fn is_valid_attr_name_char(c: char) -> bool {
-    not (ATTR_NAME_NON_CONTROL_DISALLOWED.has(c) || c.is_ascii_control())
-}
diff --git a/src/rule/attr/quote.rs b/src/rule/attr/quote.rs
deleted file mode 100644
index f68e01e..0000000
--- a/src/rule/attr/quote.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-use ::phf::{phf_set, Set};
-
-static ATTR_QUOTE: Set<char> = phf_set! {
-	// Backtick is not a valid quote character according to
-	// https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
-	'\'',
-	'"',
-};
diff --git a/src/rule/attr/unquotedvalue.rs b/src/rule/attr/unquotedvalue.rs
deleted file mode 100644
index 0515e24..0000000
--- a/src/rule/attr/unquotedvalue.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-use ::phf::{phf_set, Set};
-
-// Does not include whitespace, which is also disallowed.
-static ATTR_VAL_UNQUOTED_NON_WHITESPACE_DISALLOWED: Set<char> = phf_set! {
-	'"',
-	'\'',
-	'`',
-	'=',
-	'<',
-	'>',
-};
-
-fn is_valid_attr_value_unquoted_char(c: char) -> bool {
-    not(ATTR_VAL_UNQUOTED_NON_WHITESPACE_DISALLOWED.has(c) || c.is_ascii_whitespace())
-}
diff --git a/src/rule/entity/reference.rs b/src/rule/entity/reference.rs
deleted file mode 100644
index ad8b5a5..0000000
--- a/src/rule/entity/reference.rs
+++ /dev/null
@@ -1,2045 +0,0 @@
-use ::phf::{Map, phf_map};
-
-// Sourced from https://dev.w3.org/html5/html-author/charref at 2018-07-02T10:00:00Z.
-// HTML entities are case sensitive.
-static ENTITY_REFERENCES: Map<&'static str, u32> = phf_map! {
-	"AElig" => 0xc6,
-	"AMP" => 0x26,
-	"Aacute" => 0xc1,
-	"Abreve" => 0x102,
-	"Acirc" => 0xc2,
-	"Acy" => 0x410,
-	"Afr" => 0x1d504,
-	"Agrave" => 0xc0,
-	"Alpha" => 0x391,
-	"Amacr" => 0x100,
-	"And" => 0x2a53,
-	"Aogon" => 0x104,
-	"Aopf" => 0x1d538,
-	"ApplyFunction" => 0x2061,
-	"Aring" => 0xc5,
-	"Ascr" => 0x1d49c,
-	"Assign" => 0x2254,
-	"Atilde" => 0xc3,
-	"Auml" => 0xc4,
-	"Backslash" => 0x2216,
-	"Barv" => 0x2ae7,
-	"Barwed" => 0x2306,
-	"Bcy" => 0x411,
-	"Because" => 0x2235,
-	"Bernoullis" => 0x212c,
-	"Beta" => 0x392,
-	"Bfr" => 0x1d505,
-	"Bopf" => 0x1d539,
-	"Breve" => 0x2d8,
-	"Bscr" => 0x212c,
-	"Bumpeq" => 0x224e,
-	"CHcy" => 0x427,
-	"COPY" => 0xa9,
-	"Cacute" => 0x106,
-	"Cap" => 0x22d2,
-	"CapitalDifferentialD" => 0x2145,
-	"Cayleys" => 0x212d,
-	"Ccaron" => 0x10c,
-	"Ccedil" => 0xc7,
-	"Ccirc" => 0x108,
-	"Cconint" => 0x2230,
-	"Cdot" => 0x10a,
-	"Cedilla" => 0xb8,
-	"CenterDot" => 0xb7,
-	"Cfr" => 0x212d,
-	"Chi" => 0x3a7,
-	"CircleDot" => 0x2299,
-	"CircleMinus" => 0x2296,
-	"CirclePlus" => 0x2295,
-	"CircleTimes" => 0x2297,
-	"ClockwiseContourIntegral" => 0x2232,
-	"CloseCurlyDoubleQuote" => 0x201d,
-	"CloseCurlyQuote" => 0x2019,
-	"Colon" => 0x2237,
-	"Colone" => 0x2a74,
-	"Congruent" => 0x2261,
-	"Conint" => 0x222f,
-	"ContourIntegral" => 0x222e,
-	"Copf" => 0x2102,
-	"Coproduct" => 0x2210,
-	"CounterClockwiseContourIntegral" => 0x2233,
-	"Cross" => 0x2a2f,
-	"Cscr" => 0x1d49e,
-	"Cup" => 0x22d3,
-	"CupCap" => 0x224d,
-	"DD" => 0x2145,
-	"DDotrahd" => 0x2911,
-	"DJcy" => 0x402,
-	"DScy" => 0x405,
-	"DZcy" => 0x40f,
-	"Dagger" => 0x2021,
-	"Darr" => 0x21a1,
-	"Dashv" => 0x2ae4,
-	"Dcaron" => 0x10e,
-	"Dcy" => 0x414,
-	"Del" => 0x2207,
-	"Delta" => 0x394,
-	"Dfr" => 0x1d507,
-	"DiacriticalAcute" => 0xb4,
-	"DiacriticalDot" => 0x2d9,
-	"DiacriticalDoubleAcute" => 0x2dd,
-	"DiacriticalGrave" => 0x60,
-	"DiacriticalTilde" => 0x2dc,
-	"Diamond" => 0x22c4,
-	"DifferentialD" => 0x2146,
-	"Dopf" => 0x1d53b,
-	"Dot" => 0xa8,
-	"DotDot" => 0x20dc,
-	"DotEqual" => 0x2250,
-	"DoubleContourIntegral" => 0x222f,
-	"DoubleDot" => 0xa8,
-	"DoubleDownArrow" => 0x21d3,
-	"DoubleLeftArrow" => 0x21d0,
-	"DoubleLeftRightArrow" => 0x21d4,
-	"DoubleLeftTee" => 0x2ae4,
-	"DoubleLongLeftArrow" => 0x27f8,
-	"DoubleLongLeftRightArrow" => 0x27fa,
-	"DoubleLongRightArrow" => 0x27f9,
-	"DoubleRightArrow" => 0x21d2,
-	"DoubleRightTee" => 0x22a8,
-	"DoubleUpArrow" => 0x21d1,
-	"DoubleUpDownArrow" => 0x21d5,
-	"DoubleVerticalBar" => 0x2225,
-	"DownArrow" => 0x2193,
-	"DownArrowBar" => 0x2913,
-	"DownArrowUpArrow" => 0x21f5,
-	"DownBreve" => 0x311,
-	"DownLeftRightVector" => 0x2950,
-	"DownLeftTeeVector" => 0x295e,
-	"DownLeftVector" => 0x21bd,
-	"DownLeftVectorBar" => 0x2956,
-	"DownRightTeeVector" => 0x295f,
-	"DownRightVector" => 0x21c1,
-	"DownRightVectorBar" => 0x2957,
-	"DownTee" => 0x22a4,
-	"DownTeeArrow" => 0x21a7,
-	"Downarrow" => 0x21d3,
-	"Dscr" => 0x1d49f,
-	"Dstrok" => 0x110,
-	"ENG" => 0x14a,
-	"ETH" => 0xd0,
-	"Eacute" => 0xc9,
-	"Ecaron" => 0x11a,
-	"Ecirc" => 0xca,
-	"Ecy" => 0x42d,
-	"Edot" => 0x116,
-	"Efr" => 0x1d508,
-	"Egrave" => 0xc8,
-	"Element" => 0x2208,
-	"Emacr" => 0x112,
-	"EmptySmallSquare" => 0x25fb,
-	"EmptyVerySmallSquare" => 0x25ab,
-	"Eogon" => 0x118,
-	"Eopf" => 0x1d53c,
-	"Epsilon" => 0x395,
-	"Equal" => 0x2a75,
-	"EqualTilde" => 0x2242,
-	"Equilibrium" => 0x21cc,
-	"Escr" => 0x2130,
-	"Esim" => 0x2a73,
-	"Eta" => 0x397,
-	"Euml" => 0xcb,
-	"Exists" => 0x2203,
-	"ExponentialE" => 0x2147,
-	"Fcy" => 0x424,
-	"Ffr" => 0x1d509,
-	"FilledSmallSquare" => 0x25fc,
-	"FilledVerySmallSquare" => 0x25aa,
-	"Fopf" => 0x1d53d,
-	"ForAll" => 0x2200,
-	"Fouriertrf" => 0x2131,
-	"Fscr" => 0x2131,
-	"GJcy" => 0x403,
-	"GT" => 0x3e,
-	"Gamma" => 0x393,
-	"Gammad" => 0x3dc,
-	"Gbreve" => 0x11e,
-	"Gcedil" => 0x122,
-	"Gcirc" => 0x11c,
-	"Gcy" => 0x413,
-	"Gdot" => 0x120,
-	"Gfr" => 0x1d50a,
-	"Gg" => 0x22d9,
-	"Gopf" => 0x1d53e,
-	"GreaterEqual" => 0x2265,
-	"GreaterEqualLess" => 0x22db,
-	"GreaterFullEqual" => 0x2267,
-	"GreaterGreater" => 0x2aa2,
-	"GreaterLess" => 0x2277,
-	"GreaterSlantEqual" => 0x2a7e,
-	"GreaterTilde" => 0x2273,
-	"Gscr" => 0x1d4a2,
-	"Gt" => 0x226b,
-	"HARDcy" => 0x42a,
-	"Hacek" => 0x2c7,
-	"Hat" => 0x5e,
-	"Hcirc" => 0x124,
-	"Hfr" => 0x210c,
-	"HilbertSpace" => 0x210b,
-	"Hopf" => 0x210d,
-	"HorizontalLine" => 0x2500,
-	"Hscr" => 0x210b,
-	"Hstrok" => 0x126,
-	"HumpDownHump" => 0x224e,
-	"HumpEqual" => 0x224f,
-	"IEcy" => 0x415,
-	"IJlig" => 0x132,
-	"IOcy" => 0x401,
-	"Iacute" => 0xcd,
-	"Icirc" => 0xce,
-	"Icy" => 0x418,
-	"Idot" => 0x130,
-	"Ifr" => 0x2111,
-	"Igrave" => 0xcc,
-	"Im" => 0x2111,
-	"Imacr" => 0x12a,
-	"ImaginaryI" => 0x2148,
-	"Implies" => 0x21d2,
-	"Int" => 0x222c,
-	"Integral" => 0x222b,
-	"Intersection" => 0x22c2,
-	"InvisibleComma" => 0x2063,
-	"InvisibleTimes" => 0x2062,
-	"Iogon" => 0x12e,
-	"Iopf" => 0x1d540,
-	"Iota" => 0x399,
-	"Iscr" => 0x2110,
-	"Itilde" => 0x128,
-	"Iukcy" => 0x406,
-	"Iuml" => 0xcf,
-	"Jcirc" => 0x134,
-	"Jcy" => 0x419,
-	"Jfr" => 0x1d50d,
-	"Jopf" => 0x1d541,
-	"Jscr" => 0x1d4a5,
-	"Jsercy" => 0x408,
-	"Jukcy" => 0x404,
-	"KHcy" => 0x425,
-	"KJcy" => 0x40c,
-	"Kappa" => 0x39a,
-	"Kcedil" => 0x136,
-	"Kcy" => 0x41a,
-	"Kfr" => 0x1d50e,
-	"Kopf" => 0x1d542,
-	"Kscr" => 0x1d4a6,
-	"LJcy" => 0x409,
-	"LT" => 0x3c,
-	"Lacute" => 0x139,
-	"Lambda" => 0x39b,
-	"Lang" => 0x27ea,
-	"Laplacetrf" => 0x2112,
-	"Larr" => 0x219e,
-	"Lcaron" => 0x13d,
-	"Lcedil" => 0x13b,
-	"Lcy" => 0x41b,
-	"LeftAngleBracket" => 0x27e8,
-	"LeftArrow" => 0x2190,
-	"LeftArrowBar" => 0x21e4,
-	"LeftArrowRightArrow" => 0x21c6,
-	"LeftCeiling" => 0x2308,
-	"LeftDoubleBracket" => 0x27e6,
-	"LeftDownTeeVector" => 0x2961,
-	"LeftDownVector" => 0x21c3,
-	"LeftDownVectorBar" => 0x2959,
-	"LeftFloor" => 0x230a,
-	"LeftRightArrow" => 0x2194,
-	"LeftRightVector" => 0x294e,
-	"LeftTee" => 0x22a3,
-	"LeftTeeArrow" => 0x21a4,
-	"LeftTeeVector" => 0x295a,
-	"LeftTriangle" => 0x22b2,
-	"LeftTriangleBar" => 0x29cf,
-	"LeftTriangleEqual" => 0x22b4,
-	"LeftUpDownVector" => 0x2951,
-	"LeftUpTeeVector" => 0x2960,
-	"LeftUpVector" => 0x21bf,
-	"LeftUpVectorBar" => 0x2958,
-	"LeftVector" => 0x21bc,
-	"LeftVectorBar" => 0x2952,
-	"Leftarrow" => 0x21d0,
-	"Leftrightarrow" => 0x21d4,
-	"LessEqualGreater" => 0x22da,
-	"LessFullEqual" => 0x2266,
-	"LessGreater" => 0x2276,
-	"LessLess" => 0x2aa1,
-	"LessSlantEqual" => 0x2a7d,
-	"LessTilde" => 0x2272,
-	"Lfr" => 0x1d50f,
-	"Ll" => 0x22d8,
-	"Lleftarrow" => 0x21da,
-	"Lmidot" => 0x13f,
-	"LongLeftArrow" => 0x27f5,
-	"LongLeftRightArrow" => 0x27f7,
-	"LongRightArrow" => 0x27f6,
-	"Longleftarrow" => 0x27f8,
-	"Longleftrightarrow" => 0x27fa,
-	"Longrightarrow" => 0x27f9,
-	"Lopf" => 0x1d543,
-	"LowerLeftArrow" => 0x2199,
-	"LowerRightArrow" => 0x2198,
-	"Lscr" => 0x2112,
-	"Lsh" => 0x21b0,
-	"Lstrok" => 0x141,
-	"Lt" => 0x226a,
-	"Map" => 0x2905,
-	"Mcy" => 0x41c,
-	"MediumSpace" => 0x205f,
-	"Mellintrf" => 0x2133,
-	"Mfr" => 0x1d510,
-	"MinusPlus" => 0x2213,
-	"Mopf" => 0x1d544,
-	"Mscr" => 0x2133,
-	"Mu" => 0x39c,
-	"NJcy" => 0x40a,
-	"Nacute" => 0x143,
-	"Ncaron" => 0x147,
-	"Ncedil" => 0x145,
-	"Ncy" => 0x41d,
-	"NegativeMediumSpace" => 0x200b,
-	"NegativeThickSpace" => 0x200b,
-	"NegativeThinSpace" => 0x200b,
-	"NegativeVeryThinSpace" => 0x200b,
-	"NestedGreaterGreater" => 0x226b,
-	"NestedLessLess" => 0x226a,
-	"NewLine" => 0xa,
-	"Nfr" => 0x1d511,
-	"NoBreak" => 0x2060,
-	"NonBreakingSpace" => 0xa0,
-	"Nopf" => 0x2115,
-	"Not" => 0x2aec,
-	"NotCongruent" => 0x2262,
-	"NotCupCap" => 0x226d,
-	"NotDoubleVerticalBar" => 0x2226,
-	"NotElement" => 0x2209,
-	"NotEqual" => 0x2260,
-	"NotExists" => 0x2204,
-	"NotGreater" => 0x226f,
-	"NotGreaterEqual" => 0x2271,
-	"NotGreaterLess" => 0x2279,
-	"NotGreaterTilde" => 0x2275,
-	"NotLeftTriangle" => 0x22ea,
-	"NotLeftTriangleEqual" => 0x22ec,
-	"NotLess" => 0x226e,
-	"NotLessEqual" => 0x2270,
-	"NotLessGreater" => 0x2278,
-	"NotLessTilde" => 0x2274,
-	"NotPrecedes" => 0x2280,
-	"NotPrecedesSlantEqual" => 0x22e0,
-	"NotReverseElement" => 0x220c,
-	"NotRightTriangle" => 0x22eb,
-	"NotRightTriangleEqual" => 0x22ed,
-	"NotSquareSubsetEqual" => 0x22e2,
-	"NotSquareSupersetEqual" => 0x22e3,
-	"NotSubsetEqual" => 0x2288,
-	"NotSucceeds" => 0x2281,
-	"NotSucceedsSlantEqual" => 0x22e1,
-	"NotSupersetEqual" => 0x2289,
-	"NotTilde" => 0x2241,
-	"NotTildeEqual" => 0x2244,
-	"NotTildeFullEqual" => 0x2247,
-	"NotTildeTilde" => 0x2249,
-	"NotVerticalBar" => 0x2224,
-	"Nscr" => 0x1d4a9,
-	"Ntilde" => 0xd1,
-	"Nu" => 0x39d,
-	"OElig" => 0x152,
-	"Oacute" => 0xd3,
-	"Ocirc" => 0xd4,
-	"Ocy" => 0x41e,
-	"Odblac" => 0x150,
-	"Ofr" => 0x1d512,
-	"Ograve" => 0xd2,
-	"Omacr" => 0x14c,
-	"Omega" => 0x3a9,
-	"Omicron" => 0x39f,
-	"Oopf" => 0x1d546,
-	"OpenCurlyDoubleQuote" => 0x201c,
-	"OpenCurlyQuote" => 0x2018,
-	"Or" => 0x2a54,
-	"Oscr" => 0x1d4aa,
-	"Oslash" => 0xd8,
-	"Otilde" => 0xd5,
-	"Otimes" => 0x2a37,
-	"Ouml" => 0xd6,
-	"OverBar" => 0xaf,
-	"OverBrace" => 0x23de,
-	"OverBracket" => 0x23b4,
-	"OverParenthesis" => 0x23dc,
-	"PartialD" => 0x2202,
-	"Pcy" => 0x41f,
-	"Pfr" => 0x1d513,
-	"Phi" => 0x3a6,
-	"Pi" => 0x3a0,
-	"PlusMinus" => 0xb1,
-	"Poincareplane" => 0x210c,
-	"Popf" => 0x2119,
-	"Pr" => 0x2abb,
-	"Precedes" => 0x227a,
-	"PrecedesEqual" => 0x2aaf,
-	"PrecedesSlantEqual" => 0x227c,
-	"PrecedesTilde" => 0x227e,
-	"Prime" => 0x2033,
-	"Product" => 0x220f,
-	"Proportion" => 0x2237,
-	"Proportional" => 0x221d,
-	"Pscr" => 0x1d4ab,
-	"Psi" => 0x3a8,
-	"QUOT" => 0x22,
-	"Qfr" => 0x1d514,
-	"Qopf" => 0x211a,
-	"Qscr" => 0x1d4ac,
-	"RBarr" => 0x2910,
-	"REG" => 0xae,
-	"Racute" => 0x154,
-	"Rang" => 0x27eb,
-	"Rarr" => 0x21a0,
-	"Rarrtl" => 0x2916,
-	"Rcaron" => 0x158,
-	"Rcedil" => 0x156,
-	"Rcy" => 0x420,
-	"Re" => 0x211c,
-	"ReverseElement" => 0x220b,
-	"ReverseEquilibrium" => 0x21cb,
-	"ReverseUpEquilibrium" => 0x296f,
-	"Rfr" => 0x211c,
-	"Rho" => 0x3a1,
-	"RightAngleBracket" => 0x27e9,
-	"RightArrow" => 0x2192,
-	"RightArrowBar" => 0x21e5,
-	"RightArrowLeftArrow" => 0x21c4,
-	"RightCeiling" => 0x2309,
-	"RightDoubleBracket" => 0x27e7,
-	"RightDownTeeVector" => 0x295d,
-	"RightDownVector" => 0x21c2,
-	"RightDownVectorBar" => 0x2955,
-	"RightFloor" => 0x230b,
-	"RightTee" => 0x22a2,
-	"RightTeeArrow" => 0x21a6,
-	"RightTeeVector" => 0x295b,
-	"RightTriangle" => 0x22b3,
-	"RightTriangleBar" => 0x29d0,
-	"RightTriangleEqual" => 0x22b5,
-	"RightUpDownVector" => 0x294f,
-	"RightUpTeeVector" => 0x295c,
-	"RightUpVector" => 0x21be,
-	"RightUpVectorBar" => 0x2954,
-	"RightVector" => 0x21c0,
-	"RightVectorBar" => 0x2953,
-	"Rightarrow" => 0x21d2,
-	"Ropf" => 0x211d,
-	"RoundImplies" => 0x2970,
-	"Rrightarrow" => 0x21db,
-	"Rscr" => 0x211b,
-	"Rsh" => 0x21b1,
-	"RuleDelayed" => 0x29f4,
-	"SHCHcy" => 0x429,
-	"SHcy" => 0x428,
-	"SOFTcy" => 0x42c,
-	"Sacute" => 0x15a,
-	"Sc" => 0x2abc,
-	"Scaron" => 0x160,
-	"Scedil" => 0x15e,
-	"Scirc" => 0x15c,
-	"Scy" => 0x421,
-	"Sfr" => 0x1d516,
-	"ShortDownArrow" => 0x2193,
-	"ShortLeftArrow" => 0x2190,
-	"ShortRightArrow" => 0x2192,
-	"ShortUpArrow" => 0x2191,
-	"Sigma" => 0x3a3,
-	"SmallCircle" => 0x2218,
-	"Sopf" => 0x1d54a,
-	"Sqrt" => 0x221a,
-	"Square" => 0x25a1,
-	"SquareIntersection" => 0x2293,
-	"SquareSubset" => 0x228f,
-	"SquareSubsetEqual" => 0x2291,
-	"SquareSuperset" => 0x2290,
-	"SquareSupersetEqual" => 0x2292,
-	"SquareUnion" => 0x2294,
-	"Sscr" => 0x1d4ae,
-	"Star" => 0x22c6,
-	"Sub" => 0x22d0,
-	"Subset" => 0x22d0,
-	"SubsetEqual" => 0x2286,
-	"Succeeds" => 0x227b,
-	"SucceedsEqual" => 0x2ab0,
-	"SucceedsSlantEqual" => 0x227d,
-	"SucceedsTilde" => 0x227f,
-	"SuchThat" => 0x220b,
-	"Sum" => 0x2211,
-	"Sup" => 0x22d1,
-	"Superset" => 0x2283,
-	"SupersetEqual" => 0x2287,
-	"Supset" => 0x22d1,
-	"THORN" => 0xde,
-	"TRADE" => 0x2122,
-	"TSHcy" => 0x40b,
-	"TScy" => 0x426,
-	"Tab" => 0x9,
-	"Tau" => 0x3a4,
-	"Tcaron" => 0x164,
-	"Tcedil" => 0x162,
-	"Tcy" => 0x422,
-	"Tfr" => 0x1d517,
-	"Therefore" => 0x2234,
-	"Theta" => 0x398,
-	"ThinSpace" => 0x2009,
-	"Tilde" => 0x223c,
-	"TildeEqual" => 0x2243,
-	"TildeFullEqual" => 0x2245,
-	"TildeTilde" => 0x2248,
-	"Topf" => 0x1d54b,
-	"TripleDot" => 0x20db,
-	"Tscr" => 0x1d4af,
-	"Tstrok" => 0x166,
-	"Uacute" => 0xda,
-	"Uarr" => 0x219f,
-	"Uarrocir" => 0x2949,
-	"Ubrcy" => 0x40e,
-	"Ubreve" => 0x16c,
-	"Ucirc" => 0xdb,
-	"Ucy" => 0x423,
-	"Udblac" => 0x170,
-	"Ufr" => 0x1d518,
-	"Ugrave" => 0xd9,
-	"Umacr" => 0x16a,
-	"UnderBar" => 0x332,
-	"UnderBrace" => 0x23df,
-	"UnderBracket" => 0x23b5,
-	"UnderParenthesis" => 0x23dd,
-	"Union" => 0x22c3,
-	"UnionPlus" => 0x228e,
-	"Uogon" => 0x172,
-	"Uopf" => 0x1d54c,
-	"UpArrow" => 0x2191,
-	"UpArrowBar" => 0x2912,
-	"UpArrowDownArrow" => 0x21c5,
-	"UpDownArrow" => 0x2195,
-	"UpEquilibrium" => 0x296e,
-	"UpTee" => 0x22a5,
-	"UpTeeArrow" => 0x21a5,
-	"Uparrow" => 0x21d1,
-	"Updownarrow" => 0x21d5,
-	"UpperLeftArrow" => 0x2196,
-	"UpperRightArrow" => 0x2197,
-	"Upsi" => 0x3d2,
-	"Upsilon" => 0x3a5,
-	"Uring" => 0x16e,
-	"Uscr" => 0x1d4b0,
-	"Utilde" => 0x168,
-	"Uuml" => 0xdc,
-	"VDash" => 0x22ab,
-	"Vbar" => 0x2aeb,
-	"Vcy" => 0x412,
-	"Vdash" => 0x22a9,
-	"Vdashl" => 0x2ae6,
-	"Vee" => 0x22c1,
-	"Verbar" => 0x2016,
-	"Vert" => 0x2016,
-	"VerticalBar" => 0x2223,
-	"VerticalLine" => 0x7c,
-	"VerticalSeparator" => 0x2758,
-	"VerticalTilde" => 0x2240,
-	"VeryThinSpace" => 0x200a,
-	"Vfr" => 0x1d519,
-	"Vopf" => 0x1d54d,
-	"Vscr" => 0x1d4b1,
-	"Vvdash" => 0x22aa,
-	"Wcirc" => 0x174,
-	"Wedge" => 0x22c0,
-	"Wfr" => 0x1d51a,
-	"Wopf" => 0x1d54e,
-	"Wscr" => 0x1d4b2,
-	"Xfr" => 0x1d51b,
-	"Xi" => 0x39e,
-	"Xopf" => 0x1d54f,
-	"Xscr" => 0x1d4b3,
-	"YAcy" => 0x42f,
-	"YIcy" => 0x407,
-	"YUcy" => 0x42e,
-	"Yacute" => 0xdd,
-	"Ycirc" => 0x176,
-	"Ycy" => 0x42b,
-	"Yfr" => 0x1d51c,
-	"Yopf" => 0x1d550,
-	"Yscr" => 0x1d4b4,
-	"Yuml" => 0x178,
-	"ZHcy" => 0x416,
-	"Zacute" => 0x179,
-	"Zcaron" => 0x17d,
-	"Zcy" => 0x417,
-	"Zdot" => 0x17b,
-	"ZeroWidthSpace" => 0x200b,
-	"Zeta" => 0x396,
-	"Zfr" => 0x2128,
-	"Zopf" => 0x2124,
-	"Zscr" => 0x1d4b5,
-	"aacute" => 0xe1,
-	"abreve" => 0x103,
-	"ac" => 0x223e,
-	"acd" => 0x223f,
-	"acirc" => 0xe2,
-	"acute" => 0xb4,
-	"acy" => 0x430,
-	"aelig" => 0xe6,
-	"af" => 0x2061,
-	"afr" => 0x1d51e,
-	"agrave" => 0xe0,
-	"alefsym" => 0x2135,
-	"aleph" => 0x2135,
-	"alpha" => 0x3b1,
-	"amacr" => 0x101,
-	"amalg" => 0x2a3f,
-	"amp" => 0x26,
-	"and" => 0x2227,
-	"andand" => 0x2a55,
-	"andd" => 0x2a5c,
-	"andslope" => 0x2a58,
-	"andv" => 0x2a5a,
-	"ang" => 0x2220,
-	"ange" => 0x29a4,
-	"angle" => 0x2220,
-	"angmsd" => 0x2221,
-	"angmsdaa" => 0x29a8,
-	"angmsdab" => 0x29a9,
-	"angmsdac" => 0x29aa,
-	"angmsdad" => 0x29ab,
-	"angmsdae" => 0x29ac,
-	"angmsdaf" => 0x29ad,
-	"angmsdag" => 0x29ae,
-	"angmsdah" => 0x29af,
-	"angrt" => 0x221f,
-	"angrtvb" => 0x22be,
-	"angrtvbd" => 0x299d,
-	"angsph" => 0x2222,
-	"angst" => 0x212b,
-	"angzarr" => 0x237c,
-	"aogon" => 0x105,
-	"aopf" => 0x1d552,
-	"ap" => 0x2248,
-	"apE" => 0x2a70,
-	"apacir" => 0x2a6f,
-	"ape" => 0x224a,
-	"apid" => 0x224b,
-	"apos" => 0x27,
-	"approx" => 0x2248,
-	"approxeq" => 0x224a,
-	"aring" => 0xe5,
-	"ascr" => 0x1d4b6,
-	"ast" => 0x2a,
-	"asymp" => 0x2248,
-	"asympeq" => 0x224d,
-	"atilde" => 0xe3,
-	"auml" => 0xe4,
-	"awconint" => 0x2233,
-	"awint" => 0x2a11,
-	"bNot" => 0x2aed,
-	"backcong" => 0x224c,
-	"backepsilon" => 0x3f6,
-	"backprime" => 0x2035,
-	"backsim" => 0x223d,
-	"backsimeq" => 0x22cd,
-	"barvee" => 0x22bd,
-	"barwed" => 0x2305,
-	"barwedge" => 0x2305,
-	"bbrk" => 0x23b5,
-	"bbrktbrk" => 0x23b6,
-	"bcong" => 0x224c,
-	"bcy" => 0x431,
-	"bdquo" => 0x201e,
-	"becaus" => 0x2235,
-	"because" => 0x2235,
-	"bemptyv" => 0x29b0,
-	"bepsi" => 0x3f6,
-	"bernou" => 0x212c,
-	"beta" => 0x3b2,
-	"beth" => 0x2136,
-	"between" => 0x226c,
-	"bfr" => 0x1d51f,
-	"bigcap" => 0x22c2,
-	"bigcirc" => 0x25ef,
-	"bigcup" => 0x22c3,
-	"bigodot" => 0x2a00,
-	"bigoplus" => 0x2a01,
-	"bigotimes" => 0x2a02,
-	"bigsqcup" => 0x2a06,
-	"bigstar" => 0x2605,
-	"bigtriangledown" => 0x25bd,
-	"bigtriangleup" => 0x25b3,
-	"biguplus" => 0x2a04,
-	"bigvee" => 0x22c1,
-	"bigwedge" => 0x22c0,
-	"bkarow" => 0x290d,
-	"blacklozenge" => 0x29eb,
-	"blacksquare" => 0x25aa,
-	"blacktriangle" => 0x25b4,
-	"blacktriangledown" => 0x25be,
-	"blacktriangleleft" => 0x25c2,
-	"blacktriangleright" => 0x25b8,
-	"blank" => 0x2423,
-	"blk12" => 0x2592,
-	"blk14" => 0x2591,
-	"blk34" => 0x2593,
-	"block" => 0x2588,
-	"bnot" => 0x2310,
-	"bopf" => 0x1d553,
-	"bot" => 0x22a5,
-	"bottom" => 0x22a5,
-	"bowtie" => 0x22c8,
-	"boxDL" => 0x2557,
-	"boxDR" => 0x2554,
-	"boxDl" => 0x2556,
-	"boxDr" => 0x2553,
-	"boxH" => 0x2550,
-	"boxHD" => 0x2566,
-	"boxHU" => 0x2569,
-	"boxHd" => 0x2564,
-	"boxHu" => 0x2567,
-	"boxUL" => 0x255d,
-	"boxUR" => 0x255a,
-	"boxUl" => 0x255c,
-	"boxUr" => 0x2559,
-	"boxV" => 0x2551,
-	"boxVH" => 0x256c,
-	"boxVL" => 0x2563,
-	"boxVR" => 0x2560,
-	"boxVh" => 0x256b,
-	"boxVl" => 0x2562,
-	"boxVr" => 0x255f,
-	"boxbox" => 0x29c9,
-	"boxdL" => 0x2555,
-	"boxdR" => 0x2552,
-	"boxdl" => 0x2510,
-	"boxdr" => 0x250c,
-	"boxh" => 0x2500,
-	"boxhD" => 0x2565,
-	"boxhU" => 0x2568,
-	"boxhd" => 0x252c,
-	"boxhu" => 0x2534,
-	"boxminus" => 0x229f,
-	"boxplus" => 0x229e,
-	"boxtimes" => 0x22a0,
-	"boxuL" => 0x255b,
-	"boxuR" => 0x2558,
-	"boxul" => 0x2518,
-	"boxur" => 0x2514,
-	"boxv" => 0x2502,
-	"boxvH" => 0x256a,
-	"boxvL" => 0x2561,
-	"boxvR" => 0x255e,
-	"boxvh" => 0x253c,
-	"boxvl" => 0x2524,
-	"boxvr" => 0x251c,
-	"bprime" => 0x2035,
-	"breve" => 0x2d8,
-	"brvbar" => 0xa6,
-	"bscr" => 0x1d4b7,
-	"bsemi" => 0x204f,
-	"bsim" => 0x223d,
-	"bsime" => 0x22cd,
-	"bsol" => 0x5c,
-	"bsolb" => 0x29c5,
-	"bull" => 0x2022,
-	"bullet" => 0x2022,
-	"bump" => 0x224e,
-	"bumpE" => 0x2aae,
-	"bumpe" => 0x224f,
-	"bumpeq" => 0x224f,
-	"cacute" => 0x107,
-	"cap" => 0x2229,
-	"capand" => 0x2a44,
-	"capbrcup" => 0x2a49,
-	"capcap" => 0x2a4b,
-	"capcup" => 0x2a47,
-	"capdot" => 0x2a40,
-	"caret" => 0x2041,
-	"caron" => 0x2c7,
-	"ccaps" => 0x2a4d,
-	"ccaron" => 0x10d,
-	"ccedil" => 0xe7,
-	"ccirc" => 0x109,
-	"ccups" => 0x2a4c,
-	"ccupssm" => 0x2a50,
-	"cdot" => 0x10b,
-	"cedil" => 0xb8,
-	"cemptyv" => 0x29b2,
-	"cent" => 0xa2,
-	"centerdot" => 0xb7,
-	"cfr" => 0x1d520,
-	"chcy" => 0x447,
-	"check" => 0x2713,
-	"checkmark" => 0x2713,
-	"chi" => 0x3c7,
-	"cir" => 0x25cb,
-	"cirE" => 0x29c3,
-	"circ" => 0x2c6,
-	"circeq" => 0x2257,
-	"circlearrowleft" => 0x21ba,
-	"circlearrowright" => 0x21bb,
-	"circledR" => 0xae,
-	"circledS" => 0x24c8,
-	"circledast" => 0x229b,
-	"circledcirc" => 0x229a,
-	"circleddash" => 0x229d,
-	"cire" => 0x2257,
-	"cirfnint" => 0x2a10,
-	"cirmid" => 0x2aef,
-	"cirscir" => 0x29c2,
-	"clubs" => 0x2663,
-	"clubsuit" => 0x2663,
-	"colon" => 0x3a,
-	"colone" => 0x2254,
-	"coloneq" => 0x2254,
-	"comma" => 0x2c,
-	"commat" => 0x40,
-	"comp" => 0x2201,
-	"compfn" => 0x2218,
-	"complement" => 0x2201,
-	"complexes" => 0x2102,
-	"cong" => 0x2245,
-	"congdot" => 0x2a6d,
-	"conint" => 0x222e,
-	"copf" => 0x1d554,
-	"coprod" => 0x2210,
-	"copy" => 0xa9,
-	"copysr" => 0x2117,
-	"crarr" => 0x21b5,
-	"cross" => 0x2717,
-	"cscr" => 0x1d4b8,
-	"csub" => 0x2acf,
-	"csube" => 0x2ad1,
-	"csup" => 0x2ad0,
-	"csupe" => 0x2ad2,
-	"ctdot" => 0x22ef,
-	"cudarrl" => 0x2938,
-	"cudarrr" => 0x2935,
-	"cuepr" => 0x22de,
-	"cuesc" => 0x22df,
-	"cularr" => 0x21b6,
-	"cularrp" => 0x293d,
-	"cup" => 0x222a,
-	"cupbrcap" => 0x2a48,
-	"cupcap" => 0x2a46,
-	"cupcup" => 0x2a4a,
-	"cupdot" => 0x228d,
-	"cupor" => 0x2a45,
-	"curarr" => 0x21b7,
-	"curarrm" => 0x293c,
-	"curlyeqprec" => 0x22de,
-	"curlyeqsucc" => 0x22df,
-	"curlyvee" => 0x22ce,
-	"curlywedge" => 0x22cf,
-	"curren" => 0xa4,
-	"curvearrowleft" => 0x21b6,
-	"curvearrowright" => 0x21b7,
-	"cuvee" => 0x22ce,
-	"cuwed" => 0x22cf,
-	"cwconint" => 0x2232,
-	"cwint" => 0x2231,
-	"cylcty" => 0x232d,
-	"dArr" => 0x21d3,
-	"dHar" => 0x2965,
-	"dagger" => 0x2020,
-	"daleth" => 0x2138,
-	"darr" => 0x2193,
-	"dash" => 0x2010,
-	"dashv" => 0x22a3,
-	"dbkarow" => 0x290f,
-	"dblac" => 0x2dd,
-	"dcaron" => 0x10f,
-	"dcy" => 0x434,
-	"dd" => 0x2146,
-	"ddagger" => 0x2021,
-	"ddarr" => 0x21ca,
-	"ddotseq" => 0x2a77,
-	"deg" => 0xb0,
-	"delta" => 0x3b4,
-	"demptyv" => 0x29b1,
-	"dfisht" => 0x297f,
-	"dfr" => 0x1d521,
-	"dharl" => 0x21c3,
-	"dharr" => 0x21c2,
-	"diam" => 0x22c4,
-	"diamond" => 0x22c4,
-	"diamondsuit" => 0x2666,
-	"diams" => 0x2666,
-	"die" => 0xa8,
-	"digamma" => 0x3dd,
-	"disin" => 0x22f2,
-	"div" => 0xf7,
-	"divide" => 0xf7,
-	"divideontimes" => 0x22c7,
-	"divonx" => 0x22c7,
-	"djcy" => 0x452,
-	"dlcorn" => 0x231e,
-	"dlcrop" => 0x230d,
-	"dollar" => 0x24,
-	"dopf" => 0x1d555,
-	"dot" => 0x2d9,
-	"doteq" => 0x2250,
-	"doteqdot" => 0x2251,
-	"dotminus" => 0x2238,
-	"dotplus" => 0x2214,
-	"dotsquare" => 0x22a1,
-	"doublebarwedge" => 0x2306,
-	"downarrow" => 0x2193,
-	"downdownarrows" => 0x21ca,
-	"downharpoonleft" => 0x21c3,
-	"downharpoonright" => 0x21c2,
-	"drbkarow" => 0x2910,
-	"drcorn" => 0x231f,
-	"drcrop" => 0x230c,
-	"dscr" => 0x1d4b9,
-	"dscy" => 0x455,
-	"dsol" => 0x29f6,
-	"dstrok" => 0x111,
-	"dtdot" => 0x22f1,
-	"dtri" => 0x25bf,
-	"dtrif" => 0x25be,
-	"duarr" => 0x21f5,
-	"duhar" => 0x296f,
-	"dwangle" => 0x29a6,
-	"dzcy" => 0x45f,
-	"dzigrarr" => 0x27ff,
-	"eDDot" => 0x2a77,
-	"eDot" => 0x2251,
-	"eacute" => 0xe9,
-	"easter" => 0x2a6e,
-	"ecaron" => 0x11b,
-	"ecir" => 0x2256,
-	"ecirc" => 0xea,
-	"ecolon" => 0x2255,
-	"ecy" => 0x44d,
-	"edot" => 0x117,
-	"ee" => 0x2147,
-	"efDot" => 0x2252,
-	"efr" => 0x1d522,
-	"eg" => 0x2a9a,
-	"egrave" => 0xe8,
-	"egs" => 0x2a96,
-	"egsdot" => 0x2a98,
-	"el" => 0x2a99,
-	"elinters" => 0x23e7,
-	"ell" => 0x2113,
-	"els" => 0x2a95,
-	"elsdot" => 0x2a97,
-	"emacr" => 0x113,
-	"empty" => 0x2205,
-	"emptyset" => 0x2205,
-	"emptyv" => 0x2205,
-	"emsp" => 0x2003,
-	"emsp13" => 0x2004,
-	"emsp14" => 0x2005,
-	"eng" => 0x14b,
-	"ensp" => 0x2002,
-	"eogon" => 0x119,
-	"eopf" => 0x1d556,
-	"epar" => 0x22d5,
-	"eparsl" => 0x29e3,
-	"eplus" => 0x2a71,
-	"epsi" => 0x3f5,
-	"epsilon" => 0x3b5,
-	"epsiv" => 0x3b5,
-	"eqcirc" => 0x2256,
-	"eqcolon" => 0x2255,
-	"eqsim" => 0x2242,
-	"eqslantgtr" => 0x2a96,
-	"eqslantless" => 0x2a95,
-	"equals" => 0x3d,
-	"equest" => 0x225f,
-	"equiv" => 0x2261,
-	"equivDD" => 0x2a78,
-	"eqvparsl" => 0x29e5,
-	"erDot" => 0x2253,
-	"erarr" => 0x2971,
-	"escr" => 0x212f,
-	"esdot" => 0x2250,
-	"esim" => 0x2242,
-	"eta" => 0x3b7,
-	"eth" => 0xf0,
-	"euml" => 0xeb,
-	"euro" => 0x20ac,
-	"excl" => 0x21,
-	"exist" => 0x2203,
-	"expectation" => 0x2130,
-	"exponentiale" => 0x2147,
-	"fallingdotseq" => 0x2252,
-	"fcy" => 0x444,
-	"female" => 0x2640,
-	"ffilig" => 0xfb03,
-	"fflig" => 0xfb00,
-	"ffllig" => 0xfb04,
-	"ffr" => 0x1d523,
-	"filig" => 0xfb01,
-	"flat" => 0x266d,
-	"fllig" => 0xfb02,
-	"fltns" => 0x25b1,
-	"fnof" => 0x192,
-	"fopf" => 0x1d557,
-	"forall" => 0x2200,
-	"fork" => 0x22d4,
-	"forkv" => 0x2ad9,
-	"fpartint" => 0x2a0d,
-	"frac12" => 0xbd,
-	"frac13" => 0x2153,
-	"frac14" => 0xbc,
-	"frac15" => 0x2155,
-	"frac16" => 0x2159,
-	"frac18" => 0x215b,
-	"frac23" => 0x2154,
-	"frac25" => 0x2156,
-	"frac34" => 0xbe,
-	"frac35" => 0x2157,
-	"frac38" => 0x215c,
-	"frac45" => 0x2158,
-	"frac56" => 0x215a,
-	"frac58" => 0x215d,
-	"frac78" => 0x215e,
-	"frasl" => 0x2044,
-	"frown" => 0x2322,
-	"fscr" => 0x1d4bb,
-	"gE" => 0x2267,
-	"gEl" => 0x2a8c,
-	"gacute" => 0x1f5,
-	"gamma" => 0x3b3,
-	"gammad" => 0x3dd,
-	"gap" => 0x2a86,
-	"gbreve" => 0x11f,
-	"gcirc" => 0x11d,
-	"gcy" => 0x433,
-	"gdot" => 0x121,
-	"ge" => 0x2265,
-	"gel" => 0x22db,
-	"geq" => 0x2265,
-	"geqq" => 0x2267,
-	"geqslant" => 0x2a7e,
-	"ges" => 0x2a7e,
-	"gescc" => 0x2aa9,
-	"gesdot" => 0x2a80,
-	"gesdoto" => 0x2a82,
-	"gesdotol" => 0x2a84,
-	"gesles" => 0x2a94,
-	"gfr" => 0x1d524,
-	"gg" => 0x226b,
-	"ggg" => 0x22d9,
-	"gimel" => 0x2137,
-	"gjcy" => 0x453,
-	"gl" => 0x2277,
-	"glE" => 0x2a92,
-	"gla" => 0x2aa5,
-	"glj" => 0x2aa4,
-	"gnE" => 0x2269,
-	"gnap" => 0x2a8a,
-	"gnapprox" => 0x2a8a,
-	"gne" => 0x2a88,
-	"gneq" => 0x2a88,
-	"gneqq" => 0x2269,
-	"gnsim" => 0x22e7,
-	"gopf" => 0x1d558,
-	"grave" => 0x60,
-	"gscr" => 0x210a,
-	"gsim" => 0x2273,
-	"gsime" => 0x2a8e,
-	"gsiml" => 0x2a90,
-	"gt" => 0x3e,
-	"gtcc" => 0x2aa7,
-	"gtcir" => 0x2a7a,
-	"gtdot" => 0x22d7,
-	"gtlPar" => 0x2995,
-	"gtquest" => 0x2a7c,
-	"gtrapprox" => 0x2a86,
-	"gtrarr" => 0x2978,
-	"gtrdot" => 0x22d7,
-	"gtreqless" => 0x22db,
-	"gtreqqless" => 0x2a8c,
-	"gtrless" => 0x2277,
-	"gtrsim" => 0x2273,
-	"hArr" => 0x21d4,
-	"hairsp" => 0x200a,
-	"half" => 0xbd,
-	"hamilt" => 0x210b,
-	"hardcy" => 0x44a,
-	"harr" => 0x2194,
-	"harrcir" => 0x2948,
-	"harrw" => 0x21ad,
-	"hbar" => 0x210f,
-	"hcirc" => 0x125,
-	"hearts" => 0x2665,
-	"heartsuit" => 0x2665,
-	"hellip" => 0x2026,
-	"hercon" => 0x22b9,
-	"hfr" => 0x1d525,
-	"hksearow" => 0x2925,
-	"hkswarow" => 0x2926,
-	"hoarr" => 0x21ff,
-	"homtht" => 0x223b,
-	"hookleftarrow" => 0x21a9,
-	"hookrightarrow" => 0x21aa,
-	"hopf" => 0x1d559,
-	"horbar" => 0x2015,
-	"hscr" => 0x1d4bd,
-	"hslash" => 0x210f,
-	"hstrok" => 0x127,
-	"hybull" => 0x2043,
-	"hyphen" => 0x2010,
-	"iacute" => 0xed,
-	"ic" => 0x2063,
-	"icirc" => 0xee,
-	"icy" => 0x438,
-	"iecy" => 0x435,
-	"iexcl" => 0xa1,
-	"iff" => 0x21d4,
-	"ifr" => 0x1d526,
-	"igrave" => 0xec,
-	"ii" => 0x2148,
-	"iiiint" => 0x2a0c,
-	"iiint" => 0x222d,
-	"iinfin" => 0x29dc,
-	"iiota" => 0x2129,
-	"ijlig" => 0x133,
-	"imacr" => 0x12b,
-	"image" => 0x2111,
-	"imagline" => 0x2110,
-	"imagpart" => 0x2111,
-	"imath" => 0x131,
-	"imof" => 0x22b7,
-	"imped" => 0x1b5,
-	"in" => 0x2208,
-	"incare" => 0x2105,
-	"infin" => 0x221e,
-	"infintie" => 0x29dd,
-	"inodot" => 0x131,
-	"int" => 0x222b,
-	"intcal" => 0x22ba,
-	"integers" => 0x2124,
-	"intercal" => 0x22ba,
-	"intlarhk" => 0x2a17,
-	"intprod" => 0x2a3c,
-	"iocy" => 0x451,
-	"iogon" => 0x12f,
-	"iopf" => 0x1d55a,
-	"iota" => 0x3b9,
-	"iprod" => 0x2a3c,
-	"iquest" => 0xbf,
-	"iscr" => 0x1d4be,
-	"isin" => 0x2208,
-	"isinE" => 0x22f9,
-	"isindot" => 0x22f5,
-	"isins" => 0x22f4,
-	"isinsv" => 0x22f3,
-	"isinv" => 0x2208,
-	"it" => 0x2062,
-	"itilde" => 0x129,
-	"iukcy" => 0x456,
-	"iuml" => 0xef,
-	"jcirc" => 0x135,
-	"jcy" => 0x439,
-	"jfr" => 0x1d527,
-	"jmath" => 0x237,
-	"jopf" => 0x1d55b,
-	"jscr" => 0x1d4bf,
-	"jsercy" => 0x458,
-	"jukcy" => 0x454,
-	"kappa" => 0x3ba,
-	"kappav" => 0x3f0,
-	"kcedil" => 0x137,
-	"kcy" => 0x43a,
-	"kfr" => 0x1d528,
-	"kgreen" => 0x138,
-	"khcy" => 0x445,
-	"kjcy" => 0x45c,
-	"kopf" => 0x1d55c,
-	"kscr" => 0x1d4c0,
-	"lAarr" => 0x21da,
-	"lArr" => 0x21d0,
-	"lAtail" => 0x291b,
-	"lBarr" => 0x290e,
-	"lE" => 0x2266,
-	"lEg" => 0x2a8b,
-	"lHar" => 0x2962,
-	"lacute" => 0x13a,
-	"laemptyv" => 0x29b4,
-	"lagran" => 0x2112,
-	"lambda" => 0x3bb,
-	"lang" => 0x27e8,
-	"langd" => 0x2991,
-	"langle" => 0x27e8,
-	"lap" => 0x2a85,
-	"laquo" => 0xab,
-	"larr" => 0x2190,
-	"larrb" => 0x21e4,
-	"larrbfs" => 0x291f,
-	"larrfs" => 0x291d,
-	"larrhk" => 0x21a9,
-	"larrlp" => 0x21ab,
-	"larrpl" => 0x2939,
-	"larrsim" => 0x2973,
-	"larrtl" => 0x21a2,
-	"lat" => 0x2aab,
-	"latail" => 0x2919,
-	"late" => 0x2aad,
-	"lbarr" => 0x290c,
-	"lbbrk" => 0x2772,
-	"lbrace" => 0x7b,
-	"lbrack" => 0x5b,
-	"lbrke" => 0x298b,
-	"lbrksld" => 0x298f,
-	"lbrkslu" => 0x298d,
-	"lcaron" => 0x13e,
-	"lcedil" => 0x13c,
-	"lceil" => 0x2308,
-	"lcub" => 0x7b,
-	"lcy" => 0x43b,
-	"ldca" => 0x2936,
-	"ldquo" => 0x201c,
-	"ldquor" => 0x201e,
-	"ldrdhar" => 0x2967,
-	"ldrushar" => 0x294b,
-	"ldsh" => 0x21b2,
-	"le" => 0x2264,
-	"leftarrow" => 0x2190,
-	"leftarrowtail" => 0x21a2,
-	"leftharpoondown" => 0x21bd,
-	"leftharpoonup" => 0x21bc,
-	"leftleftarrows" => 0x21c7,
-	"leftrightarrow" => 0x2194,
-	"leftrightarrows" => 0x21c6,
-	"leftrightharpoons" => 0x21cb,
-	"leftrightsquigarrow" => 0x21ad,
-	"leftthreetimes" => 0x22cb,
-	"leg" => 0x22da,
-	"leq" => 0x2264,
-	"leqq" => 0x2266,
-	"leqslant" => 0x2a7d,
-	"les" => 0x2a7d,
-	"lescc" => 0x2aa8,
-	"lesdot" => 0x2a7f,
-	"lesdoto" => 0x2a81,
-	"lesdotor" => 0x2a83,
-	"lesges" => 0x2a93,
-	"lessapprox" => 0x2a85,
-	"lessdot" => 0x22d6,
-	"lesseqgtr" => 0x22da,
-	"lesseqqgtr" => 0x2a8b,
-	"lessgtr" => 0x2276,
-	"lesssim" => 0x2272,
-	"lfisht" => 0x297c,
-	"lfloor" => 0x230a,
-	"lfr" => 0x1d529,
-	"lg" => 0x2276,
-	"lgE" => 0x2a91,
-	"lhard" => 0x21bd,
-	"lharu" => 0x21bc,
-	"lharul" => 0x296a,
-	"lhblk" => 0x2584,
-	"ljcy" => 0x459,
-	"ll" => 0x226a,
-	"llarr" => 0x21c7,
-	"llcorner" => 0x231e,
-	"llhard" => 0x296b,
-	"lltri" => 0x25fa,
-	"lmidot" => 0x140,
-	"lmoust" => 0x23b0,
-	"lmoustache" => 0x23b0,
-	"lnE" => 0x2268,
-	"lnap" => 0x2a89,
-	"lnapprox" => 0x2a89,
-	"lne" => 0x2a87,
-	"lneq" => 0x2a87,
-	"lneqq" => 0x2268,
-	"lnsim" => 0x22e6,
-	"loang" => 0x27ec,
-	"loarr" => 0x21fd,
-	"lobrk" => 0x27e6,
-	"longleftarrow" => 0x27f5,
-	"longleftrightarrow" => 0x27f7,
-	"longmapsto" => 0x27fc,
-	"longrightarrow" => 0x27f6,
-	"looparrowleft" => 0x21ab,
-	"looparrowright" => 0x21ac,
-	"lopar" => 0x2985,
-	"lopf" => 0x1d55d,
-	"loplus" => 0x2a2d,
-	"lotimes" => 0x2a34,
-	"lowast" => 0x2217,
-	"lowbar" => 0x5f,
-	"loz" => 0x25ca,
-	"lozenge" => 0x25ca,
-	"lozf" => 0x29eb,
-	"lpar" => 0x28,
-	"lparlt" => 0x2993,
-	"lrarr" => 0x21c6,
-	"lrcorner" => 0x231f,
-	"lrhar" => 0x21cb,
-	"lrhard" => 0x296d,
-	"lrm" => 0x200e,
-	"lrtri" => 0x22bf,
-	"lsaquo" => 0x2039,
-	"lscr" => 0x1d4c1,
-	"lsh" => 0x21b0,
-	"lsim" => 0x2272,
-	"lsime" => 0x2a8d,
-	"lsimg" => 0x2a8f,
-	"lsqb" => 0x5b,
-	"lsquo" => 0x2018,
-	"lsquor" => 0x201a,
-	"lstrok" => 0x142,
-	"lt" => 0x3c,
-	"ltcc" => 0x2aa6,
-	"ltcir" => 0x2a79,
-	"ltdot" => 0x22d6,
-	"lthree" => 0x22cb,
-	"ltimes" => 0x22c9,
-	"ltlarr" => 0x2976,
-	"ltquest" => 0x2a7b,
-	"ltrPar" => 0x2996,
-	"ltri" => 0x25c3,
-	"ltrie" => 0x22b4,
-	"ltrif" => 0x25c2,
-	"lurdshar" => 0x294a,
-	"luruhar" => 0x2966,
-	"mDDot" => 0x223a,
-	"macr" => 0xaf,
-	"male" => 0x2642,
-	"malt" => 0x2720,
-	"maltese" => 0x2720,
-	"map" => 0x21a6,
-	"mapsto" => 0x21a6,
-	"mapstodown" => 0x21a7,
-	"mapstoleft" => 0x21a4,
-	"mapstoup" => 0x21a5,
-	"marker" => 0x25ae,
-	"mcomma" => 0x2a29,
-	"mcy" => 0x43c,
-	"mdash" => 0x2014,
-	"measuredangle" => 0x2221,
-	"mfr" => 0x1d52a,
-	"mho" => 0x2127,
-	"micro" => 0xb5,
-	"mid" => 0x2223,
-	"midast" => 0x2a,
-	"midcir" => 0x2af0,
-	"middot" => 0xb7,
-	"minus" => 0x2212,
-	"minusb" => 0x229f,
-	"minusd" => 0x2238,
-	"minusdu" => 0x2a2a,
-	"mlcp" => 0x2adb,
-	"mldr" => 0x2026,
-	"mnplus" => 0x2213,
-	"models" => 0x22a7,
-	"mopf" => 0x1d55e,
-	"mp" => 0x2213,
-	"mscr" => 0x1d4c2,
-	"mstpos" => 0x223e,
-	"mu" => 0x3bc,
-	"multimap" => 0x22b8,
-	"mumap" => 0x22b8,
-	"nLeftarrow" => 0x21cd,
-	"nLeftrightarrow" => 0x21ce,
-	"nRightarrow" => 0x21cf,
-	"nVDash" => 0x22af,
-	"nVdash" => 0x22ae,
-	"nabla" => 0x2207,
-	"nacute" => 0x144,
-	"nap" => 0x2249,
-	"napos" => 0x149,
-	"napprox" => 0x2249,
-	"natur" => 0x266e,
-	"natural" => 0x266e,
-	"naturals" => 0x2115,
-	"nbsp" => 0xa0,
-	"ncap" => 0x2a43,
-	"ncaron" => 0x148,
-	"ncedil" => 0x146,
-	"ncong" => 0x2247,
-	"ncup" => 0x2a42,
-	"ncy" => 0x43d,
-	"ndash" => 0x2013,
-	"ne" => 0x2260,
-	"neArr" => 0x21d7,
-	"nearhk" => 0x2924,
-	"nearr" => 0x2197,
-	"nearrow" => 0x2197,
-	"nequiv" => 0x2262,
-	"nesear" => 0x2928,
-	"nexist" => 0x2204,
-	"nexists" => 0x2204,
-	"nfr" => 0x1d52b,
-	"nge" => 0x2271,
-	"ngeq" => 0x2271,
-	"ngsim" => 0x2275,
-	"ngt" => 0x226f,
-	"ngtr" => 0x226f,
-	"nhArr" => 0x21ce,
-	"nharr" => 0x21ae,
-	"nhpar" => 0x2af2,
-	"ni" => 0x220b,
-	"nis" => 0x22fc,
-	"nisd" => 0x22fa,
-	"niv" => 0x220b,
-	"njcy" => 0x45a,
-	"nlArr" => 0x21cd,
-	"nlarr" => 0x219a,
-	"nldr" => 0x2025,
-	"nle" => 0x2270,
-	"nleftarrow" => 0x219a,
-	"nleftrightarrow" => 0x21ae,
-	"nleq" => 0x2270,
-	"nless" => 0x226e,
-	"nlsim" => 0x2274,
-	"nlt" => 0x226e,
-	"nltri" => 0x22ea,
-	"nltrie" => 0x22ec,
-	"nmid" => 0x2224,
-	"nopf" => 0x1d55f,
-	"not" => 0xac,
-	"notin" => 0x2209,
-	"notinva" => 0x2209,
-	"notinvb" => 0x22f7,
-	"notinvc" => 0x22f6,
-	"notni" => 0x220c,
-	"notniva" => 0x220c,
-	"notnivb" => 0x22fe,
-	"notnivc" => 0x22fd,
-	"npar" => 0x2226,
-	"nparallel" => 0x2226,
-	"npolint" => 0x2a14,
-	"npr" => 0x2280,
-	"nprcue" => 0x22e0,
-	"nprec" => 0x2280,
-	"nrArr" => 0x21cf,
-	"nrarr" => 0x219b,
-	"nrightarrow" => 0x219b,
-	"nrtri" => 0x22eb,
-	"nrtrie" => 0x22ed,
-	"nsc" => 0x2281,
-	"nsccue" => 0x22e1,
-	"nscr" => 0x1d4c3,
-	"nshortmid" => 0x2224,
-	"nshortparallel" => 0x2226,
-	"nsim" => 0x2241,
-	"nsime" => 0x2244,
-	"nsimeq" => 0x2244,
-	"nsmid" => 0x2224,
-	"nspar" => 0x2226,
-	"nsqsube" => 0x22e2,
-	"nsqsupe" => 0x22e3,
-	"nsub" => 0x2284,
-	"nsube" => 0x2288,
-	"nsubseteq" => 0x2288,
-	"nsucc" => 0x2281,
-	"nsup" => 0x2285,
-	"nsupe" => 0x2289,
-	"nsupseteq" => 0x2289,
-	"ntgl" => 0x2279,
-	"ntilde" => 0xf1,
-	"ntlg" => 0x2278,
-	"ntriangleleft" => 0x22ea,
-	"ntrianglelefteq" => 0x22ec,
-	"ntriangleright" => 0x22eb,
-	"ntrianglerighteq" => 0x22ed,
-	"nu" => 0x3bd,
-	"num" => 0x23,
-	"numero" => 0x2116,
-	"numsp" => 0x2007,
-	"nvDash" => 0x22ad,
-	"nvHarr" => 0x2904,
-	"nvdash" => 0x22ac,
-	"nvinfin" => 0x29de,
-	"nvlArr" => 0x2902,
-	"nvrArr" => 0x2903,
-	"nwArr" => 0x21d6,
-	"nwarhk" => 0x2923,
-	"nwarr" => 0x2196,
-	"nwarrow" => 0x2196,
-	"nwnear" => 0x2927,
-	"oS" => 0x24c8,
-	"oacute" => 0xf3,
-	"oast" => 0x229b,
-	"ocir" => 0x229a,
-	"ocirc" => 0xf4,
-	"ocy" => 0x43e,
-	"odash" => 0x229d,
-	"odblac" => 0x151,
-	"odiv" => 0x2a38,
-	"odot" => 0x2299,
-	"odsold" => 0x29bc,
-	"oelig" => 0x153,
-	"ofcir" => 0x29bf,
-	"ofr" => 0x1d52c,
-	"ogon" => 0x2db,
-	"ograve" => 0xf2,
-	"ogt" => 0x29c1,
-	"ohbar" => 0x29b5,
-	"ohm" => 0x2126,
-	"oint" => 0x222e,
-	"olarr" => 0x21ba,
-	"olcir" => 0x29be,
-	"olcross" => 0x29bb,
-	"oline" => 0x203e,
-	"olt" => 0x29c0,
-	"omacr" => 0x14d,
-	"omega" => 0x3c9,
-	"omicron" => 0x3bf,
-	"omid" => 0x29b6,
-	"ominus" => 0x2296,
-	"oopf" => 0x1d560,
-	"opar" => 0x29b7,
-	"operp" => 0x29b9,
-	"oplus" => 0x2295,
-	"or" => 0x2228,
-	"orarr" => 0x21bb,
-	"ord" => 0x2a5d,
-	"order" => 0x2134,
-	"orderof" => 0x2134,
-	"ordf" => 0xaa,
-	"ordm" => 0xba,
-	"origof" => 0x22b6,
-	"oror" => 0x2a56,
-	"orslope" => 0x2a57,
-	"orv" => 0x2a5b,
-	"oscr" => 0x2134,
-	"oslash" => 0xf8,
-	"osol" => 0x2298,
-	"otilde" => 0xf5,
-	"otimes" => 0x2297,
-	"otimesas" => 0x2a36,
-	"ouml" => 0xf6,
-	"ovbar" => 0x233d,
-	"par" => 0x2225,
-	"para" => 0xb6,
-	"parallel" => 0x2225,
-	"parsim" => 0x2af3,
-	"parsl" => 0x2afd,
-	"part" => 0x2202,
-	"pcy" => 0x43f,
-	"percnt" => 0x25,
-	"period" => 0x2e,
-	"permil" => 0x2030,
-	"perp" => 0x22a5,
-	"pertenk" => 0x2031,
-	"pfr" => 0x1d52d,
-	"phi" => 0x3c6,
-	"phiv" => 0x3c6,
-	"phmmat" => 0x2133,
-	"phone" => 0x260e,
-	"pi" => 0x3c0,
-	"pitchfork" => 0x22d4,
-	"piv" => 0x3d6,
-	"planck" => 0x210f,
-	"planckh" => 0x210e,
-	"plankv" => 0x210f,
-	"plus" => 0x2b,
-	"plusacir" => 0x2a23,
-	"plusb" => 0x229e,
-	"pluscir" => 0x2a22,
-	"plusdo" => 0x2214,
-	"plusdu" => 0x2a25,
-	"pluse" => 0x2a72,
-	"plusmn" => 0xb1,
-	"plussim" => 0x2a26,
-	"plustwo" => 0x2a27,
-	"pm" => 0xb1,
-	"pointint" => 0x2a15,
-	"popf" => 0x1d561,
-	"pound" => 0xa3,
-	"pr" => 0x227a,
-	"prE" => 0x2ab3,
-	"prap" => 0x2ab7,
-	"prcue" => 0x227c,
-	"pre" => 0x2aaf,
-	"prec" => 0x227a,
-	"precapprox" => 0x2ab7,
-	"preccurlyeq" => 0x227c,
-	"preceq" => 0x2aaf,
-	"precnapprox" => 0x2ab9,
-	"precneqq" => 0x2ab5,
-	"precnsim" => 0x22e8,
-	"precsim" => 0x227e,
-	"prime" => 0x2032,
-	"primes" => 0x2119,
-	"prnE" => 0x2ab5,
-	"prnap" => 0x2ab9,
-	"prnsim" => 0x22e8,
-	"prod" => 0x220f,
-	"profalar" => 0x232e,
-	"profline" => 0x2312,
-	"profsurf" => 0x2313,
-	"prop" => 0x221d,
-	"propto" => 0x221d,
-	"prsim" => 0x227e,
-	"prurel" => 0x22b0,
-	"pscr" => 0x1d4c5,
-	"psi" => 0x3c8,
-	"puncsp" => 0x2008,
-	"qfr" => 0x1d52e,
-	"qint" => 0x2a0c,
-	"qopf" => 0x1d562,
-	"qprime" => 0x2057,
-	"qscr" => 0x1d4c6,
-	"quaternions" => 0x210d,
-	"quatint" => 0x2a16,
-	"quest" => 0x3f,
-	"questeq" => 0x225f,
-	"quot" => 0x22,
-	"rAarr" => 0x21db,
-	"rArr" => 0x21d2,
-	"rAtail" => 0x291c,
-	"rBarr" => 0x290f,
-	"rHar" => 0x2964,
-	"race" => 0x29da,
-	"racute" => 0x155,
-	"radic" => 0x221a,
-	"raemptyv" => 0x29b3,
-	"rang" => 0x27e9,
-	"rangd" => 0x2992,
-	"range" => 0x29a5,
-	"rangle" => 0x27e9,
-	"raquo" => 0xbb,
-	"rarr" => 0x2192,
-	"rarrap" => 0x2975,
-	"rarrb" => 0x21e5,
-	"rarrbfs" => 0x2920,
-	"rarrc" => 0x2933,
-	"rarrfs" => 0x291e,
-	"rarrhk" => 0x21aa,
-	"rarrlp" => 0x21ac,
-	"rarrpl" => 0x2945,
-	"rarrsim" => 0x2974,
-	"rarrtl" => 0x21a3,
-	"rarrw" => 0x219d,
-	"ratail" => 0x291a,
-	"ratio" => 0x2236,
-	"rationals" => 0x211a,
-	"rbarr" => 0x290d,
-	"rbbrk" => 0x2773,
-	"rbrace" => 0x7d,
-	"rbrack" => 0x5d,
-	"rbrke" => 0x298c,
-	"rbrksld" => 0x298e,
-	"rbrkslu" => 0x2990,
-	"rcaron" => 0x159,
-	"rcedil" => 0x157,
-	"rceil" => 0x2309,
-	"rcub" => 0x7d,
-	"rcy" => 0x440,
-	"rdca" => 0x2937,
-	"rdldhar" => 0x2969,
-	"rdquo" => 0x201d,
-	"rdquor" => 0x201d,
-	"rdsh" => 0x21b3,
-	"real" => 0x211c,
-	"realine" => 0x211b,
-	"realpart" => 0x211c,
-	"reals" => 0x211d,
-	"rect" => 0x25ad,
-	"reg" => 0xae,
-	"rfisht" => 0x297d,
-	"rfloor" => 0x230b,
-	"rfr" => 0x1d52f,
-	"rhard" => 0x21c1,
-	"rharu" => 0x21c0,
-	"rharul" => 0x296c,
-	"rho" => 0x3c1,
-	"rhov" => 0x3f1,
-	"rightarrow" => 0x2192,
-	"rightarrowtail" => 0x21a3,
-	"rightharpoondown" => 0x21c1,
-	"rightharpoonup" => 0x21c0,
-	"rightleftarrows" => 0x21c4,
-	"rightleftharpoons" => 0x21cc,
-	"rightrightarrows" => 0x21c9,
-	"rightsquigarrow" => 0x219d,
-	"rightthreetimes" => 0x22cc,
-	"ring" => 0x2da,
-	"risingdotseq" => 0x2253,
-	"rlarr" => 0x21c4,
-	"rlhar" => 0x21cc,
-	"rlm" => 0x200f,
-	"rmoust" => 0x23b1,
-	"rmoustache" => 0x23b1,
-	"rnmid" => 0x2aee,
-	"roang" => 0x27ed,
-	"roarr" => 0x21fe,
-	"robrk" => 0x27e7,
-	"ropar" => 0x2986,
-	"ropf" => 0x1d563,
-	"roplus" => 0x2a2e,
-	"rotimes" => 0x2a35,
-	"rpar" => 0x29,
-	"rpargt" => 0x2994,
-	"rppolint" => 0x2a12,
-	"rrarr" => 0x21c9,
-	"rsaquo" => 0x203a,
-	"rscr" => 0x1d4c7,
-	"rsh" => 0x21b1,
-	"rsqb" => 0x5d,
-	"rsquo" => 0x2019,
-	"rsquor" => 0x2019,
-	"rthree" => 0x22cc,
-	"rtimes" => 0x22ca,
-	"rtri" => 0x25b9,
-	"rtrie" => 0x22b5,
-	"rtrif" => 0x25b8,
-	"rtriltri" => 0x29ce,
-	"ruluhar" => 0x2968,
-	"rx" => 0x211e,
-	"sacute" => 0x15b,
-	"sbquo" => 0x201a,
-	"sc" => 0x227b,
-	"scE" => 0x2ab4,
-	"scap" => 0x2ab8,
-	"scaron" => 0x161,
-	"sccue" => 0x227d,
-	"sce" => 0x2ab0,
-	"scedil" => 0x15f,
-	"scirc" => 0x15d,
-	"scnE" => 0x2ab6,
-	"scnap" => 0x2aba,
-	"scnsim" => 0x22e9,
-	"scpolint" => 0x2a13,
-	"scsim" => 0x227f,
-	"scy" => 0x441,
-	"sdot" => 0x22c5,
-	"sdotb" => 0x22a1,
-	"sdote" => 0x2a66,
-	"seArr" => 0x21d8,
-	"searhk" => 0x2925,
-	"searr" => 0x2198,
-	"searrow" => 0x2198,
-	"sect" => 0xa7,
-	"semi" => 0x3b,
-	"seswar" => 0x2929,
-	"setminus" => 0x2216,
-	"setmn" => 0x2216,
-	"sext" => 0x2736,
-	"sfr" => 0x1d530,
-	"sfrown" => 0x2322,
-	"sharp" => 0x266f,
-	"shchcy" => 0x449,
-	"shcy" => 0x448,
-	"shortmid" => 0x2223,
-	"shortparallel" => 0x2225,
-	"shy" => 0xad,
-	"sigma" => 0x3c3,
-	"sigmaf" => 0x3c2,
-	"sigmav" => 0x3c2,
-	"sim" => 0x223c,
-	"simdot" => 0x2a6a,
-	"sime" => 0x2243,
-	"simeq" => 0x2243,
-	"simg" => 0x2a9e,
-	"simgE" => 0x2aa0,
-	"siml" => 0x2a9d,
-	"simlE" => 0x2a9f,
-	"simne" => 0x2246,
-	"simplus" => 0x2a24,
-	"simrarr" => 0x2972,
-	"slarr" => 0x2190,
-	"smallsetminus" => 0x2216,
-	"smashp" => 0x2a33,
-	"smeparsl" => 0x29e4,
-	"smid" => 0x2223,
-	"smile" => 0x2323,
-	"smt" => 0x2aaa,
-	"smte" => 0x2aac,
-	"softcy" => 0x44c,
-	"sol" => 0x2f,
-	"solb" => 0x29c4,
-	"solbar" => 0x233f,
-	"sopf" => 0x1d564,
-	"spades" => 0x2660,
-	"spadesuit" => 0x2660,
-	"spar" => 0x2225,
-	"sqcap" => 0x2293,
-	"sqcup" => 0x2294,
-	"sqsub" => 0x228f,
-	"sqsube" => 0x2291,
-	"sqsubset" => 0x228f,
-	"sqsubseteq" => 0x2291,
-	"sqsup" => 0x2290,
-	"sqsupe" => 0x2292,
-	"sqsupset" => 0x2290,
-	"sqsupseteq" => 0x2292,
-	"squ" => 0x25a1,
-	"square" => 0x25a1,
-	"squarf" => 0x25aa,
-	"squf" => 0x25aa,
-	"srarr" => 0x2192,
-	"sscr" => 0x1d4c8,
-	"ssetmn" => 0x2216,
-	"ssmile" => 0x2323,
-	"sstarf" => 0x22c6,
-	"star" => 0x2606,
-	"starf" => 0x2605,
-	"straightepsilon" => 0x3f5,
-	"straightphi" => 0x3d5,
-	"strns" => 0xaf,
-	"sub" => 0x2282,
-	"subE" => 0x2ac5,
-	"subdot" => 0x2abd,
-	"sube" => 0x2286,
-	"subedot" => 0x2ac3,
-	"submult" => 0x2ac1,
-	"subnE" => 0x2acb,
-	"subne" => 0x228a,
-	"subplus" => 0x2abf,
-	"subrarr" => 0x2979,
-	"subset" => 0x2282,
-	"subseteq" => 0x2286,
-	"subseteqq" => 0x2ac5,
-	"subsetneq" => 0x228a,
-	"subsetneqq" => 0x2acb,
-	"subsim" => 0x2ac7,
-	"subsub" => 0x2ad5,
-	"subsup" => 0x2ad3,
-	"succ" => 0x227b,
-	"succapprox" => 0x2ab8,
-	"succcurlyeq" => 0x227d,
-	"succeq" => 0x2ab0,
-	"succnapprox" => 0x2aba,
-	"succneqq" => 0x2ab6,
-	"succnsim" => 0x22e9,
-	"succsim" => 0x227f,
-	"sum" => 0x2211,
-	"sung" => 0x266a,
-	"sup" => 0x2283,
-	"sup1" => 0xb9,
-	"sup2" => 0xb2,
-	"sup3" => 0xb3,
-	"supE" => 0x2ac6,
-	"supdot" => 0x2abe,
-	"supdsub" => 0x2ad8,
-	"supe" => 0x2287,
-	"supedot" => 0x2ac4,
-	"suphsub" => 0x2ad7,
-	"suplarr" => 0x297b,
-	"supmult" => 0x2ac2,
-	"supnE" => 0x2acc,
-	"supne" => 0x228b,
-	"supplus" => 0x2ac0,
-	"supset" => 0x2283,
-	"supseteq" => 0x2287,
-	"supseteqq" => 0x2ac6,
-	"supsetneq" => 0x228b,
-	"supsetneqq" => 0x2acc,
-	"supsim" => 0x2ac8,
-	"supsub" => 0x2ad4,
-	"supsup" => 0x2ad6,
-	"swArr" => 0x21d9,
-	"swarhk" => 0x2926,
-	"swarr" => 0x2199,
-	"swarrow" => 0x2199,
-	"swnwar" => 0x292a,
-	"szlig" => 0xdf,
-	"target" => 0x2316,
-	"tau" => 0x3c4,
-	"tbrk" => 0x23b4,
-	"tcaron" => 0x165,
-	"tcedil" => 0x163,
-	"tcy" => 0x442,
-	"tdot" => 0x20db,
-	"telrec" => 0x2315,
-	"tfr" => 0x1d531,
-	"there4" => 0x2234,
-	"therefore" => 0x2234,
-	"theta" => 0x3b8,
-	"thetasym" => 0x3d1,
-	"thetav" => 0x3d1,
-	"thickapprox" => 0x2248,
-	"thicksim" => 0x223c,
-	"thinsp" => 0x2009,
-	"thkap" => 0x2248,
-	"thksim" => 0x223c,
-	"thorn" => 0xfe,
-	"tilde" => 0x2dc,
-	"times" => 0xd7,
-	"timesb" => 0x22a0,
-	"timesbar" => 0x2a31,
-	"timesd" => 0x2a30,
-	"tint" => 0x222d,
-	"toea" => 0x2928,
-	"top" => 0x22a4,
-	"topbot" => 0x2336,
-	"topcir" => 0x2af1,
-	"topf" => 0x1d565,
-	"topfork" => 0x2ada,
-	"tosa" => 0x2929,
-	"tprime" => 0x2034,
-	"trade" => 0x2122,
-	"triangle" => 0x25b5,
-	"triangledown" => 0x25bf,
-	"triangleleft" => 0x25c3,
-	"trianglelefteq" => 0x22b4,
-	"triangleq" => 0x225c,
-	"triangleright" => 0x25b9,
-	"trianglerighteq" => 0x22b5,
-	"tridot" => 0x25ec,
-	"trie" => 0x225c,
-	"triminus" => 0x2a3a,
-	"triplus" => 0x2a39,
-	"trisb" => 0x29cd,
-	"tritime" => 0x2a3b,
-	"trpezium" => 0x23e2,
-	"tscr" => 0x1d4c9,
-	"tscy" => 0x446,
-	"tshcy" => 0x45b,
-	"tstrok" => 0x167,
-	"twixt" => 0x226c,
-	"twoheadleftarrow" => 0x219e,
-	"twoheadrightarrow" => 0x21a0,
-	"uArr" => 0x21d1,
-	"uHar" => 0x2963,
-	"uacute" => 0xfa,
-	"uarr" => 0x2191,
-	"ubrcy" => 0x45e,
-	"ubreve" => 0x16d,
-	"ucirc" => 0xfb,
-	"ucy" => 0x443,
-	"udarr" => 0x21c5,
-	"udblac" => 0x171,
-	"udhar" => 0x296e,
-	"ufisht" => 0x297e,
-	"ufr" => 0x1d532,
-	"ugrave" => 0xf9,
-	"uharl" => 0x21bf,
-	"uharr" => 0x21be,
-	"uhblk" => 0x2580,
-	"ulcorn" => 0x231c,
-	"ulcorner" => 0x231c,
-	"ulcrop" => 0x230f,
-	"ultri" => 0x25f8,
-	"umacr" => 0x16b,
-	"uml" => 0xa8,
-	"uogon" => 0x173,
-	"uopf" => 0x1d566,
-	"uparrow" => 0x2191,
-	"updownarrow" => 0x2195,
-	"upharpoonleft" => 0x21bf,
-	"upharpoonright" => 0x21be,
-	"uplus" => 0x228e,
-	"upsi" => 0x3c5,
-	"upsih" => 0x3d2,
-	"upsilon" => 0x3c5,
-	"upuparrows" => 0x21c8,
-	"urcorn" => 0x231d,
-	"urcorner" => 0x231d,
-	"urcrop" => 0x230e,
-	"uring" => 0x16f,
-	"urtri" => 0x25f9,
-	"uscr" => 0x1d4ca,
-	"utdot" => 0x22f0,
-	"utilde" => 0x169,
-	"utri" => 0x25b5,
-	"utrif" => 0x25b4,
-	"uuarr" => 0x21c8,
-	"uuml" => 0xfc,
-	"uwangle" => 0x29a7,
-	"vArr" => 0x21d5,
-	"vBar" => 0x2ae8,
-	"vBarv" => 0x2ae9,
-	"vDash" => 0x22a8,
-	"vangrt" => 0x299c,
-	"varepsilon" => 0x3b5,
-	"varkappa" => 0x3f0,
-	"varnothing" => 0x2205,
-	"varphi" => 0x3c6,
-	"varpi" => 0x3d6,
-	"varpropto" => 0x221d,
-	"varr" => 0x2195,
-	"varrho" => 0x3f1,
-	"varsigma" => 0x3c2,
-	"vartheta" => 0x3d1,
-	"vartriangleleft" => 0x22b2,
-	"vartriangleright" => 0x22b3,
-	"vcy" => 0x432,
-	"vdash" => 0x22a2,
-	"vee" => 0x2228,
-	"veebar" => 0x22bb,
-	"veeeq" => 0x225a,
-	"vellip" => 0x22ee,
-	"verbar" => 0x7c,
-	"vert" => 0x7c,
-	"vfr" => 0x1d533,
-	"vltri" => 0x22b2,
-	"vopf" => 0x1d567,
-	"vprop" => 0x221d,
-	"vrtri" => 0x22b3,
-	"vscr" => 0x1d4cb,
-	"vzigzag" => 0x299a,
-	"wcirc" => 0x175,
-	"wedbar" => 0x2a5f,
-	"wedge" => 0x2227,
-	"wedgeq" => 0x2259,
-	"weierp" => 0x2118,
-	"wfr" => 0x1d534,
-	"wopf" => 0x1d568,
-	"wp" => 0x2118,
-	"wr" => 0x2240,
-	"wreath" => 0x2240,
-	"wscr" => 0x1d4cc,
-	"xcap" => 0x22c2,
-	"xcirc" => 0x25ef,
-	"xcup" => 0x22c3,
-	"xdtri" => 0x25bd,
-	"xfr" => 0x1d535,
-	"xhArr" => 0x27fa,
-	"xharr" => 0x27f7,
-	"xi" => 0x3be,
-	"xlArr" => 0x27f8,
-	"xlarr" => 0x27f5,
-	"xmap" => 0x27fc,
-	"xnis" => 0x22fb,
-	"xodot" => 0x2a00,
-	"xopf" => 0x1d569,
-	"xoplus" => 0x2a01,
-	"xotime" => 0x2a02,
-	"xrArr" => 0x27f9,
-	"xrarr" => 0x27f6,
-	"xscr" => 0x1d4cd,
-	"xsqcup" => 0x2a06,
-	"xuplus" => 0x2a04,
-	"xutri" => 0x25b3,
-	"xvee" => 0x22c1,
-	"xwedge" => 0x22c0,
-	"yacute" => 0xfd,
-	"yacy" => 0x44f,
-	"ycirc" => 0x177,
-	"ycy" => 0x44b,
-	"yen" => 0xa5,
-	"yfr" => 0x1d536,
-	"yicy" => 0x457,
-	"yopf" => 0x1d56a,
-	"yscr" => 0x1d4ce,
-	"yucy" => 0x44e,
-	"yuml" => 0xff,
-	"zacute" => 0x17a,
-	"zcaron" => 0x17e,
-	"zcy" => 0x437,
-	"zdot" => 0x17c,
-	"zeetrf" => 0x2128,
-	"zeta" => 0x3b6,
-	"zfr" => 0x1d537,
-	"zhcy" => 0x436,
-	"zigrarr" => 0x21dd,
-	"zopf" => 0x1d56b,
-	"zscr" => 0x1d4cf,
-	"zwj" => 0x200d,
-	"zwnj" => 0x200c,
-};
-
-fn is_valid_entity_reference_name_char(c: char) -> bool {
-    c.is_ascii_digit() || c.is_ascii_alphabetic()
-}
-
-fn get_entity_reference_code_point(name: &str) -> Option<u32> {
-    ENTITY_REFERENCES[name]
-}
diff --git a/src/rule/tag/content.rs b/src/rule/tag/content.rs
deleted file mode 100644
index 3166636..0000000
--- a/src/rule/tag/content.rs
+++ /dev/null
@@ -1,24 +0,0 @@
-use ::phf::{phf_set, Set};
-
-static CONTENT_TAGS: Set<&'static str> = phf_set! {
-	"address",
-	"audio",
-	"button",
-	"canvas",
-	"caption",
-	"figcaption",
-	"h1",
-	"h2",
-	"h3",
-	"h4",
-	"h5",
-	"h6",
-	"legend",
-	"meter",
-	"object",
-	"option",
-	"p",
-	"summary", // Can also contain a heading.
-	"textarea",
-	"video",
-};
diff --git a/src/rule/tag/contentfirst.rs b/src/rule/tag/contentfirst.rs
deleted file mode 100644
index 5acc837..0000000
--- a/src/rule/tag/contentfirst.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-use ::phf::{phf_set, Set};
-
-static CONTENT_FIRST_TAGS: Set<&'static str> = phf_set! {
-	"dd",
-	"details",
-	"dt",
-	"iframe",
-	"label",
-	"li",
-	"noscript",
-	"output",
-	"progress",
-	"slot",
-	"td",
-	"template",
-	"th",
-};
diff --git a/src/rule/tag/formatting.rs b/src/rule/tag/formatting.rs
deleted file mode 100644
index 92c1b10..0000000
--- a/src/rule/tag/formatting.rs
+++ /dev/null
@@ -1,35 +0,0 @@
-use ::phf::{phf_set, Set};
-
-// Difference to MDN's inline text semantics list: -br, +del, +ins
-static FORMATTING_TAGS: Set<&'static str> = phf_set! {
-	"a",
-	"abbr",
-	"b",
-	"bdi",
-	"bdo",
-	"cite",
-	"data",
-	"del",
-	"dfn",
-	"em",
-	"i",
-	"ins",
-	"kbd",
-	"mark",
-	"q",
-	"rp",
-	"rt",
-	"rtc",
-	"ruby",
-	"s",
-	"samp",
-	"small",
-	"span",
-	"strong",
-	"sub",
-	"sup",
-	"time",
-	"u",
-	"var",
-	"wbr",
-};
diff --git a/src/rule/tag/heading.rs b/src/rule/tag/heading.rs
deleted file mode 100644
index e58bd95..0000000
--- a/src/rule/tag/heading.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-use ::phf::{phf_set, Set};
-
-static HEADING_TAGS: Set<&'static str> = phf_set! {
-	"hgroup",
-	"h1",
-	"h2",
-	"h3",
-	"h4",
-	"h5",
-	"h6",
-};
diff --git a/src/rule/tag/html.rs b/src/rule/tag/html.rs
deleted file mode 100644
index 48b6190..0000000
--- a/src/rule/tag/html.rs
+++ /dev/null
@@ -1,156 +0,0 @@
-use ::phf::{phf_set, Set};
-
-// Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element at 2018-07-01T05:55:00Z.
-static HTML_TAGS: Set<&'static str> = phf_set! {
-	"a",
-	"abbr",
-	"acronym",
-	"address",
-	"applet",
-	"applet",
-	"area",
-	"article",
-	"aside",
-	"audio",
-	"b",
-	"basefont",
-	"bdi",
-	"bdo",
-	"bgsound",
-	"big",
-	"blink",
-	"blockquote",
-	"body",
-	"br",
-	"button",
-	"canvas",
-	"caption",
-	"center",
-	"cite",
-	"code",
-	"col",
-	"colgroup",
-	"command",
-	"content",
-	"content",
-	"data",
-	"datalist",
-	"dd",
-	"del",
-	"details",
-	"dfn",
-	"dialog",
-	"dir",
-	"dir",
-	"div",
-	"dl",
-	"dt",
-	"element",
-	"element",
-	"em",
-	"embed",
-	"fieldset",
-	"figcaption",
-	"figure",
-	"font",
-	"footer",
-	"form",
-	"frame",
-	"frameset",
-	"h1",
-	"h2",
-	"h3",
-	"h4",
-	"h5",
-	"h6",
-	"head",
-	"header",
-	"hgroup",
-	"hr",
-	"html",
-	"i",
-	"iframe",
-	"image",
-	"img",
-	"input",
-	"ins",
-	"isindex",
-	"kbd",
-	"keygen",
-	"label",
-	"legend",
-	"li",
-	"link",
-	"listing",
-	"main",
-	"map",
-	"mark",
-	"marquee",
-	"menu",
-	"menuitem",
-	"menuitem",
-	"meta",
-	"meter",
-	"multicol",
-	"nav",
-	"nextid",
-	"nobr",
-	"noembed",
-	"noembed",
-	"noframes",
-	"noscript",
-	"object",
-	"ol",
-	"optgroup",
-	"option",
-	"output",
-	"p",
-	"param",
-	"picture",
-	"plaintext",
-	"pre",
-	"progress",
-	"q",
-	"rp",
-	"rt",
-	"rtc",
-	"ruby",
-	"s",
-	"samp",
-	"script",
-	"section",
-	"select",
-	"shadow",
-	"shadow",
-	"slot",
-	"small",
-	"source",
-	"spacer",
-	"span",
-	"strike",
-	"strong",
-	"style",
-	"sub",
-	"summary",
-	"sup",
-	"table",
-	"tbody",
-	"td",
-	"template",
-	"textarea",
-	"tfoot",
-	"th",
-	"thead",
-	"time",
-	"title",
-	"tr",
-	"track",
-	"tt",
-	"tt",
-	"u",
-	"ul",
-	"var",
-	"video",
-	"wbr",
-	"xmp",
-};
diff --git a/src/rule/tag/layout.rs b/src/rule/tag/layout.rs
deleted file mode 100644
index da19653..0000000
--- a/src/rule/tag/layout.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-use ::phf::{phf_set, Set};
-
-static LAYOUT_TAGS: Set<&'static str> = phf_set! {
-    // Sectioning tags.
-	"article",
-	"aside",
-	"nav",
-	"section",
-	// Other tags.
-	"blockquote",
-	"body",
-	"colgroup",
-	"datalist",
-	"dialog",
-	"div",
-	"dl",
-	"fieldset",
-	"figure",
-	"footer",
-	"form",
-	"head",
-	"header",
-	"hgroup",
-	"html",
-	"main",
-	"map",
-	"menu",
-	"nav",
-	"ol",
-	"optgroup",
-	"picture",
-	"section",
-	"select",
-	"table",
-	"tbody",
-	"tfoot",
-	"thead",
-	"tr",
-	"ul",
-};
diff --git a/src/rule/tag/media.rs b/src/rule/tag/media.rs
deleted file mode 100644
index 8b3fb7e..0000000
--- a/src/rule/tag/media.rs
+++ /dev/null
@@ -1,6 +0,0 @@
-use ::phf::{phf_set, Set};
-
-static MEDIA_TAGS: Set<&'static str> = phf_set! {
-	"audio",
-	"video",
-};
diff --git a/src/rule/tag/name.rs b/src/rule/tag/name.rs
deleted file mode 100644
index f4906b1..0000000
--- a/src/rule/tag/name.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn is_valid_tag_name_char(c: char) -> bool {
-    c.is_ascii_alphabetic() || c.is_ascii_digit() || c == ':' || c == '-'
-}
diff --git a/src/rule/tag/sectioning.rs b/src/rule/tag/sectioning.rs
deleted file mode 100644
index eefe35f..0000000
--- a/src/rule/tag/sectioning.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-use ::phf::{phf_set, Set};
-
-static SECTIONING_TAGS: Set<&'static str> = phf_set! {
-    // Also used by layout tags.
-	"article",
-	"aside",
-	"nav",
-	"section",
-};
diff --git a/src/rule/tag/specific.rs b/src/rule/tag/specific.rs
deleted file mode 100644
index 971c7a6..0000000
--- a/src/rule/tag/specific.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-use ::phf::{phf_set, Set};
-
-// Does not include SVG tags.
-static SPECIFIC_HTML_TAGS: Set<&'static str> = phf_set! {
-	"area",
-	"base",
-	"br",
-	"code", // Reason: unlikely to want to minify.
-	"col",
-	"embed",
-	"hr",
-	"img",
-	"input",
-	"param",
-	"pre", // Reason: unlikely to want to minify.
-	"script",
-	"source",
-	"track",
-}
diff --git a/src/rule/tag/svg.rs b/src/rule/tag/svg.rs
deleted file mode 100644
index 1aed064..0000000
--- a/src/rule/tag/svg.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-use ::phf::{phf_set, Set};
-
-// Sourced from https://developer.mozilla.org/en-US/docs/Web/SVG/Element at 2018-08-04T03:50:00Z.
-static SVG_TAGS: Set<&'static str> = phf_set! {
-	"a",
-	"altGlyph",
-	"altGlyphDef",
-	"altGlyphItem",
-	"animate",
-	"animateColor",
-	"animateMotion",
-	"animateTransform",
-	"circle",
-	"clipPath",
-	"color-profile",
-	"cursor",
-	"defs",
-	"desc",
-	"discard",
-	"ellipse",
-	"feBlend",
-	"feColorMatrix",
-	"feComponentTransfer",
-	"feComposite",
-	"feConvolveMatrix",
-	"feDiffuseLighting",
-	"feDisplacementMap",
-	"feDistantLight",
-	"feDropShadow",
-	"feFlood",
-	"feFuncA",
-	"feFuncB",
-	"feFuncG",
-	"feFuncR",
-	"feGaussianBlur",
-	"feImage",
-	"feMerge",
-	"feMergeNode",
-	"feMorphology",
-	"feOffset",
-	"fePointLight",
-	"feSpecularLighting",
-	"feSpotLight",
-	"feTile",
-	"feTurbulence",
-	"filter",
-	"font-face-format",
-	"font-face-name",
-	"font-face-src",
-	"font-face-uri",
-	"font-face",
-	"font",
-	"foreignObject",
-	"g",
-	"glyph",
-	"glyphRef",
-	"hatch",
-	"hatchpath",
-	"hkern",
-	"image",
-	"line",
-	"linearGradient",
-	"marker",
-	"mask",
-	"mesh",
-	"meshgradient",
-	"meshpatch",
-	"meshrow",
-	"metadata",
-	"missing-glyph",
-	"mpath",
-	"path",
-	"pattern",
-	"polygon",
-	"polyline",
-	"radialGradient",
-	"rect",
-	"script",
-	"set",
-	"solidcolor",
-	"stop",
-	"style",
-	"svg",
-	"switch",
-	"symbol",
-	"text",
-	"textPath",
-	"title",
-	"tref",
-	"tspan",
-	"unknown",
-	"use",
-	"view",
-	"vkern",
-};
diff --git a/src/rule/tag/valid.rs b/src/rule/tag/valid.rs
deleted file mode 100644
index bfb950b..0000000
--- a/src/rule/tag/valid.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn is_valid_tag(tag: &str) -> bool {
-	hb_rule_tag_html_check(tag) || hb_rule_tag_svg_check(tag)
-}
diff --git a/src/rule/tag/void.rs b/src/rule/tag/void.rs
deleted file mode 100644
index 9ecaea2..0000000
--- a/src/rule/tag/void.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-use ::phf::{phf_set, Set};
-
-static VOID_TAGS: Set<&'static str> = phf_set! {
-	"area",
-	"base",
-	"br",
-	"col",
-	"embed",
-	"hr",
-	"img",
-	"input",
-	"keygen",
-	"link",
-	"meta",
-	"param",
-	"source",
-	"track",
-	"wbr",
-};
diff --git a/src/rune.h b/src/rune.h
deleted file mode 100644
index 373708d..0000000
--- a/src/rune.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-
-// EOF represents the end of an input buffer, and is used for some functions
-// that return characters. It must be a value that would never appear in any
-// valid UTF-8 byte sequence.
-#define HB_EOF -1
-
-// This version of hyperbuild is designed for ASCII and works with UTF-8 (with
-// minor exceptions), so each character is one byte. Use char to maximise
-// compatibility with external and standard libraries.
-typedef char hb_rune;
-// When either a character or EOF needs to be returned, a character will be
-// represented by a valid hb_rune value and EOF will be represented by HB_EOF.
-// In this case, since HB_EOF fits within the valid values of hb_rune, no
-// separate type is needed. A separate type is still used to symbolically
-// represent possible HB_EOF return values.
-typedef char hb_eof_rune;
-
-#define hb_string_literal_length(str) (sizeof(str) - 1)
diff --git a/src/spec/codepoint.rs b/src/spec/codepoint.rs
new file mode 100644
index 0000000..3bac92d
--- /dev/null
+++ b/src/spec/codepoint.rs
@@ -0,0 +1,76 @@
+// Code point classes defined by the WHATWG Infra Standard.
+// See https://infra.spec.whatwg.org/#code-points for spec.
+//
+// Every predicate here takes a single byte (`u8`) rather than a `char`.
+
+/// Returns `true` for an ASCII tab or newline: U+0009 TAB, U+000A LF or U+000D CR.
+pub fn is_tab_or_newline(c: u8) -> bool {
+    match c {
+        0x09 | 0x0a | 0x0d => true,
+        _ => false,
+    }
+}
+
+/// Returns `true` for ASCII whitespace: U+0009 TAB, U+000A LF, U+000C FF,
+/// U+000D CR or U+0020 SPACE.
+pub fn is_whitespace(c: u8) -> bool {
+    // Also update crate::proc::attr::quoted::STATIC when changing here.
+    match c {
+        0x09 | 0x0a | 0x0c | 0x0d | 0x20 => true,
+        _ => false,
+    }
+}
+
+/// Returns `true` for a C0 control: U+0000 NULL to U+001F, inclusive.
+pub fn is_c0_control(c: u8) -> bool {
+    // `u8` has no values below zero, so only the upper bound needs checking.
+    c <= 0x1f
+}
+
+/// Returns `true` for a control: a C0 control, or U+007F DELETE to U+009F, inclusive.
+///
+/// NOTE(review): bytes 0x80..=0x9f also occur as UTF-8 continuation bytes; confirm
+/// that callers only pass values that really are whole code points.
+pub fn is_control(c: u8) -> bool {
+    is_c0_control(c) || (c >= 0x7f && c <= 0x9f)
+}
+
+/// Returns `true` for an ASCII digit: `0` to `9`.
+pub fn is_digit(c: u8) -> bool {
+    c.is_ascii_digit()
+}
+
+/// Returns `true` for an ASCII upper hex digit: `0` to `9` or `A` to `F`.
+pub fn is_upper_hex_digit(c: u8) -> bool {
+    is_digit(c) || (c >= b'A' && c <= b'F')
+}
+
+/// Returns `true` for an ASCII lower hex digit: `0` to `9` or `a` to `f`.
+pub fn is_lower_hex_digit(c: u8) -> bool {
+    is_digit(c) || (c >= b'a' && c <= b'f')
+}
+
+/// Returns `true` for an ASCII hex digit of either case.
+pub fn is_hex_digit(c: u8) -> bool {
+    c.is_ascii_hexdigit()
+}
+
+/// Returns `true` for an ASCII upper alpha: `A` to `Z`.
+pub fn is_upper_alpha(c: u8) -> bool {
+    c.is_ascii_uppercase()
+}
+
+/// Returns `true` for an ASCII lower alpha: `a` to `z`.
+pub fn is_lower_alpha(c: u8) -> bool {
+    c.is_ascii_lowercase()
+}
+
+/// Returns `true` for an ASCII alpha: `A` to `Z` or `a` to `z`.
+pub fn is_alpha(c: u8) -> bool {
+    c.is_ascii_alphabetic()
+}
+
+/// Returns `true` for an ASCII alphanumeric: an ASCII digit or an ASCII alpha.
+pub fn is_alphanumeric(c: u8) -> bool {
+    c.is_ascii_alphanumeric()
+}
diff --git a/src/spec/entity.rs b/src/spec/entity.rs
new file mode 100644
index 0000000..b77405f
--- /dev/null
+++ b/src/spec/entity.rs
@@ -0,0 +1,2046 @@
+use phf::{Map, phf_map};
+
+// Sourced from https://dev.w3.org/html5/html-author/charref at 2018-07-02T10:00:00Z.
+// TODO Update and use from https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references.
+// HTML entity reference names are case sensitive.
+pub static ENTITY_REFERENCES: Map<&'static [u8], u32> = phf_map! {
+	b"AElig" => 0xc6,
+	b"AMP" => 0x26,
+	b"Aacute" => 0xc1,
+	b"Abreve" => 0x102,
+	b"Acirc" => 0xc2,
+	b"Acy" => 0x410,
+	b"Afr" => 0x1d504,
+	b"Agrave" => 0xc0,
+	b"Alpha" => 0x391,
+	b"Amacr" => 0x100,
+	b"And" => 0x2a53,
+	b"Aogon" => 0x104,
+	b"Aopf" => 0x1d538,
+	b"ApplyFunction" => 0x2061,
+	b"Aring" => 0xc5,
+	b"Ascr" => 0x1d49c,
+	b"Assign" => 0x2254,
+	b"Atilde" => 0xc3,
+	b"Auml" => 0xc4,
+	b"Backslash" => 0x2216,
+	b"Barv" => 0x2ae7,
+	b"Barwed" => 0x2306,
+	b"Bcy" => 0x411,
+	b"Because" => 0x2235,
+	b"Bernoullis" => 0x212c,
+	b"Beta" => 0x392,
+	b"Bfr" => 0x1d505,
+	b"Bopf" => 0x1d539,
+	b"Breve" => 0x2d8,
+	b"Bscr" => 0x212c,
+	b"Bumpeq" => 0x224e,
+	b"CHcy" => 0x427,
+	b"COPY" => 0xa9,
+	b"Cacute" => 0x106,
+	b"Cap" => 0x22d2,
+	b"CapitalDifferentialD" => 0x2145,
+	b"Cayleys" => 0x212d,
+	b"Ccaron" => 0x10c,
+	b"Ccedil" => 0xc7,
+	b"Ccirc" => 0x108,
+	b"Cconint" => 0x2230,
+	b"Cdot" => 0x10a,
+	b"Cedilla" => 0xb8,
+	b"CenterDot" => 0xb7,
+	b"Cfr" => 0x212d,
+	b"Chi" => 0x3a7,
+	b"CircleDot" => 0x2299,
+	b"CircleMinus" => 0x2296,
+	b"CirclePlus" => 0x2295,
+	b"CircleTimes" => 0x2297,
+	b"ClockwiseContourIntegral" => 0x2232,
+	b"CloseCurlyDoubleQuote" => 0x201d,
+	b"CloseCurlyQuote" => 0x2019,
+	b"Colon" => 0x2237,
+	b"Colone" => 0x2a74,
+	b"Congruent" => 0x2261,
+	b"Conint" => 0x222f,
+	b"ContourIntegral" => 0x222e,
+	b"Copf" => 0x2102,
+	b"Coproduct" => 0x2210,
+	b"CounterClockwiseContourIntegral" => 0x2233,
+	b"Cross" => 0x2a2f,
+	b"Cscr" => 0x1d49e,
+	b"Cup" => 0x22d3,
+	b"CupCap" => 0x224d,
+	b"DD" => 0x2145,
+	b"DDotrahd" => 0x2911,
+	b"DJcy" => 0x402,
+	b"DScy" => 0x405,
+	b"DZcy" => 0x40f,
+	b"Dagger" => 0x2021,
+	b"Darr" => 0x21a1,
+	b"Dashv" => 0x2ae4,
+	b"Dcaron" => 0x10e,
+	b"Dcy" => 0x414,
+	b"Del" => 0x2207,
+	b"Delta" => 0x394,
+	b"Dfr" => 0x1d507,
+	b"DiacriticalAcute" => 0xb4,
+	b"DiacriticalDot" => 0x2d9,
+	b"DiacriticalDoubleAcute" => 0x2dd,
+	b"DiacriticalGrave" => 0x60,
+	b"DiacriticalTilde" => 0x2dc,
+	b"Diamond" => 0x22c4,
+	b"DifferentialD" => 0x2146,
+	b"Dopf" => 0x1d53b,
+	b"Dot" => 0xa8,
+	b"DotDot" => 0x20dc,
+	b"DotEqual" => 0x2250,
+	b"DoubleContourIntegral" => 0x222f,
+	b"DoubleDot" => 0xa8,
+	b"DoubleDownArrow" => 0x21d3,
+	b"DoubleLeftArrow" => 0x21d0,
+	b"DoubleLeftRightArrow" => 0x21d4,
+	b"DoubleLeftTee" => 0x2ae4,
+	b"DoubleLongLeftArrow" => 0x27f8,
+	b"DoubleLongLeftRightArrow" => 0x27fa,
+	b"DoubleLongRightArrow" => 0x27f9,
+	b"DoubleRightArrow" => 0x21d2,
+	b"DoubleRightTee" => 0x22a8,
+	b"DoubleUpArrow" => 0x21d1,
+	b"DoubleUpDownArrow" => 0x21d5,
+	b"DoubleVerticalBar" => 0x2225,
+	b"DownArrow" => 0x2193,
+	b"DownArrowBar" => 0x2913,
+	b"DownArrowUpArrow" => 0x21f5,
+	b"DownBreve" => 0x311,
+	b"DownLeftRightVector" => 0x2950,
+	b"DownLeftTeeVector" => 0x295e,
+	b"DownLeftVector" => 0x21bd,
+	b"DownLeftVectorBar" => 0x2956,
+	b"DownRightTeeVector" => 0x295f,
+	b"DownRightVector" => 0x21c1,
+	b"DownRightVectorBar" => 0x2957,
+	b"DownTee" => 0x22a4,
+	b"DownTeeArrow" => 0x21a7,
+	b"Downarrow" => 0x21d3,
+	b"Dscr" => 0x1d49f,
+	b"Dstrok" => 0x110,
+	b"ENG" => 0x14a,
+	b"ETH" => 0xd0,
+	b"Eacute" => 0xc9,
+	b"Ecaron" => 0x11a,
+	b"Ecirc" => 0xca,
+	b"Ecy" => 0x42d,
+	b"Edot" => 0x116,
+	b"Efr" => 0x1d508,
+	b"Egrave" => 0xc8,
+	b"Element" => 0x2208,
+	b"Emacr" => 0x112,
+	b"EmptySmallSquare" => 0x25fb,
+	b"EmptyVerySmallSquare" => 0x25ab,
+	b"Eogon" => 0x118,
+	b"Eopf" => 0x1d53c,
+	b"Epsilon" => 0x395,
+	b"Equal" => 0x2a75,
+	b"EqualTilde" => 0x2242,
+	b"Equilibrium" => 0x21cc,
+	b"Escr" => 0x2130,
+	b"Esim" => 0x2a73,
+	b"Eta" => 0x397,
+	b"Euml" => 0xcb,
+	b"Exists" => 0x2203,
+	b"ExponentialE" => 0x2147,
+	b"Fcy" => 0x424,
+	b"Ffr" => 0x1d509,
+	b"FilledSmallSquare" => 0x25fc,
+	b"FilledVerySmallSquare" => 0x25aa,
+	b"Fopf" => 0x1d53d,
+	b"ForAll" => 0x2200,
+	b"Fouriertrf" => 0x2131,
+	b"Fscr" => 0x2131,
+	b"GJcy" => 0x403,
+	b"GT" => 0x3e,
+	b"Gamma" => 0x393,
+	b"Gammad" => 0x3dc,
+	b"Gbreve" => 0x11e,
+	b"Gcedil" => 0x122,
+	b"Gcirc" => 0x11c,
+	b"Gcy" => 0x413,
+	b"Gdot" => 0x120,
+	b"Gfr" => 0x1d50a,
+	b"Gg" => 0x22d9,
+	b"Gopf" => 0x1d53e,
+	b"GreaterEqual" => 0x2265,
+	b"GreaterEqualLess" => 0x22db,
+	b"GreaterFullEqual" => 0x2267,
+	b"GreaterGreater" => 0x2aa2,
+	b"GreaterLess" => 0x2277,
+	b"GreaterSlantEqual" => 0x2a7e,
+	b"GreaterTilde" => 0x2273,
+	b"Gscr" => 0x1d4a2,
+	b"Gt" => 0x226b,
+	b"HARDcy" => 0x42a,
+	b"Hacek" => 0x2c7,
+	b"Hat" => 0x5e,
+	b"Hcirc" => 0x124,
+	b"Hfr" => 0x210c,
+	b"HilbertSpace" => 0x210b,
+	b"Hopf" => 0x210d,
+	b"HorizontalLine" => 0x2500,
+	b"Hscr" => 0x210b,
+	b"Hstrok" => 0x126,
+	b"HumpDownHump" => 0x224e,
+	b"HumpEqual" => 0x224f,
+	b"IEcy" => 0x415,
+	b"IJlig" => 0x132,
+	b"IOcy" => 0x401,
+	b"Iacute" => 0xcd,
+	b"Icirc" => 0xce,
+	b"Icy" => 0x418,
+	b"Idot" => 0x130,
+	b"Ifr" => 0x2111,
+	b"Igrave" => 0xcc,
+	b"Im" => 0x2111,
+	b"Imacr" => 0x12a,
+	b"ImaginaryI" => 0x2148,
+	b"Implies" => 0x21d2,
+	b"Int" => 0x222c,
+	b"Integral" => 0x222b,
+	b"Intersection" => 0x22c2,
+	b"InvisibleComma" => 0x2063,
+	b"InvisibleTimes" => 0x2062,
+	b"Iogon" => 0x12e,
+	b"Iopf" => 0x1d540,
+	b"Iota" => 0x399,
+	b"Iscr" => 0x2110,
+	b"Itilde" => 0x128,
+	b"Iukcy" => 0x406,
+	b"Iuml" => 0xcf,
+	b"Jcirc" => 0x134,
+	b"Jcy" => 0x419,
+	b"Jfr" => 0x1d50d,
+	b"Jopf" => 0x1d541,
+	b"Jscr" => 0x1d4a5,
+	b"Jsercy" => 0x408,
+	b"Jukcy" => 0x404,
+	b"KHcy" => 0x425,
+	b"KJcy" => 0x40c,
+	b"Kappa" => 0x39a,
+	b"Kcedil" => 0x136,
+	b"Kcy" => 0x41a,
+	b"Kfr" => 0x1d50e,
+	b"Kopf" => 0x1d542,
+	b"Kscr" => 0x1d4a6,
+	b"LJcy" => 0x409,
+	b"LT" => 0x3c,
+	b"Lacute" => 0x139,
+	b"Lambda" => 0x39b,
+	b"Lang" => 0x27ea,
+	b"Laplacetrf" => 0x2112,
+	b"Larr" => 0x219e,
+	b"Lcaron" => 0x13d,
+	b"Lcedil" => 0x13b,
+	b"Lcy" => 0x41b,
+	b"LeftAngleBracket" => 0x27e8,
+	b"LeftArrow" => 0x2190,
+	b"LeftArrowBar" => 0x21e4,
+	b"LeftArrowRightArrow" => 0x21c6,
+	b"LeftCeiling" => 0x2308,
+	b"LeftDoubleBracket" => 0x27e6,
+	b"LeftDownTeeVector" => 0x2961,
+	b"LeftDownVector" => 0x21c3,
+	b"LeftDownVectorBar" => 0x2959,
+	b"LeftFloor" => 0x230a,
+	b"LeftRightArrow" => 0x2194,
+	b"LeftRightVector" => 0x294e,
+	b"LeftTee" => 0x22a3,
+	b"LeftTeeArrow" => 0x21a4,
+	b"LeftTeeVector" => 0x295a,
+	b"LeftTriangle" => 0x22b2,
+	b"LeftTriangleBar" => 0x29cf,
+	b"LeftTriangleEqual" => 0x22b4,
+	b"LeftUpDownVector" => 0x2951,
+	b"LeftUpTeeVector" => 0x2960,
+	b"LeftUpVector" => 0x21bf,
+	b"LeftUpVectorBar" => 0x2958,
+	b"LeftVector" => 0x21bc,
+	b"LeftVectorBar" => 0x2952,
+	b"Leftarrow" => 0x21d0,
+	b"Leftrightarrow" => 0x21d4,
+	b"LessEqualGreater" => 0x22da,
+	b"LessFullEqual" => 0x2266,
+	b"LessGreater" => 0x2276,
+	b"LessLess" => 0x2aa1,
+	b"LessSlantEqual" => 0x2a7d,
+	b"LessTilde" => 0x2272,
+	b"Lfr" => 0x1d50f,
+	b"Ll" => 0x22d8,
+	b"Lleftarrow" => 0x21da,
+	b"Lmidot" => 0x13f,
+	b"LongLeftArrow" => 0x27f5,
+	b"LongLeftRightArrow" => 0x27f7,
+	b"LongRightArrow" => 0x27f6,
+	b"Longleftarrow" => 0x27f8,
+	b"Longleftrightarrow" => 0x27fa,
+	b"Longrightarrow" => 0x27f9,
+	b"Lopf" => 0x1d543,
+	b"LowerLeftArrow" => 0x2199,
+	b"LowerRightArrow" => 0x2198,
+	b"Lscr" => 0x2112,
+	b"Lsh" => 0x21b0,
+	b"Lstrok" => 0x141,
+	b"Lt" => 0x226a,
+	b"Map" => 0x2905,
+	b"Mcy" => 0x41c,
+	b"MediumSpace" => 0x205f,
+	b"Mellintrf" => 0x2133,
+	b"Mfr" => 0x1d510,
+	b"MinusPlus" => 0x2213,
+	b"Mopf" => 0x1d544,
+	b"Mscr" => 0x2133,
+	b"Mu" => 0x39c,
+	b"NJcy" => 0x40a,
+	b"Nacute" => 0x143,
+	b"Ncaron" => 0x147,
+	b"Ncedil" => 0x145,
+	b"Ncy" => 0x41d,
+	b"NegativeMediumSpace" => 0x200b,
+	b"NegativeThickSpace" => 0x200b,
+	b"NegativeThinSpace" => 0x200b,
+	b"NegativeVeryThinSpace" => 0x200b,
+	b"NestedGreaterGreater" => 0x226b,
+	b"NestedLessLess" => 0x226a,
+	b"NewLine" => 0xa,
+	b"Nfr" => 0x1d511,
+	b"NoBreak" => 0x2060,
+	b"NonBreakingSpace" => 0xa0,
+	b"Nopf" => 0x2115,
+	b"Not" => 0x2aec,
+	b"NotCongruent" => 0x2262,
+	b"NotCupCap" => 0x226d,
+	b"NotDoubleVerticalBar" => 0x2226,
+	b"NotElement" => 0x2209,
+	b"NotEqual" => 0x2260,
+	b"NotExists" => 0x2204,
+	b"NotGreater" => 0x226f,
+	b"NotGreaterEqual" => 0x2271,
+	b"NotGreaterLess" => 0x2279,
+	b"NotGreaterTilde" => 0x2275,
+	b"NotLeftTriangle" => 0x22ea,
+	b"NotLeftTriangleEqual" => 0x22ec,
+	b"NotLess" => 0x226e,
+	b"NotLessEqual" => 0x2270,
+	b"NotLessGreater" => 0x2278,
+	b"NotLessTilde" => 0x2274,
+	b"NotPrecedes" => 0x2280,
+	b"NotPrecedesSlantEqual" => 0x22e0,
+	b"NotReverseElement" => 0x220c,
+	b"NotRightTriangle" => 0x22eb,
+	b"NotRightTriangleEqual" => 0x22ed,
+	b"NotSquareSubsetEqual" => 0x22e2,
+	b"NotSquareSupersetEqual" => 0x22e3,
+	b"NotSubsetEqual" => 0x2288,
+	b"NotSucceeds" => 0x2281,
+	b"NotSucceedsSlantEqual" => 0x22e1,
+	b"NotSupersetEqual" => 0x2289,
+	b"NotTilde" => 0x2241,
+	b"NotTildeEqual" => 0x2244,
+	b"NotTildeFullEqual" => 0x2247,
+	b"NotTildeTilde" => 0x2249,
+	b"NotVerticalBar" => 0x2224,
+	b"Nscr" => 0x1d4a9,
+	b"Ntilde" => 0xd1,
+	b"Nu" => 0x39d,
+	b"OElig" => 0x152,
+	b"Oacute" => 0xd3,
+	b"Ocirc" => 0xd4,
+	b"Ocy" => 0x41e,
+	b"Odblac" => 0x150,
+	b"Ofr" => 0x1d512,
+	b"Ograve" => 0xd2,
+	b"Omacr" => 0x14c,
+	b"Omega" => 0x3a9,
+	b"Omicron" => 0x39f,
+	b"Oopf" => 0x1d546,
+	b"OpenCurlyDoubleQuote" => 0x201c,
+	b"OpenCurlyQuote" => 0x2018,
+	b"Or" => 0x2a54,
+	b"Oscr" => 0x1d4aa,
+	b"Oslash" => 0xd8,
+	b"Otilde" => 0xd5,
+	b"Otimes" => 0x2a37,
+	b"Ouml" => 0xd6,
+	b"OverBar" => 0xaf,
+	b"OverBrace" => 0x23de,
+	b"OverBracket" => 0x23b4,
+	b"OverParenthesis" => 0x23dc,
+	b"PartialD" => 0x2202,
+	b"Pcy" => 0x41f,
+	b"Pfr" => 0x1d513,
+	b"Phi" => 0x3a6,
+	b"Pi" => 0x3a0,
+	b"PlusMinus" => 0xb1,
+	b"Poincareplane" => 0x210c,
+	b"Popf" => 0x2119,
+	b"Pr" => 0x2abb,
+	b"Precedes" => 0x227a,
+	b"PrecedesEqual" => 0x2aaf,
+	b"PrecedesSlantEqual" => 0x227c,
+	b"PrecedesTilde" => 0x227e,
+	b"Prime" => 0x2033,
+	b"Product" => 0x220f,
+	b"Proportion" => 0x2237,
+	b"Proportional" => 0x221d,
+	b"Pscr" => 0x1d4ab,
+	b"Psi" => 0x3a8,
+	b"QUOT" => 0x22,
+	b"Qfr" => 0x1d514,
+	b"Qopf" => 0x211a,
+	b"Qscr" => 0x1d4ac,
+	b"RBarr" => 0x2910,
+	b"REG" => 0xae,
+	b"Racute" => 0x154,
+	b"Rang" => 0x27eb,
+	b"Rarr" => 0x21a0,
+	b"Rarrtl" => 0x2916,
+	b"Rcaron" => 0x158,
+	b"Rcedil" => 0x156,
+	b"Rcy" => 0x420,
+	b"Re" => 0x211c,
+	b"ReverseElement" => 0x220b,
+	b"ReverseEquilibrium" => 0x21cb,
+	b"ReverseUpEquilibrium" => 0x296f,
+	b"Rfr" => 0x211c,
+	b"Rho" => 0x3a1,
+	b"RightAngleBracket" => 0x27e9,
+	b"RightArrow" => 0x2192,
+	b"RightArrowBar" => 0x21e5,
+	b"RightArrowLeftArrow" => 0x21c4,
+	b"RightCeiling" => 0x2309,
+	b"RightDoubleBracket" => 0x27e7,
+	b"RightDownTeeVector" => 0x295d,
+	b"RightDownVector" => 0x21c2,
+	b"RightDownVectorBar" => 0x2955,
+	b"RightFloor" => 0x230b,
+	b"RightTee" => 0x22a2,
+	b"RightTeeArrow" => 0x21a6,
+	b"RightTeeVector" => 0x295b,
+	b"RightTriangle" => 0x22b3,
+	b"RightTriangleBar" => 0x29d0,
+	b"RightTriangleEqual" => 0x22b5,
+	b"RightUpDownVector" => 0x294f,
+	b"RightUpTeeVector" => 0x295c,
+	b"RightUpVector" => 0x21be,
+	b"RightUpVectorBar" => 0x2954,
+	b"RightVector" => 0x21c0,
+	b"RightVectorBar" => 0x2953,
+	b"Rightarrow" => 0x21d2,
+	b"Ropf" => 0x211d,
+	b"RoundImplies" => 0x2970,
+	b"Rrightarrow" => 0x21db,
+	b"Rscr" => 0x211b,
+	b"Rsh" => 0x21b1,
+	b"RuleDelayed" => 0x29f4,
+	b"SHCHcy" => 0x429,
+	b"SHcy" => 0x428,
+	b"SOFTcy" => 0x42c,
+	b"Sacute" => 0x15a,
+	b"Sc" => 0x2abc,
+	b"Scaron" => 0x160,
+	b"Scedil" => 0x15e,
+	b"Scirc" => 0x15c,
+	b"Scy" => 0x421,
+	b"Sfr" => 0x1d516,
+	b"ShortDownArrow" => 0x2193,
+	b"ShortLeftArrow" => 0x2190,
+	b"ShortRightArrow" => 0x2192,
+	b"ShortUpArrow" => 0x2191,
+	b"Sigma" => 0x3a3,
+	b"SmallCircle" => 0x2218,
+	b"Sopf" => 0x1d54a,
+	b"Sqrt" => 0x221a,
+	b"Square" => 0x25a1,
+	b"SquareIntersection" => 0x2293,
+	b"SquareSubset" => 0x228f,
+	b"SquareSubsetEqual" => 0x2291,
+	b"SquareSuperset" => 0x2290,
+	b"SquareSupersetEqual" => 0x2292,
+	b"SquareUnion" => 0x2294,
+	b"Sscr" => 0x1d4ae,
+	b"Star" => 0x22c6,
+	b"Sub" => 0x22d0,
+	b"Subset" => 0x22d0,
+	b"SubsetEqual" => 0x2286,
+	b"Succeeds" => 0x227b,
+	b"SucceedsEqual" => 0x2ab0,
+	b"SucceedsSlantEqual" => 0x227d,
+	b"SucceedsTilde" => 0x227f,
+	b"SuchThat" => 0x220b,
+	b"Sum" => 0x2211,
+	b"Sup" => 0x22d1,
+	b"Superset" => 0x2283,
+	b"SupersetEqual" => 0x2287,
+	b"Supset" => 0x22d1,
+	b"THORN" => 0xde,
+	b"TRADE" => 0x2122,
+	b"TSHcy" => 0x40b,
+	b"TScy" => 0x426,
+	b"Tab" => 0x9,
+	b"Tau" => 0x3a4,
+	b"Tcaron" => 0x164,
+	b"Tcedil" => 0x162,
+	b"Tcy" => 0x422,
+	b"Tfr" => 0x1d517,
+	b"Therefore" => 0x2234,
+	b"Theta" => 0x398,
+	b"ThinSpace" => 0x2009,
+	b"Tilde" => 0x223c,
+	b"TildeEqual" => 0x2243,
+	b"TildeFullEqual" => 0x2245,
+	b"TildeTilde" => 0x2248,
+	b"Topf" => 0x1d54b,
+	b"TripleDot" => 0x20db,
+	b"Tscr" => 0x1d4af,
+	b"Tstrok" => 0x166,
+	b"Uacute" => 0xda,
+	b"Uarr" => 0x219f,
+	b"Uarrocir" => 0x2949,
+	b"Ubrcy" => 0x40e,
+	b"Ubreve" => 0x16c,
+	b"Ucirc" => 0xdb,
+	b"Ucy" => 0x423,
+	b"Udblac" => 0x170,
+	b"Ufr" => 0x1d518,
+	b"Ugrave" => 0xd9,
+	b"Umacr" => 0x16a,
+	b"UnderBar" => 0x332,
+	b"UnderBrace" => 0x23df,
+	b"UnderBracket" => 0x23b5,
+	b"UnderParenthesis" => 0x23dd,
+	b"Union" => 0x22c3,
+	b"UnionPlus" => 0x228e,
+	b"Uogon" => 0x172,
+	b"Uopf" => 0x1d54c,
+	b"UpArrow" => 0x2191,
+	b"UpArrowBar" => 0x2912,
+	b"UpArrowDownArrow" => 0x21c5,
+	b"UpDownArrow" => 0x2195,
+	b"UpEquilibrium" => 0x296e,
+	b"UpTee" => 0x22a5,
+	b"UpTeeArrow" => 0x21a5,
+	b"Uparrow" => 0x21d1,
+	b"Updownarrow" => 0x21d5,
+	b"UpperLeftArrow" => 0x2196,
+	b"UpperRightArrow" => 0x2197,
+	b"Upsi" => 0x3d2,
+	b"Upsilon" => 0x3a5,
+	b"Uring" => 0x16e,
+	b"Uscr" => 0x1d4b0,
+	b"Utilde" => 0x168,
+	b"Uuml" => 0xdc,
+	b"VDash" => 0x22ab,
+	b"Vbar" => 0x2aeb,
+	b"Vcy" => 0x412,
+	b"Vdash" => 0x22a9,
+	b"Vdashl" => 0x2ae6,
+	b"Vee" => 0x22c1,
+	b"Verbar" => 0x2016,
+	b"Vert" => 0x2016,
+	b"VerticalBar" => 0x2223,
+	b"VerticalLine" => 0x7c,
+	b"VerticalSeparator" => 0x2758,
+	b"VerticalTilde" => 0x2240,
+	b"VeryThinSpace" => 0x200a,
+	b"Vfr" => 0x1d519,
+	b"Vopf" => 0x1d54d,
+	b"Vscr" => 0x1d4b1,
+	b"Vvdash" => 0x22aa,
+	b"Wcirc" => 0x174,
+	b"Wedge" => 0x22c0,
+	b"Wfr" => 0x1d51a,
+	b"Wopf" => 0x1d54e,
+	b"Wscr" => 0x1d4b2,
+	b"Xfr" => 0x1d51b,
+	b"Xi" => 0x39e,
+	b"Xopf" => 0x1d54f,
+	b"Xscr" => 0x1d4b3,
+	b"YAcy" => 0x42f,
+	b"YIcy" => 0x407,
+	b"YUcy" => 0x42e,
+	b"Yacute" => 0xdd,
+	b"Ycirc" => 0x176,
+	b"Ycy" => 0x42b,
+	b"Yfr" => 0x1d51c,
+	b"Yopf" => 0x1d550,
+	b"Yscr" => 0x1d4b4,
+	b"Yuml" => 0x178,
+	b"ZHcy" => 0x416,
+	b"Zacute" => 0x179,
+	b"Zcaron" => 0x17d,
+	b"Zcy" => 0x417,
+	b"Zdot" => 0x17b,
+	b"ZeroWidthSpace" => 0x200b,
+	b"Zeta" => 0x396,
+	b"Zfr" => 0x2128,
+	b"Zopf" => 0x2124,
+	b"Zscr" => 0x1d4b5,
+	b"aacute" => 0xe1,
+	b"abreve" => 0x103,
+	b"ac" => 0x223e,
+	b"acd" => 0x223f,
+	b"acirc" => 0xe2,
+	b"acute" => 0xb4,
+	b"acy" => 0x430,
+	b"aelig" => 0xe6,
+	b"af" => 0x2061,
+	b"afr" => 0x1d51e,
+	b"agrave" => 0xe0,
+	b"alefsym" => 0x2135,
+	b"aleph" => 0x2135,
+	b"alpha" => 0x3b1,
+	b"amacr" => 0x101,
+	b"amalg" => 0x2a3f,
+	b"amp" => 0x26,
+	b"and" => 0x2227,
+	b"andand" => 0x2a55,
+	b"andd" => 0x2a5c,
+	b"andslope" => 0x2a58,
+	b"andv" => 0x2a5a,
+	b"ang" => 0x2220,
+	b"ange" => 0x29a4,
+	b"angle" => 0x2220,
+	b"angmsd" => 0x2221,
+	b"angmsdaa" => 0x29a8,
+	b"angmsdab" => 0x29a9,
+	b"angmsdac" => 0x29aa,
+	b"angmsdad" => 0x29ab,
+	b"angmsdae" => 0x29ac,
+	b"angmsdaf" => 0x29ad,
+	b"angmsdag" => 0x29ae,
+	b"angmsdah" => 0x29af,
+	b"angrt" => 0x221f,
+	b"angrtvb" => 0x22be,
+	b"angrtvbd" => 0x299d,
+	b"angsph" => 0x2222,
+	b"angst" => 0x212b,
+	b"angzarr" => 0x237c,
+	b"aogon" => 0x105,
+	b"aopf" => 0x1d552,
+	b"ap" => 0x2248,
+	b"apE" => 0x2a70,
+	b"apacir" => 0x2a6f,
+	b"ape" => 0x224a,
+	b"apid" => 0x224b,
+	b"apos" => 0x27,
+	b"approx" => 0x2248,
+	b"approxeq" => 0x224a,
+	b"aring" => 0xe5,
+	b"ascr" => 0x1d4b6,
+	b"ast" => 0x2a,
+	b"asymp" => 0x2248,
+	b"asympeq" => 0x224d,
+	b"atilde" => 0xe3,
+	b"auml" => 0xe4,
+	b"awconint" => 0x2233,
+	b"awint" => 0x2a11,
+	b"bNot" => 0x2aed,
+	b"backcong" => 0x224c,
+	b"backepsilon" => 0x3f6,
+	b"backprime" => 0x2035,
+	b"backsim" => 0x223d,
+	b"backsimeq" => 0x22cd,
+	b"barvee" => 0x22bd,
+	b"barwed" => 0x2305,
+	b"barwedge" => 0x2305,
+	b"bbrk" => 0x23b5,
+	b"bbrktbrk" => 0x23b6,
+	b"bcong" => 0x224c,
+	b"bcy" => 0x431,
+	b"bdquo" => 0x201e,
+	b"becaus" => 0x2235,
+	b"because" => 0x2235,
+	b"bemptyv" => 0x29b0,
+	b"bepsi" => 0x3f6,
+	b"bernou" => 0x212c,
+	b"beta" => 0x3b2,
+	b"beth" => 0x2136,
+	b"between" => 0x226c,
+	b"bfr" => 0x1d51f,
+	b"bigcap" => 0x22c2,
+	b"bigcirc" => 0x25ef,
+	b"bigcup" => 0x22c3,
+	b"bigodot" => 0x2a00,
+	b"bigoplus" => 0x2a01,
+	b"bigotimes" => 0x2a02,
+	b"bigsqcup" => 0x2a06,
+	b"bigstar" => 0x2605,
+	b"bigtriangledown" => 0x25bd,
+	b"bigtriangleup" => 0x25b3,
+	b"biguplus" => 0x2a04,
+	b"bigvee" => 0x22c1,
+	b"bigwedge" => 0x22c0,
+	b"bkarow" => 0x290d,
+	b"blacklozenge" => 0x29eb,
+	b"blacksquare" => 0x25aa,
+	b"blacktriangle" => 0x25b4,
+	b"blacktriangledown" => 0x25be,
+	b"blacktriangleleft" => 0x25c2,
+	b"blacktriangleright" => 0x25b8,
+	b"blank" => 0x2423,
+	b"blk12" => 0x2592,
+	b"blk14" => 0x2591,
+	b"blk34" => 0x2593,
+	b"block" => 0x2588,
+	b"bnot" => 0x2310,
+	b"bopf" => 0x1d553,
+	b"bot" => 0x22a5,
+	b"bottom" => 0x22a5,
+	b"bowtie" => 0x22c8,
+	b"boxDL" => 0x2557,
+	b"boxDR" => 0x2554,
+	b"boxDl" => 0x2556,
+	b"boxDr" => 0x2553,
+	b"boxH" => 0x2550,
+	b"boxHD" => 0x2566,
+	b"boxHU" => 0x2569,
+	b"boxHd" => 0x2564,
+	b"boxHu" => 0x2567,
+	b"boxUL" => 0x255d,
+	b"boxUR" => 0x255a,
+	b"boxUl" => 0x255c,
+	b"boxUr" => 0x2559,
+	b"boxV" => 0x2551,
+	b"boxVH" => 0x256c,
+	b"boxVL" => 0x2563,
+	b"boxVR" => 0x2560,
+	b"boxVh" => 0x256b,
+	b"boxVl" => 0x2562,
+	b"boxVr" => 0x255f,
+	b"boxbox" => 0x29c9,
+	b"boxdL" => 0x2555,
+	b"boxdR" => 0x2552,
+	b"boxdl" => 0x2510,
+	b"boxdr" => 0x250c,
+	b"boxh" => 0x2500,
+	b"boxhD" => 0x2565,
+	b"boxhU" => 0x2568,
+	b"boxhd" => 0x252c,
+	b"boxhu" => 0x2534,
+	b"boxminus" => 0x229f,
+	b"boxplus" => 0x229e,
+	b"boxtimes" => 0x22a0,
+	b"boxuL" => 0x255b,
+	b"boxuR" => 0x2558,
+	b"boxul" => 0x2518,
+	b"boxur" => 0x2514,
+	b"boxv" => 0x2502,
+	b"boxvH" => 0x256a,
+	b"boxvL" => 0x2561,
+	b"boxvR" => 0x255e,
+	b"boxvh" => 0x253c,
+	b"boxvl" => 0x2524,
+	b"boxvr" => 0x251c,
+	b"bprime" => 0x2035,
+	b"breve" => 0x2d8,
+	b"brvbar" => 0xa6,
+	b"bscr" => 0x1d4b7,
+	b"bsemi" => 0x204f,
+	b"bsim" => 0x223d,
+	b"bsime" => 0x22cd,
+	b"bsol" => 0x5c,
+	b"bsolb" => 0x29c5,
+	b"bull" => 0x2022,
+	b"bullet" => 0x2022,
+	b"bump" => 0x224e,
+	b"bumpE" => 0x2aae,
+	b"bumpe" => 0x224f,
+	b"bumpeq" => 0x224f,
+	b"cacute" => 0x107,
+	b"cap" => 0x2229,
+	b"capand" => 0x2a44,
+	b"capbrcup" => 0x2a49,
+	b"capcap" => 0x2a4b,
+	b"capcup" => 0x2a47,
+	b"capdot" => 0x2a40,
+	b"caret" => 0x2041,
+	b"caron" => 0x2c7,
+	b"ccaps" => 0x2a4d,
+	b"ccaron" => 0x10d,
+	b"ccedil" => 0xe7,
+	b"ccirc" => 0x109,
+	b"ccups" => 0x2a4c,
+	b"ccupssm" => 0x2a50,
+	b"cdot" => 0x10b,
+	b"cedil" => 0xb8,
+	b"cemptyv" => 0x29b2,
+	b"cent" => 0xa2,
+	b"centerdot" => 0xb7,
+	b"cfr" => 0x1d520,
+	b"chcy" => 0x447,
+	b"check" => 0x2713,
+	b"checkmark" => 0x2713,
+	b"chi" => 0x3c7,
+	b"cir" => 0x25cb,
+	b"cirE" => 0x29c3,
+	b"circ" => 0x2c6,
+	b"circeq" => 0x2257,
+	b"circlearrowleft" => 0x21ba,
+	b"circlearrowright" => 0x21bb,
+	b"circledR" => 0xae,
+	b"circledS" => 0x24c8,
+	b"circledast" => 0x229b,
+	b"circledcirc" => 0x229a,
+	b"circleddash" => 0x229d,
+	b"cire" => 0x2257,
+	b"cirfnint" => 0x2a10,
+	b"cirmid" => 0x2aef,
+	b"cirscir" => 0x29c2,
+	b"clubs" => 0x2663,
+	b"clubsuit" => 0x2663,
+	b"colon" => 0x3a,
+	b"colone" => 0x2254,
+	b"coloneq" => 0x2254,
+	b"comma" => 0x2c,
+	b"commat" => 0x40,
+	b"comp" => 0x2201,
+	b"compfn" => 0x2218,
+	b"complement" => 0x2201,
+	b"complexes" => 0x2102,
+	b"cong" => 0x2245,
+	b"congdot" => 0x2a6d,
+	b"conint" => 0x222e,
+	b"copf" => 0x1d554,
+	b"coprod" => 0x2210,
+	b"copy" => 0xa9,
+	b"copysr" => 0x2117,
+	b"crarr" => 0x21b5,
+	b"cross" => 0x2717,
+	b"cscr" => 0x1d4b8,
+	b"csub" => 0x2acf,
+	b"csube" => 0x2ad1,
+	b"csup" => 0x2ad0,
+	b"csupe" => 0x2ad2,
+	b"ctdot" => 0x22ef,
+	b"cudarrl" => 0x2938,
+	b"cudarrr" => 0x2935,
+	b"cuepr" => 0x22de,
+	b"cuesc" => 0x22df,
+	b"cularr" => 0x21b6,
+	b"cularrp" => 0x293d,
+	b"cup" => 0x222a,
+	b"cupbrcap" => 0x2a48,
+	b"cupcap" => 0x2a46,
+	b"cupcup" => 0x2a4a,
+	b"cupdot" => 0x228d,
+	b"cupor" => 0x2a45,
+	b"curarr" => 0x21b7,
+	b"curarrm" => 0x293c,
+	b"curlyeqprec" => 0x22de,
+	b"curlyeqsucc" => 0x22df,
+	b"curlyvee" => 0x22ce,
+	b"curlywedge" => 0x22cf,
+	b"curren" => 0xa4,
+	b"curvearrowleft" => 0x21b6,
+	b"curvearrowright" => 0x21b7,
+	b"cuvee" => 0x22ce,
+	b"cuwed" => 0x22cf,
+	b"cwconint" => 0x2232,
+	b"cwint" => 0x2231,
+	b"cylcty" => 0x232d,
+	b"dArr" => 0x21d3,
+	b"dHar" => 0x2965,
+	b"dagger" => 0x2020,
+	b"daleth" => 0x2138,
+	b"darr" => 0x2193,
+	b"dash" => 0x2010,
+	b"dashv" => 0x22a3,
+	b"dbkarow" => 0x290f,
+	b"dblac" => 0x2dd,
+	b"dcaron" => 0x10f,
+	b"dcy" => 0x434,
+	b"dd" => 0x2146,
+	b"ddagger" => 0x2021,
+	b"ddarr" => 0x21ca,
+	b"ddotseq" => 0x2a77,
+	b"deg" => 0xb0,
+	b"delta" => 0x3b4,
+	b"demptyv" => 0x29b1,
+	b"dfisht" => 0x297f,
+	b"dfr" => 0x1d521,
+	b"dharl" => 0x21c3,
+	b"dharr" => 0x21c2,
+	b"diam" => 0x22c4,
+	b"diamond" => 0x22c4,
+	b"diamondsuit" => 0x2666,
+	b"diams" => 0x2666,
+	b"die" => 0xa8,
+	b"digamma" => 0x3dd,
+	b"disin" => 0x22f2,
+	b"div" => 0xf7,
+	b"divide" => 0xf7,
+	b"divideontimes" => 0x22c7,
+	b"divonx" => 0x22c7,
+	b"djcy" => 0x452,
+	b"dlcorn" => 0x231e,
+	b"dlcrop" => 0x230d,
+	b"dollar" => 0x24,
+	b"dopf" => 0x1d555,
+	b"dot" => 0x2d9,
+	b"doteq" => 0x2250,
+	b"doteqdot" => 0x2251,
+	b"dotminus" => 0x2238,
+	b"dotplus" => 0x2214,
+	b"dotsquare" => 0x22a1,
+	b"doublebarwedge" => 0x2306,
+	b"downarrow" => 0x2193,
+	b"downdownarrows" => 0x21ca,
+	b"downharpoonleft" => 0x21c3,
+	b"downharpoonright" => 0x21c2,
+	b"drbkarow" => 0x2910,
+	b"drcorn" => 0x231f,
+	b"drcrop" => 0x230c,
+	b"dscr" => 0x1d4b9,
+	b"dscy" => 0x455,
+	b"dsol" => 0x29f6,
+	b"dstrok" => 0x111,
+	b"dtdot" => 0x22f1,
+	b"dtri" => 0x25bf,
+	b"dtrif" => 0x25be,
+	b"duarr" => 0x21f5,
+	b"duhar" => 0x296f,
+	b"dwangle" => 0x29a6,
+	b"dzcy" => 0x45f,
+	b"dzigrarr" => 0x27ff,
+	b"eDDot" => 0x2a77,
+	b"eDot" => 0x2251,
+	b"eacute" => 0xe9,
+	b"easter" => 0x2a6e,
+	b"ecaron" => 0x11b,
+	b"ecir" => 0x2256,
+	b"ecirc" => 0xea,
+	b"ecolon" => 0x2255,
+	b"ecy" => 0x44d,
+	b"edot" => 0x117,
+	b"ee" => 0x2147,
+	b"efDot" => 0x2252,
+	b"efr" => 0x1d522,
+	b"eg" => 0x2a9a,
+	b"egrave" => 0xe8,
+	b"egs" => 0x2a96,
+	b"egsdot" => 0x2a98,
+	b"el" => 0x2a99,
+	b"elinters" => 0x23e7,
+	b"ell" => 0x2113,
+	b"els" => 0x2a95,
+	b"elsdot" => 0x2a97,
+	b"emacr" => 0x113,
+	b"empty" => 0x2205,
+	b"emptyset" => 0x2205,
+	b"emptyv" => 0x2205,
+	b"emsp" => 0x2003,
+	b"emsp13" => 0x2004,
+	b"emsp14" => 0x2005,
+	b"eng" => 0x14b,
+	b"ensp" => 0x2002,
+	b"eogon" => 0x119,
+	b"eopf" => 0x1d556,
+	b"epar" => 0x22d5,
+	b"eparsl" => 0x29e3,
+	b"eplus" => 0x2a71,
+	b"epsi" => 0x3f5,
+	b"epsilon" => 0x3b5,
+	b"epsiv" => 0x3b5,
+	b"eqcirc" => 0x2256,
+	b"eqcolon" => 0x2255,
+	b"eqsim" => 0x2242,
+	b"eqslantgtr" => 0x2a96,
+	b"eqslantless" => 0x2a95,
+	b"equals" => 0x3d,
+	b"equest" => 0x225f,
+	b"equiv" => 0x2261,
+	b"equivDD" => 0x2a78,
+	b"eqvparsl" => 0x29e5,
+	b"erDot" => 0x2253,
+	b"erarr" => 0x2971,
+	b"escr" => 0x212f,
+	b"esdot" => 0x2250,
+	b"esim" => 0x2242,
+	b"eta" => 0x3b7,
+	b"eth" => 0xf0,
+	b"euml" => 0xeb,
+	b"euro" => 0x20ac,
+	b"excl" => 0x21,
+	b"exist" => 0x2203,
+	b"expectation" => 0x2130,
+	b"exponentiale" => 0x2147,
+	b"fallingdotseq" => 0x2252,
+	b"fcy" => 0x444,
+	b"female" => 0x2640,
+	b"ffilig" => 0xfb03,
+	b"fflig" => 0xfb00,
+	b"ffllig" => 0xfb04,
+	b"ffr" => 0x1d523,
+	b"filig" => 0xfb01,
+	b"flat" => 0x266d,
+	b"fllig" => 0xfb02,
+	b"fltns" => 0x25b1,
+	b"fnof" => 0x192,
+	b"fopf" => 0x1d557,
+	b"forall" => 0x2200,
+	b"fork" => 0x22d4,
+	b"forkv" => 0x2ad9,
+	b"fpartint" => 0x2a0d,
+	b"frac12" => 0xbd,
+	b"frac13" => 0x2153,
+	b"frac14" => 0xbc,
+	b"frac15" => 0x2155,
+	b"frac16" => 0x2159,
+	b"frac18" => 0x215b,
+	b"frac23" => 0x2154,
+	b"frac25" => 0x2156,
+	b"frac34" => 0xbe,
+	b"frac35" => 0x2157,
+	b"frac38" => 0x215c,
+	b"frac45" => 0x2158,
+	b"frac56" => 0x215a,
+	b"frac58" => 0x215d,
+	b"frac78" => 0x215e,
+	b"frasl" => 0x2044,
+	b"frown" => 0x2322,
+	b"fscr" => 0x1d4bb,
+	b"gE" => 0x2267,
+	b"gEl" => 0x2a8c,
+	b"gacute" => 0x1f5,
+	b"gamma" => 0x3b3,
+	b"gammad" => 0x3dd,
+	b"gap" => 0x2a86,
+	b"gbreve" => 0x11f,
+	b"gcirc" => 0x11d,
+	b"gcy" => 0x433,
+	b"gdot" => 0x121,
+	b"ge" => 0x2265,
+	b"gel" => 0x22db,
+	b"geq" => 0x2265,
+	b"geqq" => 0x2267,
+	b"geqslant" => 0x2a7e,
+	b"ges" => 0x2a7e,
+	b"gescc" => 0x2aa9,
+	b"gesdot" => 0x2a80,
+	b"gesdoto" => 0x2a82,
+	b"gesdotol" => 0x2a84,
+	b"gesles" => 0x2a94,
+	b"gfr" => 0x1d524,
+	b"gg" => 0x226b,
+	b"ggg" => 0x22d9,
+	b"gimel" => 0x2137,
+	b"gjcy" => 0x453,
+	b"gl" => 0x2277,
+	b"glE" => 0x2a92,
+	b"gla" => 0x2aa5,
+	b"glj" => 0x2aa4,
+	b"gnE" => 0x2269,
+	b"gnap" => 0x2a8a,
+	b"gnapprox" => 0x2a8a,
+	b"gne" => 0x2a88,
+	b"gneq" => 0x2a88,
+	b"gneqq" => 0x2269,
+	b"gnsim" => 0x22e7,
+	b"gopf" => 0x1d558,
+	b"grave" => 0x60,
+	b"gscr" => 0x210a,
+	b"gsim" => 0x2273,
+	b"gsime" => 0x2a8e,
+	b"gsiml" => 0x2a90,
+	b"gt" => 0x3e,
+	b"gtcc" => 0x2aa7,
+	b"gtcir" => 0x2a7a,
+	b"gtdot" => 0x22d7,
+	b"gtlPar" => 0x2995,
+	b"gtquest" => 0x2a7c,
+	b"gtrapprox" => 0x2a86,
+	b"gtrarr" => 0x2978,
+	b"gtrdot" => 0x22d7,
+	b"gtreqless" => 0x22db,
+	b"gtreqqless" => 0x2a8c,
+	b"gtrless" => 0x2277,
+	b"gtrsim" => 0x2273,
+	b"hArr" => 0x21d4,
+	b"hairsp" => 0x200a,
+	b"half" => 0xbd,
+	b"hamilt" => 0x210b,
+	b"hardcy" => 0x44a,
+	b"harr" => 0x2194,
+	b"harrcir" => 0x2948,
+	b"harrw" => 0x21ad,
+	b"hbar" => 0x210f,
+	b"hcirc" => 0x125,
+	b"hearts" => 0x2665,
+	b"heartsuit" => 0x2665,
+	b"hellip" => 0x2026,
+	b"hercon" => 0x22b9,
+	b"hfr" => 0x1d525,
+	b"hksearow" => 0x2925,
+	b"hkswarow" => 0x2926,
+	b"hoarr" => 0x21ff,
+	b"homtht" => 0x223b,
+	b"hookleftarrow" => 0x21a9,
+	b"hookrightarrow" => 0x21aa,
+	b"hopf" => 0x1d559,
+	b"horbar" => 0x2015,
+	b"hscr" => 0x1d4bd,
+	b"hslash" => 0x210f,
+	b"hstrok" => 0x127,
+	b"hybull" => 0x2043,
+	b"hyphen" => 0x2010,
+	b"iacute" => 0xed,
+	b"ic" => 0x2063,
+	b"icirc" => 0xee,
+	b"icy" => 0x438,
+	b"iecy" => 0x435,
+	b"iexcl" => 0xa1,
+	b"iff" => 0x21d4,
+	b"ifr" => 0x1d526,
+	b"igrave" => 0xec,
+	b"ii" => 0x2148,
+	b"iiiint" => 0x2a0c,
+	b"iiint" => 0x222d,
+	b"iinfin" => 0x29dc,
+	b"iiota" => 0x2129,
+	b"ijlig" => 0x133,
+	b"imacr" => 0x12b,
+	b"image" => 0x2111,
+	b"imagline" => 0x2110,
+	b"imagpart" => 0x2111,
+	b"imath" => 0x131,
+	b"imof" => 0x22b7,
+	b"imped" => 0x1b5,
+	b"in" => 0x2208,
+	b"incare" => 0x2105,
+	b"infin" => 0x221e,
+	b"infintie" => 0x29dd,
+	b"inodot" => 0x131,
+	b"int" => 0x222b,
+	b"intcal" => 0x22ba,
+	b"integers" => 0x2124,
+	b"intercal" => 0x22ba,
+	b"intlarhk" => 0x2a17,
+	b"intprod" => 0x2a3c,
+	b"iocy" => 0x451,
+	b"iogon" => 0x12f,
+	b"iopf" => 0x1d55a,
+	b"iota" => 0x3b9,
+	b"iprod" => 0x2a3c,
+	b"iquest" => 0xbf,
+	b"iscr" => 0x1d4be,
+	b"isin" => 0x2208,
+	b"isinE" => 0x22f9,
+	b"isindot" => 0x22f5,
+	b"isins" => 0x22f4,
+	b"isinsv" => 0x22f3,
+	b"isinv" => 0x2208,
+	b"it" => 0x2062,
+	b"itilde" => 0x129,
+	b"iukcy" => 0x456,
+	b"iuml" => 0xef,
+	b"jcirc" => 0x135,
+	b"jcy" => 0x439,
+	b"jfr" => 0x1d527,
+	b"jmath" => 0x237,
+	b"jopf" => 0x1d55b,
+	b"jscr" => 0x1d4bf,
+	b"jsercy" => 0x458,
+	b"jukcy" => 0x454,
+	b"kappa" => 0x3ba,
+	b"kappav" => 0x3f0,
+	b"kcedil" => 0x137,
+	b"kcy" => 0x43a,
+	b"kfr" => 0x1d528,
+	b"kgreen" => 0x138,
+	b"khcy" => 0x445,
+	b"kjcy" => 0x45c,
+	b"kopf" => 0x1d55c,
+	b"kscr" => 0x1d4c0,
+	b"lAarr" => 0x21da,
+	b"lArr" => 0x21d0,
+	b"lAtail" => 0x291b,
+	b"lBarr" => 0x290e,
+	b"lE" => 0x2266,
+	b"lEg" => 0x2a8b,
+	b"lHar" => 0x2962,
+	b"lacute" => 0x13a,
+	b"laemptyv" => 0x29b4,
+	b"lagran" => 0x2112,
+	b"lambda" => 0x3bb,
+	b"lang" => 0x27e8,
+	b"langd" => 0x2991,
+	b"langle" => 0x27e8,
+	b"lap" => 0x2a85,
+	b"laquo" => 0xab,
+	b"larr" => 0x2190,
+	b"larrb" => 0x21e4,
+	b"larrbfs" => 0x291f,
+	b"larrfs" => 0x291d,
+	b"larrhk" => 0x21a9,
+	b"larrlp" => 0x21ab,
+	b"larrpl" => 0x2939,
+	b"larrsim" => 0x2973,
+	b"larrtl" => 0x21a2,
+	b"lat" => 0x2aab,
+	b"latail" => 0x2919,
+	b"late" => 0x2aad,
+	b"lbarr" => 0x290c,
+	b"lbbrk" => 0x2772,
+	b"lbrace" => 0x7b,
+	b"lbrack" => 0x5b,
+	b"lbrke" => 0x298b,
+	b"lbrksld" => 0x298f,
+	b"lbrkslu" => 0x298d,
+	b"lcaron" => 0x13e,
+	b"lcedil" => 0x13c,
+	b"lceil" => 0x2308,
+	b"lcub" => 0x7b,
+	b"lcy" => 0x43b,
+	b"ldca" => 0x2936,
+	b"ldquo" => 0x201c,
+	b"ldquor" => 0x201e,
+	b"ldrdhar" => 0x2967,
+	b"ldrushar" => 0x294b,
+	b"ldsh" => 0x21b2,
+	b"le" => 0x2264,
+	b"leftarrow" => 0x2190,
+	b"leftarrowtail" => 0x21a2,
+	b"leftharpoondown" => 0x21bd,
+	b"leftharpoonup" => 0x21bc,
+	b"leftleftarrows" => 0x21c7,
+	b"leftrightarrow" => 0x2194,
+	b"leftrightarrows" => 0x21c6,
+	b"leftrightharpoons" => 0x21cb,
+	b"leftrightsquigarrow" => 0x21ad,
+	b"leftthreetimes" => 0x22cb,
+	b"leg" => 0x22da,
+	b"leq" => 0x2264,
+	b"leqq" => 0x2266,
+	b"leqslant" => 0x2a7d,
+	b"les" => 0x2a7d,
+	b"lescc" => 0x2aa8,
+	b"lesdot" => 0x2a7f,
+	b"lesdoto" => 0x2a81,
+	b"lesdotor" => 0x2a83,
+	b"lesges" => 0x2a93,
+	b"lessapprox" => 0x2a85,
+	b"lessdot" => 0x22d6,
+	b"lesseqgtr" => 0x22da,
+	b"lesseqqgtr" => 0x2a8b,
+	b"lessgtr" => 0x2276,
+	b"lesssim" => 0x2272,
+	b"lfisht" => 0x297c,
+	b"lfloor" => 0x230a,
+	b"lfr" => 0x1d529,
+	b"lg" => 0x2276,
+	b"lgE" => 0x2a91,
+	b"lhard" => 0x21bd,
+	b"lharu" => 0x21bc,
+	b"lharul" => 0x296a,
+	b"lhblk" => 0x2584,
+	b"ljcy" => 0x459,
+	b"ll" => 0x226a,
+	b"llarr" => 0x21c7,
+	b"llcorner" => 0x231e,
+	b"llhard" => 0x296b,
+	b"lltri" => 0x25fa,
+	b"lmidot" => 0x140,
+	b"lmoust" => 0x23b0,
+	b"lmoustache" => 0x23b0,
+	b"lnE" => 0x2268,
+	b"lnap" => 0x2a89,
+	b"lnapprox" => 0x2a89,
+	b"lne" => 0x2a87,
+	b"lneq" => 0x2a87,
+	b"lneqq" => 0x2268,
+	b"lnsim" => 0x22e6,
+	b"loang" => 0x27ec,
+	b"loarr" => 0x21fd,
+	b"lobrk" => 0x27e6,
+	b"longleftarrow" => 0x27f5,
+	b"longleftrightarrow" => 0x27f7,
+	b"longmapsto" => 0x27fc,
+	b"longrightarrow" => 0x27f6,
+	b"looparrowleft" => 0x21ab,
+	b"looparrowright" => 0x21ac,
+	b"lopar" => 0x2985,
+	b"lopf" => 0x1d55d,
+	b"loplus" => 0x2a2d,
+	b"lotimes" => 0x2a34,
+	b"lowast" => 0x2217,
+	b"lowbar" => 0x5f,
+	b"loz" => 0x25ca,
+	b"lozenge" => 0x25ca,
+	b"lozf" => 0x29eb,
+	b"lpar" => 0x28,
+	b"lparlt" => 0x2993,
+	b"lrarr" => 0x21c6,
+	b"lrcorner" => 0x231f,
+	b"lrhar" => 0x21cb,
+	b"lrhard" => 0x296d,
+	b"lrm" => 0x200e,
+	b"lrtri" => 0x22bf,
+	b"lsaquo" => 0x2039,
+	b"lscr" => 0x1d4c1,
+	b"lsh" => 0x21b0,
+	b"lsim" => 0x2272,
+	b"lsime" => 0x2a8d,
+	b"lsimg" => 0x2a8f,
+	b"lsqb" => 0x5b,
+	b"lsquo" => 0x2018,
+	b"lsquor" => 0x201a,
+	b"lstrok" => 0x142,
+	b"lt" => 0x3c,
+	b"ltcc" => 0x2aa6,
+	b"ltcir" => 0x2a79,
+	b"ltdot" => 0x22d6,
+	b"lthree" => 0x22cb,
+	b"ltimes" => 0x22c9,
+	b"ltlarr" => 0x2976,
+	b"ltquest" => 0x2a7b,
+	b"ltrPar" => 0x2996,
+	b"ltri" => 0x25c3,
+	b"ltrie" => 0x22b4,
+	b"ltrif" => 0x25c2,
+	b"lurdshar" => 0x294a,
+	b"luruhar" => 0x2966,
+	b"mDDot" => 0x223a,
+	b"macr" => 0xaf,
+	b"male" => 0x2642,
+	b"malt" => 0x2720,
+	b"maltese" => 0x2720,
+	b"map" => 0x21a6,
+	b"mapsto" => 0x21a6,
+	b"mapstodown" => 0x21a7,
+	b"mapstoleft" => 0x21a4,
+	b"mapstoup" => 0x21a5,
+	b"marker" => 0x25ae,
+	b"mcomma" => 0x2a29,
+	b"mcy" => 0x43c,
+	b"mdash" => 0x2014,
+	b"measuredangle" => 0x2221,
+	b"mfr" => 0x1d52a,
+	b"mho" => 0x2127,
+	b"micro" => 0xb5,
+	b"mid" => 0x2223,
+	b"midast" => 0x2a,
+	b"midcir" => 0x2af0,
+	b"middot" => 0xb7,
+	b"minus" => 0x2212,
+	b"minusb" => 0x229f,
+	b"minusd" => 0x2238,
+	b"minusdu" => 0x2a2a,
+	b"mlcp" => 0x2adb,
+	b"mldr" => 0x2026,
+	b"mnplus" => 0x2213,
+	b"models" => 0x22a7,
+	b"mopf" => 0x1d55e,
+	b"mp" => 0x2213,
+	b"mscr" => 0x1d4c2,
+	b"mstpos" => 0x223e,
+	b"mu" => 0x3bc,
+	b"multimap" => 0x22b8,
+	b"mumap" => 0x22b8,
+	b"nLeftarrow" => 0x21cd,
+	b"nLeftrightarrow" => 0x21ce,
+	b"nRightarrow" => 0x21cf,
+	b"nVDash" => 0x22af,
+	b"nVdash" => 0x22ae,
+	b"nabla" => 0x2207,
+	b"nacute" => 0x144,
+	b"nap" => 0x2249,
+	b"napos" => 0x149,
+	b"napprox" => 0x2249,
+	b"natur" => 0x266e,
+	b"natural" => 0x266e,
+	b"naturals" => 0x2115,
+	b"nbsp" => 0xa0,
+	b"ncap" => 0x2a43,
+	b"ncaron" => 0x148,
+	b"ncedil" => 0x146,
+	b"ncong" => 0x2247,
+	b"ncup" => 0x2a42,
+	b"ncy" => 0x43d,
+	b"ndash" => 0x2013,
+	b"ne" => 0x2260,
+	b"neArr" => 0x21d7,
+	b"nearhk" => 0x2924,
+	b"nearr" => 0x2197,
+	b"nearrow" => 0x2197,
+	b"nequiv" => 0x2262,
+	b"nesear" => 0x2928,
+	b"nexist" => 0x2204,
+	b"nexists" => 0x2204,
+	b"nfr" => 0x1d52b,
+	b"nge" => 0x2271,
+	b"ngeq" => 0x2271,
+	b"ngsim" => 0x2275,
+	b"ngt" => 0x226f,
+	b"ngtr" => 0x226f,
+	b"nhArr" => 0x21ce,
+	b"nharr" => 0x21ae,
+	b"nhpar" => 0x2af2,
+	b"ni" => 0x220b,
+	b"nis" => 0x22fc,
+	b"nisd" => 0x22fa,
+	b"niv" => 0x220b,
+	b"njcy" => 0x45a,
+	b"nlArr" => 0x21cd,
+	b"nlarr" => 0x219a,
+	b"nldr" => 0x2025,
+	b"nle" => 0x2270,
+	b"nleftarrow" => 0x219a,
+	b"nleftrightarrow" => 0x21ae,
+	b"nleq" => 0x2270,
+	b"nless" => 0x226e,
+	b"nlsim" => 0x2274,
+	b"nlt" => 0x226e,
+	b"nltri" => 0x22ea,
+	b"nltrie" => 0x22ec,
+	b"nmid" => 0x2224,
+	b"nopf" => 0x1d55f,
+	b"not" => 0xac,
+	b"notin" => 0x2209,
+	b"notinva" => 0x2209,
+	b"notinvb" => 0x22f7,
+	b"notinvc" => 0x22f6,
+	b"notni" => 0x220c,
+	b"notniva" => 0x220c,
+	b"notnivb" => 0x22fe,
+	b"notnivc" => 0x22fd,
+	b"npar" => 0x2226,
+	b"nparallel" => 0x2226,
+	b"npolint" => 0x2a14,
+	b"npr" => 0x2280,
+	b"nprcue" => 0x22e0,
+	b"nprec" => 0x2280,
+	b"nrArr" => 0x21cf,
+	b"nrarr" => 0x219b,
+	b"nrightarrow" => 0x219b,
+	b"nrtri" => 0x22eb,
+	b"nrtrie" => 0x22ed,
+	b"nsc" => 0x2281,
+	b"nsccue" => 0x22e1,
+	b"nscr" => 0x1d4c3,
+	b"nshortmid" => 0x2224,
+	b"nshortparallel" => 0x2226,
+	b"nsim" => 0x2241,
+	b"nsime" => 0x2244,
+	b"nsimeq" => 0x2244,
+	b"nsmid" => 0x2224,
+	b"nspar" => 0x2226,
+	b"nsqsube" => 0x22e2,
+	b"nsqsupe" => 0x22e3,
+	b"nsub" => 0x2284,
+	b"nsube" => 0x2288,
+	b"nsubseteq" => 0x2288,
+	b"nsucc" => 0x2281,
+	b"nsup" => 0x2285,
+	b"nsupe" => 0x2289,
+	b"nsupseteq" => 0x2289,
+	b"ntgl" => 0x2279,
+	b"ntilde" => 0xf1,
+	b"ntlg" => 0x2278,
+	b"ntriangleleft" => 0x22ea,
+	b"ntrianglelefteq" => 0x22ec,
+	b"ntriangleright" => 0x22eb,
+	b"ntrianglerighteq" => 0x22ed,
+	b"nu" => 0x3bd,
+	b"num" => 0x23,
+	b"numero" => 0x2116,
+	b"numsp" => 0x2007,
+	b"nvDash" => 0x22ad,
+	b"nvHarr" => 0x2904,
+	b"nvdash" => 0x22ac,
+	b"nvinfin" => 0x29de,
+	b"nvlArr" => 0x2902,
+	b"nvrArr" => 0x2903,
+	b"nwArr" => 0x21d6,
+	b"nwarhk" => 0x2923,
+	b"nwarr" => 0x2196,
+	b"nwarrow" => 0x2196,
+	b"nwnear" => 0x2927,
+	b"oS" => 0x24c8,
+	b"oacute" => 0xf3,
+	b"oast" => 0x229b,
+	b"ocir" => 0x229a,
+	b"ocirc" => 0xf4,
+	b"ocy" => 0x43e,
+	b"odash" => 0x229d,
+	b"odblac" => 0x151,
+	b"odiv" => 0x2a38,
+	b"odot" => 0x2299,
+	b"odsold" => 0x29bc,
+	b"oelig" => 0x153,
+	b"ofcir" => 0x29bf,
+	b"ofr" => 0x1d52c,
+	b"ogon" => 0x2db,
+	b"ograve" => 0xf2,
+	b"ogt" => 0x29c1,
+	b"ohbar" => 0x29b5,
+	b"ohm" => 0x2126,
+	b"oint" => 0x222e,
+	b"olarr" => 0x21ba,
+	b"olcir" => 0x29be,
+	b"olcross" => 0x29bb,
+	b"oline" => 0x203e,
+	b"olt" => 0x29c0,
+	b"omacr" => 0x14d,
+	b"omega" => 0x3c9,
+	b"omicron" => 0x3bf,
+	b"omid" => 0x29b6,
+	b"ominus" => 0x2296,
+	b"oopf" => 0x1d560,
+	b"opar" => 0x29b7,
+	b"operp" => 0x29b9,
+	b"oplus" => 0x2295,
+	b"or" => 0x2228,
+	b"orarr" => 0x21bb,
+	b"ord" => 0x2a5d,
+	b"order" => 0x2134,
+	b"orderof" => 0x2134,
+	b"ordf" => 0xaa,
+	b"ordm" => 0xba,
+	b"origof" => 0x22b6,
+	b"oror" => 0x2a56,
+	b"orslope" => 0x2a57,
+	b"orv" => 0x2a5b,
+	b"oscr" => 0x2134,
+	b"oslash" => 0xf8,
+	b"osol" => 0x2298,
+	b"otilde" => 0xf5,
+	b"otimes" => 0x2297,
+	b"otimesas" => 0x2a36,
+	b"ouml" => 0xf6,
+	b"ovbar" => 0x233d,
+	b"par" => 0x2225,
+	b"para" => 0xb6,
+	b"parallel" => 0x2225,
+	b"parsim" => 0x2af3,
+	b"parsl" => 0x2afd,
+	b"part" => 0x2202,
+	b"pcy" => 0x43f,
+	b"percnt" => 0x25,
+	b"period" => 0x2e,
+	b"permil" => 0x2030,
+	b"perp" => 0x22a5,
+	b"pertenk" => 0x2031,
+	b"pfr" => 0x1d52d,
+	b"phi" => 0x3c6,
+	b"phiv" => 0x3c6,
+	b"phmmat" => 0x2133,
+	b"phone" => 0x260e,
+	b"pi" => 0x3c0,
+	b"pitchfork" => 0x22d4,
+	b"piv" => 0x3d6,
+	b"planck" => 0x210f,
+	b"planckh" => 0x210e,
+	b"plankv" => 0x210f,
+	b"plus" => 0x2b,
+	b"plusacir" => 0x2a23,
+	b"plusb" => 0x229e,
+	b"pluscir" => 0x2a22,
+	b"plusdo" => 0x2214,
+	b"plusdu" => 0x2a25,
+	b"pluse" => 0x2a72,
+	b"plusmn" => 0xb1,
+	b"plussim" => 0x2a26,
+	b"plustwo" => 0x2a27,
+	b"pm" => 0xb1,
+	b"pointint" => 0x2a15,
+	b"popf" => 0x1d561,
+	b"pound" => 0xa3,
+	b"pr" => 0x227a,
+	b"prE" => 0x2ab3,
+	b"prap" => 0x2ab7,
+	b"prcue" => 0x227c,
+	b"pre" => 0x2aaf,
+	b"prec" => 0x227a,
+	b"precapprox" => 0x2ab7,
+	b"preccurlyeq" => 0x227c,
+	b"preceq" => 0x2aaf,
+	b"precnapprox" => 0x2ab9,
+	b"precneqq" => 0x2ab5,
+	b"precnsim" => 0x22e8,
+	b"precsim" => 0x227e,
+	b"prime" => 0x2032,
+	b"primes" => 0x2119,
+	b"prnE" => 0x2ab5,
+	b"prnap" => 0x2ab9,
+	b"prnsim" => 0x22e8,
+	b"prod" => 0x220f,
+	b"profalar" => 0x232e,
+	b"profline" => 0x2312,
+	b"profsurf" => 0x2313,
+	b"prop" => 0x221d,
+	b"propto" => 0x221d,
+	b"prsim" => 0x227e,
+	b"prurel" => 0x22b0,
+	b"pscr" => 0x1d4c5,
+	b"psi" => 0x3c8,
+	b"puncsp" => 0x2008,
+	b"qfr" => 0x1d52e,
+	b"qint" => 0x2a0c,
+	b"qopf" => 0x1d562,
+	b"qprime" => 0x2057,
+	b"qscr" => 0x1d4c6,
+	b"quaternions" => 0x210d,
+	b"quatint" => 0x2a16,
+	b"quest" => 0x3f,
+	b"questeq" => 0x225f,
+	b"quot" => 0x22,
+	b"rAarr" => 0x21db,
+	b"rArr" => 0x21d2,
+	b"rAtail" => 0x291c,
+	b"rBarr" => 0x290f,
+	b"rHar" => 0x2964,
+	b"race" => 0x29da,
+	b"racute" => 0x155,
+	b"radic" => 0x221a,
+	b"raemptyv" => 0x29b3,
+	b"rang" => 0x27e9,
+	b"rangd" => 0x2992,
+	b"range" => 0x29a5,
+	b"rangle" => 0x27e9,
+	b"raquo" => 0xbb,
+	b"rarr" => 0x2192,
+	b"rarrap" => 0x2975,
+	b"rarrb" => 0x21e5,
+	b"rarrbfs" => 0x2920,
+	b"rarrc" => 0x2933,
+	b"rarrfs" => 0x291e,
+	b"rarrhk" => 0x21aa,
+	b"rarrlp" => 0x21ac,
+	b"rarrpl" => 0x2945,
+	b"rarrsim" => 0x2974,
+	b"rarrtl" => 0x21a3,
+	b"rarrw" => 0x219d,
+	b"ratail" => 0x291a,
+	b"ratio" => 0x2236,
+	b"rationals" => 0x211a,
+	b"rbarr" => 0x290d,
+	b"rbbrk" => 0x2773,
+	b"rbrace" => 0x7d,
+	b"rbrack" => 0x5d,
+	b"rbrke" => 0x298c,
+	b"rbrksld" => 0x298e,
+	b"rbrkslu" => 0x2990,
+	b"rcaron" => 0x159,
+	b"rcedil" => 0x157,
+	b"rceil" => 0x2309,
+	b"rcub" => 0x7d,
+	b"rcy" => 0x440,
+	b"rdca" => 0x2937,
+	b"rdldhar" => 0x2969,
+	b"rdquo" => 0x201d,
+	b"rdquor" => 0x201d,
+	b"rdsh" => 0x21b3,
+	b"real" => 0x211c,
+	b"realine" => 0x211b,
+	b"realpart" => 0x211c,
+	b"reals" => 0x211d,
+	b"rect" => 0x25ad,
+	b"reg" => 0xae,
+	b"rfisht" => 0x297d,
+	b"rfloor" => 0x230b,
+	b"rfr" => 0x1d52f,
+	b"rhard" => 0x21c1,
+	b"rharu" => 0x21c0,
+	b"rharul" => 0x296c,
+	b"rho" => 0x3c1,
+	b"rhov" => 0x3f1,
+	b"rightarrow" => 0x2192,
+	b"rightarrowtail" => 0x21a3,
+	b"rightharpoondown" => 0x21c1,
+	b"rightharpoonup" => 0x21c0,
+	b"rightleftarrows" => 0x21c4,
+	b"rightleftharpoons" => 0x21cc,
+	b"rightrightarrows" => 0x21c9,
+	b"rightsquigarrow" => 0x219d,
+	b"rightthreetimes" => 0x22cc,
+	b"ring" => 0x2da,
+	b"risingdotseq" => 0x2253,
+	b"rlarr" => 0x21c4,
+	b"rlhar" => 0x21cc,
+	b"rlm" => 0x200f,
+	b"rmoust" => 0x23b1,
+	b"rmoustache" => 0x23b1,
+	b"rnmid" => 0x2aee,
+	b"roang" => 0x27ed,
+	b"roarr" => 0x21fe,
+	b"robrk" => 0x27e7,
+	b"ropar" => 0x2986,
+	b"ropf" => 0x1d563,
+	b"roplus" => 0x2a2e,
+	b"rotimes" => 0x2a35,
+	b"rpar" => 0x29,
+	b"rpargt" => 0x2994,
+	b"rppolint" => 0x2a12,
+	b"rrarr" => 0x21c9,
+	b"rsaquo" => 0x203a,
+	b"rscr" => 0x1d4c7,
+	b"rsh" => 0x21b1,
+	b"rsqb" => 0x5d,
+	b"rsquo" => 0x2019,
+	b"rsquor" => 0x2019,
+	b"rthree" => 0x22cc,
+	b"rtimes" => 0x22ca,
+	b"rtri" => 0x25b9,
+	b"rtrie" => 0x22b5,
+	b"rtrif" => 0x25b8,
+	b"rtriltri" => 0x29ce,
+	b"ruluhar" => 0x2968,
+	b"rx" => 0x211e,
+	b"sacute" => 0x15b,
+	b"sbquo" => 0x201a,
+	b"sc" => 0x227b,
+	b"scE" => 0x2ab4,
+	b"scap" => 0x2ab8,
+	b"scaron" => 0x161,
+	b"sccue" => 0x227d,
+	b"sce" => 0x2ab0,
+	b"scedil" => 0x15f,
+	b"scirc" => 0x15d,
+	b"scnE" => 0x2ab6,
+	b"scnap" => 0x2aba,
+	b"scnsim" => 0x22e9,
+	b"scpolint" => 0x2a13,
+	b"scsim" => 0x227f,
+	b"scy" => 0x441,
+	b"sdot" => 0x22c5,
+	b"sdotb" => 0x22a1,
+	b"sdote" => 0x2a66,
+	b"seArr" => 0x21d8,
+	b"searhk" => 0x2925,
+	b"searr" => 0x2198,
+	b"searrow" => 0x2198,
+	b"sect" => 0xa7,
+	b"semi" => 0x3b,
+	b"seswar" => 0x2929,
+	b"setminus" => 0x2216,
+	b"setmn" => 0x2216,
+	b"sext" => 0x2736,
+	b"sfr" => 0x1d530,
+	b"sfrown" => 0x2322,
+	b"sharp" => 0x266f,
+	b"shchcy" => 0x449,
+	b"shcy" => 0x448,
+	b"shortmid" => 0x2223,
+	b"shortparallel" => 0x2225,
+	b"shy" => 0xad,
+	b"sigma" => 0x3c3,
+	b"sigmaf" => 0x3c2,
+	b"sigmav" => 0x3c2,
+	b"sim" => 0x223c,
+	b"simdot" => 0x2a6a,
+	b"sime" => 0x2243,
+	b"simeq" => 0x2243,
+	b"simg" => 0x2a9e,
+	b"simgE" => 0x2aa0,
+	b"siml" => 0x2a9d,
+	b"simlE" => 0x2a9f,
+	b"simne" => 0x2246,
+	b"simplus" => 0x2a24,
+	b"simrarr" => 0x2972,
+	b"slarr" => 0x2190,
+	b"smallsetminus" => 0x2216,
+	b"smashp" => 0x2a33,
+	b"smeparsl" => 0x29e4,
+	b"smid" => 0x2223,
+	b"smile" => 0x2323,
+	b"smt" => 0x2aaa,
+	b"smte" => 0x2aac,
+	b"softcy" => 0x44c,
+	b"sol" => 0x2f,
+	b"solb" => 0x29c4,
+	b"solbar" => 0x233f,
+	b"sopf" => 0x1d564,
+	b"spades" => 0x2660,
+	b"spadesuit" => 0x2660,
+	b"spar" => 0x2225,
+	b"sqcap" => 0x2293,
+	b"sqcup" => 0x2294,
+	b"sqsub" => 0x228f,
+	b"sqsube" => 0x2291,
+	b"sqsubset" => 0x228f,
+	b"sqsubseteq" => 0x2291,
+	b"sqsup" => 0x2290,
+	b"sqsupe" => 0x2292,
+	b"sqsupset" => 0x2290,
+	b"sqsupseteq" => 0x2292,
+	b"squ" => 0x25a1,
+	b"square" => 0x25a1,
+	b"squarf" => 0x25aa,
+	b"squf" => 0x25aa,
+	b"srarr" => 0x2192,
+	b"sscr" => 0x1d4c8,
+	b"ssetmn" => 0x2216,
+	b"ssmile" => 0x2323,
+	b"sstarf" => 0x22c6,
+	b"star" => 0x2606,
+	b"starf" => 0x2605,
+	b"straightepsilon" => 0x3f5,
+	b"straightphi" => 0x3d5,
+	b"strns" => 0xaf,
+	b"sub" => 0x2282,
+	b"subE" => 0x2ac5,
+	b"subdot" => 0x2abd,
+	b"sube" => 0x2286,
+	b"subedot" => 0x2ac3,
+	b"submult" => 0x2ac1,
+	b"subnE" => 0x2acb,
+	b"subne" => 0x228a,
+	b"subplus" => 0x2abf,
+	b"subrarr" => 0x2979,
+	b"subset" => 0x2282,
+	b"subseteq" => 0x2286,
+	b"subseteqq" => 0x2ac5,
+	b"subsetneq" => 0x228a,
+	b"subsetneqq" => 0x2acb,
+	b"subsim" => 0x2ac7,
+	b"subsub" => 0x2ad5,
+	b"subsup" => 0x2ad3,
+	b"succ" => 0x227b,
+	b"succapprox" => 0x2ab8,
+	b"succcurlyeq" => 0x227d,
+	b"succeq" => 0x2ab0,
+	b"succnapprox" => 0x2aba,
+	b"succneqq" => 0x2ab6,
+	b"succnsim" => 0x22e9,
+	b"succsim" => 0x227f,
+	b"sum" => 0x2211,
+	b"sung" => 0x266a,
+	b"sup" => 0x2283,
+	b"sup1" => 0xb9,
+	b"sup2" => 0xb2,
+	b"sup3" => 0xb3,
+	b"supE" => 0x2ac6,
+	b"supdot" => 0x2abe,
+	b"supdsub" => 0x2ad8,
+	b"supe" => 0x2287,
+	b"supedot" => 0x2ac4,
+	b"suphsub" => 0x2ad7,
+	b"suplarr" => 0x297b,
+	b"supmult" => 0x2ac2,
+	b"supnE" => 0x2acc,
+	b"supne" => 0x228b,
+	b"supplus" => 0x2ac0,
+	b"supset" => 0x2283,
+	b"supseteq" => 0x2287,
+	b"supseteqq" => 0x2ac6,
+	b"supsetneq" => 0x228b,
+	b"supsetneqq" => 0x2acc,
+	b"supsim" => 0x2ac8,
+	b"supsub" => 0x2ad4,
+	b"supsup" => 0x2ad6,
+	b"swArr" => 0x21d9,
+	b"swarhk" => 0x2926,
+	b"swarr" => 0x2199,
+	b"swarrow" => 0x2199,
+	b"swnwar" => 0x292a,
+	b"szlig" => 0xdf,
+	b"target" => 0x2316,
+	b"tau" => 0x3c4,
+	b"tbrk" => 0x23b4,
+	b"tcaron" => 0x165,
+	b"tcedil" => 0x163,
+	b"tcy" => 0x442,
+	b"tdot" => 0x20db,
+	b"telrec" => 0x2315,
+	b"tfr" => 0x1d531,
+	b"there4" => 0x2234,
+	b"therefore" => 0x2234,
+	b"theta" => 0x3b8,
+	b"thetasym" => 0x3d1,
+	b"thetav" => 0x3d1,
+	b"thickapprox" => 0x2248,
+	b"thicksim" => 0x223c,
+	b"thinsp" => 0x2009,
+	b"thkap" => 0x2248,
+	b"thksim" => 0x223c,
+	b"thorn" => 0xfe,
+	b"tilde" => 0x2dc,
+	b"times" => 0xd7,
+	b"timesb" => 0x22a0,
+	b"timesbar" => 0x2a31,
+	b"timesd" => 0x2a30,
+	b"tint" => 0x222d,
+	b"toea" => 0x2928,
+	b"top" => 0x22a4,
+	b"topbot" => 0x2336,
+	b"topcir" => 0x2af1,
+	b"topf" => 0x1d565,
+	b"topfork" => 0x2ada,
+	b"tosa" => 0x2929,
+	b"tprime" => 0x2034,
+	b"trade" => 0x2122,
+	b"triangle" => 0x25b5,
+	b"triangledown" => 0x25bf,
+	b"triangleleft" => 0x25c3,
+	b"trianglelefteq" => 0x22b4,
+	b"triangleq" => 0x225c,
+	b"triangleright" => 0x25b9,
+	b"trianglerighteq" => 0x22b5,
+	b"tridot" => 0x25ec,
+	b"trie" => 0x225c,
+	b"triminus" => 0x2a3a,
+	b"triplus" => 0x2a39,
+	b"trisb" => 0x29cd,
+	b"tritime" => 0x2a3b,
+	b"trpezium" => 0x23e2,
+	b"tscr" => 0x1d4c9,
+	b"tscy" => 0x446,
+	b"tshcy" => 0x45b,
+	b"tstrok" => 0x167,
+	b"twixt" => 0x226c,
+	b"twoheadleftarrow" => 0x219e,
+	b"twoheadrightarrow" => 0x21a0,
+	b"uArr" => 0x21d1,
+	b"uHar" => 0x2963,
+	b"uacute" => 0xfa,
+	b"uarr" => 0x2191,
+	b"ubrcy" => 0x45e,
+	b"ubreve" => 0x16d,
+	b"ucirc" => 0xfb,
+	b"ucy" => 0x443,
+	b"udarr" => 0x21c5,
+	b"udblac" => 0x171,
+	b"udhar" => 0x296e,
+	b"ufisht" => 0x297e,
+	b"ufr" => 0x1d532,
+	b"ugrave" => 0xf9,
+	b"uharl" => 0x21bf,
+	b"uharr" => 0x21be,
+	b"uhblk" => 0x2580,
+	b"ulcorn" => 0x231c,
+	b"ulcorner" => 0x231c,
+	b"ulcrop" => 0x230f,
+	b"ultri" => 0x25f8,
+	b"umacr" => 0x16b,
+	b"uml" => 0xa8,
+	b"uogon" => 0x173,
+	b"uopf" => 0x1d566,
+	b"uparrow" => 0x2191,
+	b"updownarrow" => 0x2195,
+	b"upharpoonleft" => 0x21bf,
+	b"upharpoonright" => 0x21be,
+	b"uplus" => 0x228e,
+	b"upsi" => 0x3c5,
+	b"upsih" => 0x3d2,
+	b"upsilon" => 0x3c5,
+	b"upuparrows" => 0x21c8,
+	b"urcorn" => 0x231d,
+	b"urcorner" => 0x231d,
+	b"urcrop" => 0x230e,
+	b"uring" => 0x16f,
+	b"urtri" => 0x25f9,
+	b"uscr" => 0x1d4ca,
+	b"utdot" => 0x22f0,
+	b"utilde" => 0x169,
+	b"utri" => 0x25b5,
+	b"utrif" => 0x25b4,
+	b"uuarr" => 0x21c8,
+	b"uuml" => 0xfc,
+	b"uwangle" => 0x29a7,
+	b"vArr" => 0x21d5,
+	b"vBar" => 0x2ae8,
+	b"vBarv" => 0x2ae9,
+	b"vDash" => 0x22a8,
+	b"vangrt" => 0x299c,
+	b"varepsilon" => 0x3b5,
+	b"varkappa" => 0x3f0,
+	b"varnothing" => 0x2205,
+	b"varphi" => 0x3c6,
+	b"varpi" => 0x3d6,
+	b"varpropto" => 0x221d,
+	b"varr" => 0x2195,
+	b"varrho" => 0x3f1,
+	b"varsigma" => 0x3c2,
+	b"vartheta" => 0x3d1,
+	b"vartriangleleft" => 0x22b2,
+	b"vartriangleright" => 0x22b3,
+	b"vcy" => 0x432,
+	b"vdash" => 0x22a2,
+	b"vee" => 0x2228,
+	b"veebar" => 0x22bb,
+	b"veeeq" => 0x225a,
+	b"vellip" => 0x22ee,
+	b"verbar" => 0x7c,
+	b"vert" => 0x7c,
+	b"vfr" => 0x1d533,
+	b"vltri" => 0x22b2,
+	b"vopf" => 0x1d567,
+	b"vprop" => 0x221d,
+	b"vrtri" => 0x22b3,
+	b"vscr" => 0x1d4cb,
+	b"vzigzag" => 0x299a,
+	b"wcirc" => 0x175,
+	b"wedbar" => 0x2a5f,
+	b"wedge" => 0x2227,
+	b"wedgeq" => 0x2259,
+	b"weierp" => 0x2118,
+	b"wfr" => 0x1d534,
+	b"wopf" => 0x1d568,
+	b"wp" => 0x2118,
+	b"wr" => 0x2240,
+	b"wreath" => 0x2240,
+	b"wscr" => 0x1d4cc,
+	b"xcap" => 0x22c2,
+	b"xcirc" => 0x25ef,
+	b"xcup" => 0x22c3,
+	b"xdtri" => 0x25bd,
+	b"xfr" => 0x1d535,
+	b"xhArr" => 0x27fa,
+	b"xharr" => 0x27f7,
+	b"xi" => 0x3be,
+	b"xlArr" => 0x27f8,
+	b"xlarr" => 0x27f5,
+	b"xmap" => 0x27fc,
+	b"xnis" => 0x22fb,
+	b"xodot" => 0x2a00,
+	b"xopf" => 0x1d569,
+	b"xoplus" => 0x2a01,
+	b"xotime" => 0x2a02,
+	b"xrArr" => 0x27f9,
+	b"xrarr" => 0x27f6,
+	b"xscr" => 0x1d4cd,
+	b"xsqcup" => 0x2a06,
+	b"xuplus" => 0x2a04,
+	b"xutri" => 0x25b3,
+	b"xvee" => 0x22c1,
+	b"xwedge" => 0x22c0,
+	b"yacute" => 0xfd,
+	b"yacy" => 0x44f,
+	b"ycirc" => 0x177,
+	b"ycy" => 0x44b,
+	b"yen" => 0xa5,
+	b"yfr" => 0x1d536,
+	b"yicy" => 0x457,
+	b"yopf" => 0x1d56a,
+	b"yscr" => 0x1d4ce,
+	b"yucy" => 0x44e,
+	b"yuml" => 0xff,
+	b"zacute" => 0x17a,
+	b"zcaron" => 0x17e,
+	b"zcy" => 0x437,
+	b"zdot" => 0x17c,
+	b"zeetrf" => 0x2128,
+	b"zeta" => 0x3b6,
+	b"zfr" => 0x1d537,
+	b"zhcy" => 0x436,
+	b"zigrarr" => 0x21dd,
+	b"zopf" => 0x1d56b,
+	b"zscr" => 0x1d4cf,
+	b"zwj" => 0x200d,
+	b"zwnj" => 0x200c,
+};
+
+pub fn get_entity_reference_code_point(name: &[u8]) -> Option<u32> {
+    ENTITY_REFERENCES.get(name).copied() // Value mapped to `name`; `None` when `name` is not a key.
+}
+
+pub fn is_valid_entity_reference_name_char(c: u8) -> bool {
+    c.is_ascii_alphanumeric() // True exactly for b'0'..=b'9', b'A'..=b'Z' and b'a'..=b'z'.
+}
diff --git a/src/spec/mod.rs b/src/spec/mod.rs
new file mode 100644
index 0000000..013caa6
--- /dev/null
+++ b/src/spec/mod.rs
@@ -0,0 +1,3 @@
+pub mod codepoint;
+pub mod entity;
+pub mod tag;
diff --git a/src/rule/tag/child/blacklist.c b/src/spec/tag/child/blacklist.c
similarity index 100%
rename from src/rule/tag/child/blacklist.c
rename to src/spec/tag/child/blacklist.c
diff --git a/src/rule/tag/child/whitelist.c b/src/spec/tag/child/whitelist.c
similarity index 100%
rename from src/rule/tag/child/whitelist.c
rename to src/spec/tag/child/whitelist.c
diff --git a/src/spec/tag/content.rs b/src/spec/tag/content.rs
new file mode 100644
index 0000000..3af5883
--- /dev/null
+++ b/src/spec/tag/content.rs
@@ -0,0 +1,24 @@
+use ::phf::{phf_set, Set};
+
+pub static CONTENT_TAGS: Set<&'static [u8]> = phf_set! { // Tag names, as byte strings, grouped as "content" tags; lookup is an exact byte match.
+	b"address",
+	b"audio",
+	b"button",
+	b"canvas",
+	b"caption",
+	b"figcaption",
+	b"h1",
+	b"h2",
+	b"h3",
+	b"h4",
+	b"h5",
+	b"h6",
+	b"legend",
+	b"meter",
+	b"object",
+	b"option",
+	b"p",
+	b"summary", // Can also contain a heading.
+	b"textarea",
+	b"video",
+};
diff --git a/src/spec/tag/contentfirst.rs b/src/spec/tag/contentfirst.rs
new file mode 100644
index 0000000..1d9ed4f
--- /dev/null
+++ b/src/spec/tag/contentfirst.rs
@@ -0,0 +1,17 @@
+use ::phf::{phf_set, Set};
+
+pub static CONTENT_FIRST_TAGS: Set<&'static [u8]> = phf_set! { // Tag names, as byte strings, grouped as "content-first" tags; lookup is an exact byte match.
+	b"dd",
+	b"details",
+	b"dt",
+	b"iframe",
+	b"label",
+	b"li",
+	b"noscript",
+	b"output",
+	b"progress",
+	b"slot",
+	b"td",
+	b"template",
+	b"th",
+};
diff --git a/src/spec/tag/formatting.rs b/src/spec/tag/formatting.rs
new file mode 100644
index 0000000..0360dae
--- /dev/null
+++ b/src/spec/tag/formatting.rs
@@ -0,0 +1,35 @@
+use ::phf::{phf_set, Set};
+
+// Difference to MDN's inline text semantics list: -br, +del, +ins. `code` is also absent here; it appears in SPECIFIC_HTML_TAGS instead.
+pub static FORMATTING_TAGS: Set<&'static [u8]> = phf_set! {
+	b"a",
+	b"abbr",
+	b"b",
+	b"bdi",
+	b"bdo",
+	b"cite",
+	b"data",
+	b"del",
+	b"dfn",
+	b"em",
+	b"i",
+	b"ins",
+	b"kbd",
+	b"mark",
+	b"q",
+	b"rp",
+	b"rt",
+	b"rtc",
+	b"ruby",
+	b"s",
+	b"samp",
+	b"small",
+	b"span",
+	b"strong",
+	b"sub",
+	b"sup",
+	b"time",
+	b"u",
+	b"var",
+	b"wbr",
+};
diff --git a/src/spec/tag/heading.rs b/src/spec/tag/heading.rs
new file mode 100644
index 0000000..b97faeb
--- /dev/null
+++ b/src/spec/tag/heading.rs
@@ -0,0 +1,11 @@
+use ::phf::{phf_set, Set};
+
+pub static HEADING_TAGS: Set<&'static [u8]> = phf_set! { // `hgroup` plus `h1` through `h6`, as byte strings.
+	b"hgroup",
+	b"h1",
+	b"h2",
+	b"h3",
+	b"h4",
+	b"h5",
+	b"h6",
+};
diff --git a/src/spec/tag/html.rs b/src/spec/tag/html.rs
new file mode 100644
index 0000000..bd4d816
--- /dev/null
+++ b/src/spec/tag/html.rs
@@ -0,0 +1,150 @@
+use ::phf::{phf_set, Set};
+
+// HTML element names as byte strings. Sourced from https://developer.mozilla.org/en-US/docs/Web/HTML/Element at 2018-07-01T05:55:00Z.
+// `base` is listed so that this set agrees with VOID_TAGS and SPECIFIC_HTML_TAGS, which both contain it.
+pub static HTML_TAGS: Set<&'static [u8]> = phf_set! {
+	b"a",
+	b"abbr",
+	b"acronym",
+	b"address",
+	b"applet",
+	b"area",
+	b"article",
+	b"aside",
+	b"audio",
+	b"b",
+	b"base",
+	b"basefont",
+	b"bdi",
+	b"bdo",
+	b"bgsound",
+	b"big",
+	b"blink",
+	b"blockquote",
+	b"body",
+	b"br",
+	b"button",
+	b"canvas",
+	b"caption",
+	b"center",
+	b"cite",
+	b"code",
+	b"col",
+	b"colgroup",
+	b"command",
+	b"content",
+	b"data",
+	b"datalist",
+	b"dd",
+	b"del",
+	b"details",
+	b"dfn",
+	b"dialog",
+	b"dir",
+	b"div",
+	b"dl",
+	b"dt",
+	b"element",
+	b"em",
+	b"embed",
+	b"fieldset",
+	b"figcaption",
+	b"figure",
+	b"font",
+	b"footer",
+	b"form",
+	b"frame",
+	b"frameset",
+	b"h1",
+	b"h2",
+	b"h3",
+	b"h4",
+	b"h5",
+	b"h6",
+	b"head",
+	b"header",
+	b"hgroup",
+	b"hr",
+	b"html",
+	b"i",
+	b"iframe",
+	b"image",
+	b"img",
+	b"input",
+	b"ins",
+	b"isindex",
+	b"kbd",
+	b"keygen",
+	b"label",
+	b"legend",
+	b"li",
+	b"link",
+	b"listing",
+	b"main",
+	b"map",
+	b"mark",
+	b"marquee",
+	b"menu",
+	b"menuitem",
+	b"meta",
+	b"meter",
+	b"multicol",
+	b"nav",
+	b"nextid",
+	b"nobr",
+	b"noembed",
+	b"noframes",
+	b"noscript",
+	b"object",
+	b"ol",
+	b"optgroup",
+	b"option",
+	b"output",
+	b"p",
+	b"param",
+	b"picture",
+	b"plaintext",
+	b"pre",
+	b"progress",
+	b"q",
+	b"rp",
+	b"rt",
+	b"rtc",
+	b"ruby",
+	b"s",
+	b"samp",
+	b"script",
+	b"section",
+	b"select",
+	b"shadow",
+	b"slot",
+	b"small",
+	b"source",
+	b"spacer",
+	b"span",
+	b"strike",
+	b"strong",
+	b"style",
+	b"sub",
+	b"summary",
+	b"sup",
+	b"table",
+	b"tbody",
+	b"td",
+	b"template",
+	b"textarea",
+	b"tfoot",
+	b"th",
+	b"thead",
+	b"time",
+	b"title",
+	b"tr",
+	b"track",
+	b"tt",
+	b"u",
+	b"ul",
+	b"var",
+	b"video",
+	b"wbr",
+	b"xmp",
+};
diff --git a/src/spec/tag/layout.rs b/src/spec/tag/layout.rs
new file mode 100644
index 0000000..242f84f
--- /dev/null
+++ b/src/spec/tag/layout.rs
@@ -0,0 +1,38 @@
+use ::phf::{phf_set, Set};
+
+pub static LAYOUT_TAGS: Set<&'static [u8]> = phf_set! {
+	// Sectioning tags (the same four names that make up SECTIONING_TAGS).
+	b"article",
+	b"aside",
+	b"nav",
+	b"section",
+	// All other layout tags.
+	b"blockquote",
+	b"body",
+	b"colgroup",
+	b"datalist",
+	b"dialog",
+	b"div",
+	b"dl",
+	b"fieldset",
+	b"figure",
+	b"footer",
+	b"form",
+	b"head",
+	b"header",
+	b"hgroup",
+	b"html",
+	b"main",
+	b"map",
+	b"menu",
+	b"ol",
+	b"optgroup",
+	b"picture",
+	b"select",
+	b"table",
+	b"tbody",
+	b"tfoot",
+	b"thead",
+	b"tr",
+	b"ul",
+};
diff --git a/src/spec/tag/media.rs b/src/spec/tag/media.rs
new file mode 100644
index 0000000..4ea955b
--- /dev/null
+++ b/src/spec/tag/media.rs
@@ -0,0 +1,6 @@
+use ::phf::{phf_set, Set};
+
+pub static MEDIA_TAGS: Set<&'static [u8]> = phf_set! { // The two media element names, as byte strings.
+	b"audio",
+	b"video",
+};
diff --git a/src/spec/tag/mod.rs b/src/spec/tag/mod.rs
new file mode 100644
index 0000000..e63b87d
--- /dev/null
+++ b/src/spec/tag/mod.rs
@@ -0,0 +1,12 @@
+pub mod content;
+pub mod contentfirst;
+pub mod formatting;
+pub mod heading;
+pub mod html;
+pub mod layout;
+pub mod media;
+pub mod sectioning;
+pub mod specific;
+pub mod svg;
+pub mod void;
+pub mod wss;
diff --git a/src/rule/tag/parent/blacklist.c b/src/spec/tag/parent/blacklist.c
similarity index 100%
rename from src/rule/tag/parent/blacklist.c
rename to src/spec/tag/parent/blacklist.c
diff --git a/src/rule/tag/parent/whitelist.c b/src/spec/tag/parent/whitelist.c
similarity index 100%
rename from src/rule/tag/parent/whitelist.c
rename to src/spec/tag/parent/whitelist.c
diff --git a/src/spec/tag/sectioning.rs b/src/spec/tag/sectioning.rs
new file mode 100644
index 0000000..a01ee10
--- /dev/null
+++ b/src/spec/tag/sectioning.rs
@@ -0,0 +1,9 @@
+use ::phf::{phf_set, Set};
+
+pub static SECTIONING_TAGS: Set<&'static [u8]> = phf_set! {
+	// These four names are also listed at the top of LAYOUT_TAGS.
+	b"article",
+	b"aside",
+	b"nav",
+	b"section",
+};
diff --git a/src/spec/tag/specific.rs b/src/spec/tag/specific.rs
new file mode 100644
index 0000000..5b4d57f
--- /dev/null
+++ b/src/spec/tag/specific.rs
@@ -0,0 +1,19 @@
+use ::phf::{phf_set, Set};
+
+// Tag names as byte strings; lookup is an exact byte match. Does not include SVG tags.
+pub static SPECIFIC_HTML_TAGS: Set<&'static [u8]> = phf_set! {
+	b"area",
+	b"base",
+	b"br",
+	b"code", // Reason: unlikely to want to minify.
+	b"col",
+	b"embed",
+	b"hr",
+	b"img",
+	b"input",
+	b"param",
+	b"pre", // Reason: unlikely to want to minify.
+	b"script",
+	b"source",
+	b"track",
+};
diff --git a/src/spec/tag/svg.rs b/src/spec/tag/svg.rs
new file mode 100644
index 0000000..dcbe0e3
--- /dev/null
+++ b/src/spec/tag/svg.rs
@@ -0,0 +1,95 @@
+use ::phf::{phf_set, Set};
+
+// Sourced from https://developer.mozilla.org/en-US/docs/Web/SVG/Element at 2018-08-04T03:50:00Z.
+pub static SVG_TAGS: Set<&'static [u8]> = phf_set! {
+	b"a",
+	b"altGlyph",
+	b"altGlyphDef",
+	b"altGlyphItem",
+	b"animate",
+	b"animateColor",
+	b"animateMotion",
+	b"animateTransform",
+	b"circle",
+	b"clipPath",
+	b"color-profile",
+	b"cursor",
+	b"defs",
+	b"desc",
+	b"discard",
+	b"ellipse",
+	b"feBlend",
+	b"feColorMatrix",
+	b"feComponentTransfer",
+	b"feComposite",
+	b"feConvolveMatrix",
+	b"feDiffuseLighting",
+	b"feDisplacementMap",
+	b"feDistantLight",
+	b"feDropShadow",
+	b"feFlood",
+	b"feFuncA",
+	b"feFuncB",
+	b"feFuncG",
+	b"feFuncR",
+	b"feGaussianBlur",
+	b"feImage",
+	b"feMerge",
+	b"feMergeNode",
+	b"feMorphology",
+	b"feOffset",
+	b"fePointLight",
+	b"feSpecularLighting",
+	b"feSpotLight",
+	b"feTile",
+	b"feTurbulence",
+	b"filter",
+	b"font-face-format",
+	b"font-face-name",
+	b"font-face-src",
+	b"font-face-uri",
+	b"font-face",
+	b"font",
+	b"foreignObject",
+	b"g",
+	b"glyph",
+	b"glyphRef",
+	b"hatch",
+	b"hatchpath",
+	b"hkern",
+	b"image",
+	b"line",
+	b"linearGradient",
+	b"marker",
+	b"mask",
+	b"mesh",
+	b"meshgradient",
+	b"meshpatch",
+	b"meshrow",
+	b"metadata",
+	b"missing-glyph",
+	b"mpath",
+	b"path",
+	b"pattern",
+	b"polygon",
+	b"polyline",
+	b"radialGradient",
+	b"rect",
+	b"script",
+	b"set",
+	b"solidcolor",
+	b"stop",
+	b"style",
+	b"svg",
+	b"switch",
+	b"symbol",
+	b"text",
+	b"textPath",
+	b"title",
+	b"tref",
+	b"tspan",
+	b"unknown",
+	b"use",
+	b"view",
+	b"vkern",
+};
diff --git a/src/spec/tag/void.rs b/src/spec/tag/void.rs
new file mode 100644
index 0000000..ea87640
--- /dev/null
+++ b/src/spec/tag/void.rs
@@ -0,0 +1,19 @@
+use ::phf::{phf_set, Set};
+
+pub static VOID_TAGS: Set<&'static [u8]> = phf_set! {
+	b"area",
+	b"base",
+	b"br",
+	b"col",
+	b"embed",
+	b"hr",
+	b"img",
+	b"input",
+	b"keygen",
+	b"link",
+	b"meta",
+	b"param",
+	b"source",
+	b"track",
+	b"wbr",
+};
diff --git a/src/rule/tag/wss.rs b/src/spec/tag/wss.rs
similarity index 50%
rename from src/rule/tag/wss.rs
rename to src/spec/tag/wss.rs
index 1a15f14..a034818 100644
--- a/src/rule/tag/wss.rs
+++ b/src/spec/tag/wss.rs
@@ -1,7 +1,7 @@
 // "WSS" stands for whitespace-sensitive.
 use ::phf::{phf_set, Set};
 
-static WSS_TAGS: Set<&'static str> = phf_set! {
-	"code",
-	"pre",
+pub static WSS_TAGS: Set<&'static [u8]> = phf_set! {
+	b"code",
+	b"pre",
 };
diff --git a/src/unit.h b/src/unit.h
deleted file mode 100644
index d2b3cd2..0000000
--- a/src/unit.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#pragma once
-
-#include <hb/proc.h>
-
-#define HB_UNIT_ENTITY_NONE -1
-
-typedef enum {
-	// Special value for hb_unit_tag.
-	HB_UNIT_ATTR_NONE,
-
-	HB_UNIT_ATTR_QUOTED,
-	HB_UNIT_ATTR_UNQUOTED,
-	HB_UNIT_ATTR_NOVAL,
-} hb_unit_attr_type;
-
-hb_unit_attr_type hb_unit_attr(hb_proc* proc);
-hb_unit_attr_type
-hb_unit_attr_val_quoted(hb_proc* proc, bool should_collapse_and_trim_value_ws);
-void hb_unit_attr_val_unquoted(hb_proc* proc);
-
-void hb_unit_bang(hb_proc* proc);
-
-void hb_unit_comment(hb_proc* proc);
-
-void hb_unit_content_html(hb_proc* proc, nh_view_str* parent);
-void hb_unit_content_script(hb_proc* proc);
-void hb_unit_content_style(hb_proc* proc);
-
-int32_t hb_unit_entity(hb_proc* proc);
-
-void hb_unit_tag(hb_proc* proc, nh_view_str* parent);
-nh_view_str hb_unit_tag_name(hb_proc* proc);
diff --git a/src/unit/attr.c b/src/unit/attr.c
deleted file mode 100644
index 9f9eff1..0000000
--- a/src/unit/attr.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include <hb/collection.h>
-#include <hb/proc.h>
-#include <hb/rule.h>
-#include <hb/unit.h>
-#include <stdbool.h>
-
-hb_unit_attr_type hb_unit_attr(hb_proc* proc)
-{
-	hb_proc_view_init_src(name, proc);
-
-	hb_proc_view_start_with_src_next(&name, proc);
-	do {
-		// Require at least one character.
-		hb_rune c = hb_proc_require_predicate(
-			proc, &hb_rule_attr_name_check, "attribute name");
-
-		if (hb_rule_ascii_uppercase_check(c)) {
-			hb_proc_error_if_not_suppressed(
-				proc, HB_ERR_PARSE_UCASE_ATTR,
-				"Uppercase letter in attribute name");
-		}
-	} while (hb_rule_attr_name_check(hb_proc_peek(proc)));
-	hb_proc_view_end_with_src_prev(&name, proc);
-
-	bool should_collapse_and_trim_value_ws =
-		nh_view_str_equals_literal_i(&name, "class")
-		&& proc->cfg->trim_class_attributes;
-	bool has_value = hb_proc_accept_if(proc, '=');
-	hb_unit_attr_type attr_type = HB_UNIT_ATTR_NOVAL;
-
-	if (has_value) {
-		hb_rune next = hb_proc_peek(proc);
-
-		if (hb_rule_attr_quote_check(next)) {
-			// Quoted attribute value.
-			attr_type = hb_unit_attr_val_quoted(
-				proc, should_collapse_and_trim_value_ws);
-		} else {
-			// Unquoted attribute value.
-			hb_proc_error_if_not_suppressed(
-				proc, HB_ERR_PARSE_UNQUOTED_ATTR,
-				"Unquoted attribute value");
-			attr_type = HB_UNIT_ATTR_UNQUOTED;
-			hb_unit_attr_val_unquoted(proc);
-		}
-	}
-
-	return attr_type;
-}
diff --git a/src/unit/attr/val.c b/src/unit/attr/val.c
deleted file mode 100644
index e69de29..0000000
diff --git a/src/unit/attr/val/quoted.c b/src/unit/attr/val/quoted.c
deleted file mode 100644
index ebfa5d9..0000000
--- a/src/unit/attr/val/quoted.c
+++ /dev/null
@@ -1,219 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rule.h>
-#include <hb/unit.h>
-
-#define _ENCODED_SINGLE_QUOTE "&#39;"
-#define _ENCODED_DOUBLE_QUOTE "&#34;"
-
-#define _COLLAPSE_WHITESPACE_IF_APPLICABLE()                                   \
-	if (last_char_was_whitespace) {                                        \
-		/* This is the first non-whitespace character after one or     \
-		 * more whitespace character(s), so collapse whitespace by     \
-		 * writing only one space. */                                  \
-		hb_proc_write(proc, ' ');                                      \
-		has_whitespace_after_processing = true;                        \
-		last_char_was_whitespace = false;                              \
-	}
-
-hb_unit_attr_type hb_unit_attr_val_quoted(hb_proc* proc,
-					  bool should_collapse_and_trim_ws)
-{
-	// Processing a quoted attribute value is tricky, due to the fact that
-	// it's not possible to know whether or not to unquote the value until
-	// the value has been processed. For example, decoding an entity could
-	// create whitespace in a value which might otherwise be unquotable. How
-	// this function works is:
-	//
-	// 1. Assume that the value is unquotable, and don't output any quotes.
-	// Decode any entities as necessary. Collect metrics on the types of
-	// characters in the value while processing.
-	// 2. Based on the metrics, if it's possible to not use quotes, nothing
-	// needs to be done and the function ends.
-	// 3. Choose a quote based on the amount of occurrences, to minimise the
-	// amount of encoded values.
-	// 4. Post-process the output by adding delimiter quotes and encoding
-	// quotes in values. This does mean that the output is written to twice.
-
-	bool should_decode_entities = proc->cfg->decode_entities;
-	bool should_remove_quotes = proc->cfg->remove_attr_quotes;
-
-	// Metrics for characters in the value.
-	// Used to decide what quotes to use, if any.
-	size_t count_double_quotation = 0;
-	size_t count_single_quotation = 0;
-	bool starts_with_quote = false;
-	bool has_whitespace_after_processing = false;
-
-	hb_rune quote = hb_proc_require_skip_predicate(
-		proc, &hb_rule_attr_quote_check, "attribute value quote");
-
-	if (should_collapse_and_trim_ws) {
-		hb_proc_skip_while_predicate(proc,
-					     &hb_rule_ascii_whitespace_check);
-	}
-
-	// Since it's not possible to optimise the delimiter quotes without
-	// knowing the complete value, mark the processed value in the output
-	// for post-processing later.
-	hb_proc_view_init_out(proc_value, proc);
-
-	hb_proc_view_start_with_out_next(&proc_value, proc);
-	bool last_char_was_whitespace = false;
-	bool is_first_char = true;
-	while (true) {
-		int32_t c = hb_proc_peek(proc);
-
-		if (c == quote) {
-			break;
-		}
-
-		bool processed_entity = c == '&';
-		if (processed_entity) {
-			// If not decoding entities, then this is first
-			// non-whitespace if last_char_was_whitespace, so space
-			// needs to be written before hb_unit_entity writes
-			// entity.
-			if (!should_decode_entities) {
-				_COLLAPSE_WHITESPACE_IF_APPLICABLE()
-			}
-
-			// Characters will be consumed by hb_unit_entity, but
-			// they will never be '\'', '"', or whitespace, as the
-			// function only consumes characters that could form a
-			// well formed entity. See the function for more
-			// details.
-			int32_t decoded = hb_unit_entity(proc);
-			// If not decoding entities, don't interpret using
-			// decoded character.
-			if (should_decode_entities)
-				c = decoded;
-		}
-		bool is_whitespace = hb_rule_ascii_whitespace_check(c);
-
-		if (should_collapse_and_trim_ws && is_whitespace) {
-			// Character, after any entity decoding, is whitespace.
-			// Don't write whitespace.
-			// In order to collapse whitespace, only write one space
-			// character once the first non-whitespace character
-			// after a sequence of whitespace characters is reached.
-			last_char_was_whitespace = true;
-			hb_proc_skip(proc);
-
-		} else {
-			// Character, after any entity decoding, is not
-			// whitespace.
-			_COLLAPSE_WHITESPACE_IF_APPLICABLE()
-
-			if (c == '"') {
-				if (is_first_char)
-					starts_with_quote = true;
-				count_double_quotation++;
-
-			} else if (c == '\'') {
-				if (is_first_char)
-					starts_with_quote = true;
-				count_single_quotation++;
-
-			} else if (is_whitespace) {
-				// `should_collapse_and_trim_ws` is false, so
-				// whitespace is written.
-				has_whitespace_after_processing = true;
-			}
-
-			if (!processed_entity) {
-				// Don't need to accept if hb_unit_entity has
-				// already been called.
-				hb_proc_accept(proc);
-			}
-		}
-
-		is_first_char = false;
-	}
-	hb_proc_view_end_with_out_prev(&proc_value, proc);
-	hb_proc_require_skip(proc, quote);
-
-	size_t proc_length = nh_view_str_length(&proc_value);
-
-	// Technically, the specification states that values may only be
-	// unquoted if they don't contain ["'`=<>]. However, browsers seem to
-	// interpret characters after `=` and before the nearest whitespace as
-	// an unquoted value, so long as no quote immediately follows `=`. If a
-	// value cannot be unquoted, use the one that appears the least and
-	// therefore requires the least amount of encoding. Prefer double quotes
-	// to single quotes if it's a tie.
-	hb_rune quote_to_encode;
-	char const* quote_encoded;
-	size_t quote_encoded_length;
-	size_t amount_of_quotes_to_encode;
-
-	if (should_remove_quotes && proc_length > 0
-	    && !has_whitespace_after_processing && !starts_with_quote) {
-		// No need to do any further processing; processed value is
-		// already in unquoted form.
-		return HB_UNIT_ATTR_UNQUOTED;
-
-	} else if (!should_decode_entities) {
-		// If entities are not being decoded, we are not allowed to
-		// encode and decode quotes to minimise the total count of
-		// encoded quotes. Therefore, there is no use to swapping
-		// delimiter quotes as at best it's not an improvement and at
-		// worst it could break the value.
-		quote_to_encode = quote;
-		quote_encoded = NULL;
-		quote_encoded_length = 0;
-		amount_of_quotes_to_encode = 0;
-
-	} else if (count_single_quotation < count_double_quotation) {
-		quote_to_encode = '\'';
-		quote_encoded = _ENCODED_SINGLE_QUOTE;
-		quote_encoded_length =
-			hb_string_literal_length(_ENCODED_SINGLE_QUOTE);
-		amount_of_quotes_to_encode = count_single_quotation;
-
-	} else {
-		quote_to_encode = '"';
-		quote_encoded = _ENCODED_DOUBLE_QUOTE;
-		quote_encoded_length =
-			hb_string_literal_length(_ENCODED_DOUBLE_QUOTE);
-		amount_of_quotes_to_encode = count_double_quotation;
-	}
-
-	size_t post_length =
-		2 + proc_length - amount_of_quotes_to_encode
-		+ (amount_of_quotes_to_encode * quote_encoded_length);
-	// Where the post-processed output should start in the output array.
-	size_t out_start = nh_view_str_start(&proc_value);
-	size_t proc_end = out_start + proc_length - 1;
-	size_t post_end = out_start + post_length - 1;
-
-	size_t reader = proc_end;
-	size_t writer = post_end;
-	proc->out[writer--] = quote_to_encode;
-	// To prevent overwriting data when encoding quotes, post-process output
-	// in reverse. Loop condition is checked at end of loop instead of
-	// before to prevent underflow. WARNING: This code directly uses and
-	// manipulates struct members of `proc`, which in general should be
-	// avoided.
-	while (true) {
-		hb_rune c = proc->out[reader];
-		if (should_decode_entities && c == quote_to_encode) {
-			writer -= quote_encoded_length;
-			// WARNING: This only works because hb_rune == char.
-			memcpy(&proc->out[writer + 1], quote_encoded,
-			       quote_encoded_length * sizeof(hb_rune));
-		} else {
-			proc->out[writer--] = c;
-		}
-
-		// Break before decrementing to prevent underflow.
-		if (reader == out_start) {
-			break;
-		}
-		reader--;
-	}
-	// This must be done after previous loop to prevent overwriting data.
-	proc->out[writer] = quote_to_encode;
-	proc->out_next = post_end + 1;
-
-	return HB_UNIT_ATTR_QUOTED;
-}
diff --git a/src/unit/attr/val/unquoted.c b/src/unit/attr/val/unquoted.c
deleted file mode 100644
index 23d4b1a..0000000
--- a/src/unit/attr/val/unquoted.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rule.h>
-#include <hb/unit.h>
-#include <stdbool.h>
-
-void hb_unit_attr_val_unquoted(hb_proc* proc)
-{
-	bool at_least_one_char = false;
-
-	hb_rune c;
-	while (true) {
-		c = hb_proc_peek(proc);
-		if (!hb_rule_attr_unquotedvalue_check(c)) {
-			break;
-		}
-		at_least_one_char = true;
-
-		if (c == '&') {
-			// Process entity.
-			hb_unit_entity(proc);
-		} else {
-			hb_proc_accept(proc);
-		}
-	}
-
-	if (!at_least_one_char) {
-		hb_proc_error_custom(
-			proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-			"Expected unquoted attribute value, got `%c` (U+%x)",
-			c);
-	}
-}
diff --git a/src/unit/bang.c b/src/unit/bang.c
deleted file mode 100644
index 9a2dd60..0000000
--- a/src/unit/bang.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#include <hb/unit.h>
-
-void hb_unit_bang(hb_proc* proc)
-{
-	hb_proc_require_match(proc, "<!");
-
-	while (hb_proc_accept_if_not(proc, '<'))
-		;
-
-	hb_proc_require(proc, '>');
-}
diff --git a/src/unit/comment.c b/src/unit/comment.c
deleted file mode 100644
index 4d2cdd3..0000000
--- a/src/unit/comment.c
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <hb/unit.h>
-
-void hb_unit_comment(hb_proc* proc)
-{
-	// Mark comment to write it later if not removing comments.
-	hb_proc_view_init_src(comment, proc);
-
-	hb_proc_view_start_with_src_next(&comment, proc);
-	hb_proc_require_skip_match(proc, "<!--");
-	while (!hb_proc_skip_if_matches(proc, "-->")) {
-		hb_proc_skip(proc);
-	}
-	hb_proc_view_end_with_src_prev(&comment, proc);
-
-	// Write comment if not removing comments.
-	if (proc->cfg->remove_comments) {
-		hb_proc_write_view(proc, &comment);
-	}
-}
diff --git a/src/unit/content/html.c b/src/unit/content/html.c
deleted file mode 100644
index 9e20bbe..0000000
--- a/src/unit/content/html.c
+++ /dev/null
@@ -1,192 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rule.h>
-#include <hb/rune.h>
-#include <hb/unit.h>
-
-// Ensure COMMENT, BANG, and OPENING_TAG are together, and update _state_is_cbot
-// if values are changed.
-typedef enum {
-	_STATE_COMMENT,
-	_STATE_BANG,
-	_STATE_OPENING_TAG,
-
-	_STATE_START,
-	_STATE_END,
-	_STATE_ENTITY,
-	_STATE_WHITESPACE,
-	_STATE_TEXT,
-} _state;
-
-static bool _state_is_cbot(_state state)
-{
-	return state >= _STATE_COMMENT && state <= _STATE_OPENING_TAG;
-}
-
-static _state _get_next_state(hb_proc* proc)
-{
-	hb_eof_rune c = hb_proc_peek_eof(proc);
-
-	if (c != HB_EOF && hb_rule_ascii_whitespace_check(c)) {
-		return _STATE_WHITESPACE;
-	}
-
-	if (c == HB_EOF || hb_proc_matches(proc, "</")) {
-		return _STATE_END;
-	}
-
-	if (hb_proc_matches(proc, "<!--")) {
-		return _STATE_COMMENT;
-	}
-
-	// Check after comment
-	if (hb_proc_matches(proc, "<!")) {
-		return _STATE_BANG;
-	}
-
-	// Check after comment and bang
-	if (c == '<') {
-		return _STATE_OPENING_TAG;
-	}
-
-	if (c == '&') {
-		return _STATE_ENTITY;
-	}
-
-	return _STATE_TEXT;
-}
-
-/*
- * Whitespace handling is the trickiest part of this function.
- * There are three potential minification settings that affect whitespace
- * handling:
- *   - collapse
- *   - destroy whole
- *   - trim
- * What whitespace to minify depends on the parent and configured settings.
- * We want to prevent memory allocation and use only one pass, but whitespace
- * handling often involves looking ahead.
- */
-void hb_unit_content_html(hb_proc* proc, nh_view_str* parent)
-{
-	bool should_collapse_whitespace =
-		hb_cfg_should_min(&proc->cfg->collapse_whitespace, parent);
-	bool should_destroy_whole_whitespace =
-		hb_cfg_should_min(&proc->cfg->destroy_whole_whitespace, parent);
-	bool should_trim_whitespace =
-		hb_cfg_should_min(&proc->cfg->trim_whitespace, parent);
-
-	// Trim leading whitespace if configured to do so.
-	if (should_trim_whitespace) {
-		hb_proc_skip_while_predicate(proc,
-					     &hb_rule_ascii_whitespace_check);
-	}
-
-	_state last_state = _STATE_START;
-	hb_proc_view_init_src(whitespace, proc);
-	// Whether or not currently in whitespace.
-	bool whitespace_buffered = false;
-	// If currently in whitespace, whether or not current contiguous
-	// whitespace started after a bang, comment, or tag.
-	bool whitespace_started_after_cbot = false;
-
-	while (true) {
-		_state next_state = _get_next_state(proc);
-
-		if (next_state == _STATE_WHITESPACE) {
-			// Whitespace is always buffered and then processed
-			// afterwards, even if not minifying.
-			hb_proc_skip(proc);
-
-			if (last_state != _STATE_WHITESPACE) {
-				// This is the start of one or more whitespace
-				// characters, so start a view of this
-				// contiguous whitespace and don't write any
-				// characters that are part of it yet.
-				hb_proc_view_start_with_src_next(&whitespace,
-								 proc);
-				whitespace_buffered = true;
-				whitespace_started_after_cbot =
-					_state_is_cbot(last_state);
-			} else {
-				// This is part of a contiguous whitespace, but
-				// not the start of, so simply ignore.
-			}
-
-		} else {
-			// Next character is not whitespace, so handle any
-			// previously buffered whitespace.
-			if (whitespace_buffered) {
-				// Mark the end of the whitespace.
-				hb_proc_view_end_with_src_prev(&whitespace,
-							       proc);
-
-				if (should_destroy_whole_whitespace
-				    && whitespace_started_after_cbot
-				    && _state_is_cbot(next_state)) {
-					// Whitespace is between two tags,
-					// comments, or bangs.
-					// destroy_whole_whitespace is on, so
-					// don't write it.
-
-				} else if (should_trim_whitespace
-					   && next_state == _STATE_END) {
-					// Whitespace is trailing.
-					// should_trim_whitespace is on, so
-					// don't write it.
-
-				} else if (should_collapse_whitespace) {
-					// Current contiguous whitespace needs
-					// to be reduced to a single space
-					// character.
-					hb_proc_write(proc, ' ');
-
-				} else {
-					// Whitespace cannot be minified, so
-					// write in entirety.
-					hb_proc_write_view(proc, &whitespace);
-				}
-
-				// Reset whitespace buffer.
-				whitespace_buffered = false;
-			}
-
-			// Process and consume next character(s).
-			switch (next_state) {
-			case _STATE_COMMENT:
-				hb_unit_comment(proc);
-				break;
-
-			case _STATE_BANG:
-				hb_unit_bang(proc);
-				break;
-
-			case _STATE_OPENING_TAG:
-				hb_unit_tag(proc, parent);
-				break;
-
-			case _STATE_END:
-				break;
-
-			case _STATE_ENTITY:
-				hb_unit_entity(proc);
-				break;
-
-			case _STATE_TEXT:
-				hb_proc_accept(proc);
-				break;
-
-			default:
-				// Defensive coding.
-				hb_proc_error(
-					proc,
-					HB_ERR_INTERR_UNKNOWN_CONTENT_NEXT_STATE,
-					"Unknown content type");
-			}
-		}
-
-		last_state = next_state;
-		if (next_state == _STATE_END) {
-			break;
-		}
-	}
-}
diff --git a/src/unit/content/script.c b/src/unit/content/script.c
deleted file mode 100644
index 8d8238c..0000000
--- a/src/unit/content/script.c
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <hb/proc.h>
-
-static void _parse_comment_single(hb_proc* proc)
-{
-	hb_proc_require_match(proc, "//");
-
-	// Comment can end at closing </script>.
-	// WARNING: Closing tag must not contain whitespace.
-	while (!hb_proc_accept_if_matches_line_terminator(proc)) {
-		if (hb_proc_matches_i(proc, "</script>")) {
-			break;
-		}
-
-		hb_proc_accept(proc);
-	}
-}
-
-static void _parse_comment_multi(hb_proc* proc)
-{
-	hb_proc_require_match(proc, "/*");
-
-	// Comment can end at closing </script>.
-	// WARNING: Closing tag must not contain whitespace.
-	while (!hb_proc_accept_if_matches(proc, "*/")) {
-		if (hb_proc_matches_i(proc, "</script>")) {
-			break;
-		}
-
-		hb_proc_accept(proc);
-	}
-}
-
-static void _parse_string(hb_proc* proc)
-{
-	hb_rune delim = hb_proc_accept(proc);
-
-	if (delim != '"' && delim != '\'') {
-		hb_proc_error(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-			      "Expected JavaScript string delimiter");
-	}
-
-	bool escaping = false;
-
-	while (true) {
-		hb_rune c = hb_proc_accept(proc);
-
-		if (c == '\\') {
-			escaping = !escaping;
-			continue;
-		}
-
-		if (c == delim && !escaping) {
-			break;
-		}
-
-		if (hb_proc_accept_if_matches_line_terminator(proc)) {
-			if (!escaping) {
-				hb_proc_error(proc,
-					      HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-					      "Unterminated JavaScript string");
-			}
-		}
-
-		escaping = false;
-	}
-}
-
-static void _parse_template(hb_proc* proc)
-{
-	hb_proc_require_match(proc, "`");
-
-	bool escaping = false;
-
-	while (true) {
-		hb_rune c = hb_proc_accept(proc);
-
-		if (c == '\\') {
-			escaping = !escaping;
-			continue;
-		}
-
-		if (c == '`' && !escaping) {
-			break;
-		}
-
-		escaping = false;
-	}
-}
-
-void hb_unit_content_script(hb_proc* proc)
-{
-	while (!hb_proc_matches(proc, "</")) {
-		if (hb_proc_matches(proc, "//")) {
-			_parse_comment_single(proc);
-		} else if (hb_proc_matches(proc, "/*")) {
-			_parse_comment_multi(proc);
-		} else {
-			switch (hb_proc_peek(proc)) {
-			case '\'':
-			case '"':
-				_parse_string(proc);
-				break;
-
-			case '`':
-				_parse_template(proc);
-				break;
-
-			default:
-				hb_proc_accept(proc);
-			}
-		}
-	}
-}
diff --git a/src/unit/content/style.c b/src/unit/content/style.c
deleted file mode 100644
index e2e1519..0000000
--- a/src/unit/content/style.c
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <hb/proc.h>
-
-static void _parse_comment(hb_proc* proc)
-{
-	hb_proc_require_match(proc, "/*");
-
-	// Unlike script tags, style comments do NOT end at closing tag.
-	while (!hb_proc_accept_if_matches(proc, "*/")) {
-		hb_proc_accept(proc);
-	}
-}
-
-static void _parse_string(hb_proc* proc)
-{
-	hb_rune delim = hb_proc_accept(proc);
-
-	if (delim != '"' && delim != '\'') {
-		hb_proc_error(proc, HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-			      "Expected CSS string delimiter");
-	}
-
-	bool escaping = false;
-
-	while (true) {
-		hb_rune c = hb_proc_accept(proc);
-
-		if (c == '\\') {
-			escaping = !escaping;
-			continue;
-		}
-
-		if (c == delim && !escaping) {
-			break;
-		}
-
-		if (hb_proc_accept_if_matches_line_terminator(proc)) {
-			if (!escaping) {
-				hb_proc_error(proc,
-					      HB_ERR_PARSE_EXPECTED_NOT_FOUND,
-					      "Unterminated CSS string");
-			}
-		}
-
-		escaping = false;
-	}
-}
-
-void hb_unit_content_style(hb_proc* proc)
-{
-	while (!hb_proc_matches(proc, "</")) {
-		if (hb_proc_matches(proc, "/*")) {
-			_parse_comment(proc);
-		} else {
-			switch (hb_proc_peek(proc)) {
-			case '\'':
-			case '"':
-				_parse_string(proc);
-				break;
-			default:
-				hb_proc_accept(proc);
-			}
-		}
-	}
-}
diff --git a/src/unit/entity.c b/src/unit/entity.c
deleted file mode 100644
index 48121f3..0000000
--- a/src/unit/entity.c
+++ /dev/null
@@ -1,221 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rule.h>
-#include <hb/unit.h>
-
-// The minimum length of any entity is 3, which is a character entity reference
-// with a single character name. The longest UTF-8 representation of a Unicode
-// code point is 4 bytes. Because there are no character entity references with
-// a name of length 1, it's always better to decode entities for minification
-// purposes.
-
-// Based on the data sourced from https://www.w3.org/TR/html5/entities.json as
-// of 2019-04-20T04:00:00.000Z:
-// - Entity names can have [A-Za-z0-9] characters, and are case sensitive.
-// - Some character entity references do not need to end with a semicolon.
-// - The longest name is "CounterClockwiseContourIntegral", with length 31
-// (excluding leading ampersand and trailing semicolon).
-// - All entity names are at least 2 characters long.
-
-// Browser implementation behaviour to consider:
-// - It is unclear what happens if an entity name does not match case
-// sensitively but matches two or more case insensitively.
-//   - For example, given "AlphA" or "aLpha", does the browser choose "alpha" or
-//   "Alpha"?
-// - Do browsers render valid entities without trailing semicolons?
-//   - For example, how do browsers interpret "Chuck-&amp-Cheese", "1&amp1", and
-//   "&ampe;"?
-
-// hyperbuild implementation:
-// - Entities must start with an ampersand and end with a semicolon.
-// - Once an ampersand is encountered, it and the sequence of characters
-// following must match the following ECMAScript regular expression to be
-// considered a well formed entity:
-//
-//   /&(#(x[0-9a-f]{1-6}|[0-9]{1,7}))|[a-z0-9]{2,31};/i
-//
-// - If the sequence of characters following an ampersand do not combine to form
-// a well formed entity, the ampersand is considered a bare ampersand.
-//   - A bare ampersand is an ampersand that is interpreted literally and not as
-//   the start of an entity.
-//   - hyperbuild looks ahead without consuming to check if the following
-//   characters would form a well formed entity. If they don't, only the longest
-//   subsequence that could form a well formed entity is consumed.
-// - An entity is considered invalid if it is well formed but represents a
-// non-existent Unicode code point or reference name.
-
-#define _MAX_UNICODE_CODE_POINT 0x10FFFF
-
-typedef enum {
-	_TYPE_MALFORMED,
-	_TYPE_NAME,
-	_TYPE_DECIMAL,
-	_TYPE_HEXADECIMAL
-} _type;
-
-typedef bool _valid_char_predicate(hb_rune c);
-
-static int32_t _parse_decimal(nh_view_str* view)
-{
-	int32_t val = 0;
-	nh_view_for(view, i, _, len)
-	{
-		char c = nh_view_str_get(view, i);
-		val = val * 10 + (c - '0');
-	}
-	return val > _MAX_UNICODE_CODE_POINT ? -1 : val;
-}
-
-static int32_t _parse_hexadecimal(nh_view_str* view)
-{
-	int32_t val = 0;
-	nh_view_for(view, i, _, len)
-	{
-		char c = nh_view_str_get(view, i);
-		int32_t digit = hb_rule_ascii_digit_check(c)
-					? c - '0'
-					: hb_rule_ascii_uppercase_check(c)
-						  ? c - 'A' + 10
-						  : c - 'a' + 10;
-		val = val * 16 + digit;
-	}
-	return val > _MAX_UNICODE_CODE_POINT ? -1 : val;
-}
-
-/**
- * Process an HTML entity.
- *
- * @return Unicode code point of the entity, or HB_UNIT_ENTITY_NONE if the
- * entity is malformed or invalid
- */
-int32_t hb_unit_entity(hb_proc* proc)
-{
-	// View of the entire entity, including leading ampersand and any
-	// trailing semicolon.
-	hb_proc_view_init_src(entity, proc);
-	hb_proc_view_start_with_src_next(&entity, proc);
-	hb_proc_require_skip(proc, '&');
-
-	// The input can end at any time after initial ampersand.
-	// Examples of valid complete source code: "&", "&a", "&#", "&#09",
-	// "&amp".
-
-	// There are three stages to this function:
-	//
-	// 1. Determine the type of entity, so we can know how to parse and
-	// validate the following characters.
-	//    - This can be done by simply looking at the first and second
-	//    characters after the initial ampersand, e.g. "&#", "&#x", "&a".
-	// 2. Parse the entity data, i.e. the characters between the ampersand
-	// and semicolon.
-	//    - To avoid parsing forever on malformed entities without
-	//    semicolons, there is an upper bound on the amount of possible
-	//    characters, based on the type of entity detected from the first
-	//    stage.
-	// 3. Interpret and validate the data.
-	//    - This simply checks if it refers to a valid Unicode code point or
-	//    entity reference name.
-
-	// First stage: determine the type of entity.
-	_valid_char_predicate* predicate;
-	_type type;
-	size_t min_len;
-	size_t max_len;
-
-	if (hb_proc_skip_if_matches(proc, "#x")) {
-		predicate = &hb_rule_ascii_hex_check;
-		type = _TYPE_HEXADECIMAL;
-		min_len = 1;
-		max_len = 6;
-
-	} else if (hb_proc_skip_if(proc, '#')) {
-		predicate = &hb_rule_ascii_digit_check;
-		type = _TYPE_DECIMAL;
-		min_len = 1;
-		max_len = 7;
-
-	} else if (hb_rule_entity_reference_valid_name_char(
-			   hb_proc_peek_eof(proc))) {
-		predicate = &hb_rule_entity_reference_valid_name_char;
-		type = _TYPE_NAME;
-		min_len = 2;
-		max_len = 31;
-
-	} else {
-		hb_proc_error_if_not_suppressed(proc,
-						HB_ERR_PARSE_MALFORMED_ENTITY,
-						"Malformed entity");
-		// Output bare ampersand.
-		hb_proc_write(proc, '&');
-		return HB_UNIT_ENTITY_NONE;
-	}
-
-	// Second stage: try to parse a well formed entity.
-	// If the entity is not well formed, either throw an error or interpret
-	// literally (depending on configuration).
-	hb_proc_view_init_src(data, proc);
-	hb_proc_view_start_with_src_next(&data, proc);
-	for (size_t i = 0; i < max_len; i++) {
-		hb_eof_rune c = hb_proc_peek_eof(proc);
-		// Character ends entity.
-		if (c == ';') {
-			break;
-		}
-		// Character would not form well formed entity.
-		if (!(*predicate)(c)) {
-			type = _TYPE_MALFORMED;
-			break;
-		}
-		// Character is valid.
-		hb_proc_skip(proc);
-	}
-	hb_proc_view_end_with_src_prev(&data, proc);
-	if (nh_view_str_length(&data) < min_len)
-		type = _TYPE_MALFORMED;
-	// Don't try to consume semicolon if entity is not well formed already.
-	if (type != _TYPE_MALFORMED && !hb_proc_skip_if(proc, ';'))
-		type = _TYPE_MALFORMED;
-	hb_proc_view_end_with_src_prev(&entity, proc);
-
-	if (type == _TYPE_MALFORMED) {
-		hb_proc_error_if_not_suppressed(proc,
-						HB_ERR_PARSE_MALFORMED_ENTITY,
-						"Malformed entity");
-		// Write longest subsequence of characters that could form a
-		// well formed entity.
-		hb_proc_write_view(proc, &entity);
-		return HB_UNIT_ENTITY_NONE;
-	}
-
-	// Third stage: validate entity and decode if configured to do so.
-	int32_t uchar = -1;
-	switch (type) {
-	case _TYPE_NAME:
-		uchar = hb_rule_entity_reference_get_code_point(&data);
-		break;
-
-	case _TYPE_DECIMAL:
-		uchar = _parse_decimal(&data);
-		break;
-
-	case _TYPE_HEXADECIMAL:
-		uchar = _parse_hexadecimal(&data);
-		break;
-
-	default:
-		// Defensive coding.
-		hb_proc_error(proc, HB_ERR_INTERR_UNKNOWN_ENTITY_TYPE,
-			      "Unknown entity type");
-	}
-	if (uchar == -1) {
-		hb_proc_error(proc, HB_ERR_PARSE_INVALID_ENTITY,
-			      "Invalid entity");
-	}
-
-	if (proc->cfg->decode_entities) {
-		hb_proc_write_utf_8(proc, uchar);
-	} else {
-		hb_proc_write_view(proc, &entity);
-	}
-
-	return uchar;
-}
diff --git a/src/unit/tag.c b/src/unit/tag.c
deleted file mode 100644
index 4bb428f..0000000
--- a/src/unit/tag.c
+++ /dev/null
@@ -1,90 +0,0 @@
-#include <hb/proc.h>
-#include <hb/rule.h>
-#include <hb/unit.h>
-
-void hb_unit_tag(hb_proc* proc, nh_view_str* parent)
-{
-	hb_proc_require(proc, '<');
-	nh_view_str name = hb_unit_tag_name(proc);
-
-	// Check that this tag is allowed directly under its parent.
-	if (!hb_rule_tag_parent_whitelist_allowed(&name, parent)
-	    || !hb_rule_tag_child_whitelist_allowed(parent, &name)
-	    || !hb_rule_tag_parent_blacklist_allowed(&name, parent)
-	    || !hb_rule_tag_child_blacklist_allowed(parent, &name)) {
-		hb_proc_error(proc, HB_ERR_PARSE_ILLEGAL_CHILD,
-			      "Tag can't be a child here");
-	}
-
-	hb_unit_attr_type last_attr_type = HB_UNIT_ATTR_NONE;
-	bool self_closing = false;
-
-	while (true) {
-		// At the beginning of this loop, the last parsed unit was
-		// either the tag name or an attribute (including its value, if
-		// it had one).
-		size_t ws_accepted;
-		if (proc->cfg->remove_tag_whitespace) {
-			ws_accepted = hb_proc_skip_while_predicate(
-				proc, &hb_rule_ascii_whitespace_check);
-		} else {
-			ws_accepted = hb_proc_accept_while_predicate(
-				proc, &hb_rule_ascii_whitespace_check);
-		}
-
-		if (hb_proc_accept_if(proc, '>')) {
-			// End of tag.
-			break;
-		}
-
-		if ((self_closing = hb_proc_accept_if_matches(proc, "/>"))) {
-			hb_proc_error_if_not_suppressed(
-				proc, HB_ERR_PARSE_SELF_CLOSING_TAG,
-				"Self-closing tag");
-			break;
-		}
-
-		// HB_ERR_PARSE_NO_SPACE_BEFORE_ATTR is not suppressible as
-		// otherwise there would be difficulty in determining what is
-		// the end of a tag/attribute name/attribute value.
-		if (!ws_accepted) {
-			hb_proc_error(proc, HB_ERR_PARSE_NO_SPACE_BEFORE_ATTR,
-				      "No whitespace before attribute");
-		}
-
-		if (proc->cfg->remove_tag_whitespace) {
-			if (last_attr_type != HB_UNIT_ATTR_QUOTED) {
-				hb_proc_write(proc, ' ');
-			}
-		}
-
-		last_attr_type = hb_unit_attr(proc);
-	}
-
-	if (self_closing || hb_rule_tag_void_check(&name)) {
-		return;
-	}
-
-	if (nh_view_str_equals_literal_i(&name, "script")) {
-		// <script> tag.
-		hb_unit_content_script(proc);
-	} else if (nh_view_str_equals_literal_i(&name, "style")) {
-		// <style> tag.
-		hb_unit_content_style(proc);
-	} else {
-		// Standard HTML.
-		hb_unit_content_html(proc, &name);
-	}
-
-	// Require closing tag for non-void.
-	hb_proc_require_match(proc, "</");
-	nh_view_str closing_name = hb_unit_tag_name(proc);
-	if (!nh_view_str_equals(&name, &closing_name)) {
-		// TODO Find a way to cleanly provide opening and closing tag
-		// names (which are views) into error message without leaking
-		// memory.
-		hb_proc_error(proc, HB_ERR_PARSE_UNCLOSED_TAG,
-			      "Tag not closed");
-	}
-	hb_proc_require(proc, '>');
-}
diff --git a/src/unit/tag/name.c b/src/unit/tag/name.c
deleted file mode 100644
index d4883c1..0000000
--- a/src/unit/tag/name.c
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <hb/collection.h>
-#include <hb/proc.h>
-#include <hb/rule.h>
-
-nh_view_str hb_unit_tag_name(hb_proc* proc)
-{
-	hb_proc_view_init_src(name, proc);
-
-	hb_proc_view_start_with_src_next(&name, proc);
-	do {
-		// Require at least one character.
-		hb_rune c = hb_proc_require_predicate(
-			proc, &hb_rule_tag_name_check, "tag name");
-
-		if (hb_rule_ascii_uppercase_check(c)) {
-			hb_proc_error_if_not_suppressed(
-				proc, HB_ERR_PARSE_UCASE_TAG,
-				"Uppercase letter in tag name");
-		}
-	} while (hb_rule_tag_name_check(hb_proc_peek(proc)));
-	hb_proc_view_end_with_src_prev(&name, proc);
-
-	if (!hb_rule_tag_valid_check(&name)) {
-		hb_proc_error_if_not_suppressed(
-			proc, HB_ERR_PARSE_NONSTANDARD_TAG, "Non-standard tag");
-	}
-
-	return name;
-}
diff --git a/test/hbtest.h b/test/hbtest.h
deleted file mode 100644
index 5fbb63c..0000000
--- a/test/hbtest.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#pragma once
-
-#include <stdio.h>
-
-#define expect(cond, msg)                                                      \
-	if (!cond)                                                             \
-	fprintf(stderr, "Test failed: " msg " [%s %s() line %d]", __FILE__,    \
-		__func__, __LINE__)