Complete initial migration to Rust

2019-12-25 20:44:51 +11:00 · 2019-12-25 20:44:51 +11:00 · 806560dd94
parent d75d62883b
commit 806560dd94
26 changed files with 911 additions and 1027 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -6,3 +6,5 @@ edition = "2018"

 [dependencies]
 phf = { version = "0.8.0", features = ["macros"] }
+cascade = "0.1.4"
+structopt = "0.3.5"
--- a/archive/quoted.rs
+++ b/archive/quoted.rs
@ -1,130 +0,0 @@
-fn tmp() -> () {
-    // TODO
-    loop {
-        let is_whitespace = is_whitespace(c);
-        if should_collapse_and_trim_ws && is_whitespace {
-            // Character, after any entity decoding, is whitespace.
-            // Don't write whitespace.
-            // In order to collapse whitespace, only write one space
-            // character once the first non-whitespace character
-            // after a sequence of whitespace characters is reached.
-            last_char_was_whitespace = true;
-            proc.skip();
-        } else {
-            // Character, after any entity decoding, is not whitespace.
-            if last_char_was_whitespace {
-                // This is the first non-whitespace character after one or more whitespace
-                // character(s), so collapse whitespace by writing only one space.
-                proc.write(b' ');
-                has_whitespace_after_processing = true;
-                last_char_was_whitespace = false;
-            };
-
-            if c == b'"' {
-                count_double_quotation += 1;
-            } else if c == b'\'' {
-                count_single_quotation += 1;
-            } else if is_whitespace {
-                // `should_collapse_and_trim_ws` is false, so
-                // whitespace is written.
-                has_whitespace_after_processing = true;
-            };
-
-            increment_count(c);
-            if !processed_entity {
-                // Don't need to accept if hb_unit_entity has
-                // already been called.
-                proc.accept();
-            };
-        };
-    }
-
-    // Since it's not possible to optimise the delimiter quotes without
-    // knowing the complete value, mark the processed value in the output
-    // for post-processing later.
-    let proc_value_start = proc.data.get_out_pos();
-    let mut is_first_char = true;
-
-    loop {
-        let processed_entity = c == b'&';
-        if processed_entity {
-            // Characters will be consumed by hb_unit_entity, but they will never be '\'', '"', or
-            // whitespace, as the function only consumes characters that could form a  well formed
-            // entity. See the function for more details.
-            // TODO Handle bad char
-            let decoded = process_entity(proc)?;
-            match decoded {
-                Some(e) => if e <= 0x7f { c = e as u8; } else { c = 0xff; },
-                None => c = 0xff,
-            };
-        }
-
-
-        is_first_char = false;
-    };
-    let proc_length = proc.data.get_out_pos() + 1 - proc_value_start;
-    proc.match_char(delimiter).require()?.discard();
-
-    // Technically, the specification states that values may only be
-    // unquoted if they don't contain ["'`=<>]. However, browsers seem to
-    // interpret characters after `=` and before the nearest whitespace as
-    // an unquoted value, so long as no quote immediately follows `=`. If a
-    // value cannot be unquoted, use the one that appears the least and
-    // therefore requires the least amount of encoding. Prefer double quotes
-    // to single quotes if it's a tie.
-    let quote_to_encode;
-    let quote_encoded;
-    let amount_of_quotes_to_encode;
-
-    if proc_length > 0 && !has_whitespace_after_processing && !starts_with_quote {
-        // No need to do any further processing; processed value is
-        // already in unquoted form.
-        return Ok(AttrType::Unquoted);
-    } else if count_single_quotation < count_double_quotation {
-        quote_to_encode = b'\'';
-        quote_encoded = ENCODED_SINGLE_QUOTE;
-        amount_of_quotes_to_encode = count_single_quotation;
-    } else {
-        quote_to_encode = b'"';
-        quote_encoded = ENCODED_DOUBLE_QUOTE;
-        amount_of_quotes_to_encode = count_double_quotation;
-    }
-
-    // TODO Improve; avoid direct memory access; clean API.
-    let post_length = 2 + proc_length - amount_of_quotes_to_encode + (amount_of_quotes_to_encode * quote_encoded.len());
-    // Where the post-processed output should start in the output array.
-    let out_start = proc_value_start;
-    let proc_end = out_start + proc_length - 1;
-    let post_end = out_start + post_length - 1;
-
-    let mut reader = proc_end;
-    let mut writer = post_end;
-    proc.data.set_out_char_at(writer, quote_to_encode);
-    writer -= 1;
-    // To prevent overwriting data when encoding quotes, post-process output
-    // in reverse. Loop condition is checked at end of loop instead of
-    // before to prevent underflow. WARNING: This code directly uses and
-    // manipulates struct members of `proc`, which in general should be
-    // avoided.
-    loop {
-        let c = proc.data.get_src_char_at(reader);
-        if c == quote_to_encode {
-            writer -= quote_encoded.len();
-            proc.data.replace_out_slice(writer + 1, quote_encoded);
-        } else {
-            proc.data.set_out_char_at(writer, c);
-            writer -= 1;
-        }
-
-        // Break before decrementing to prevent underflow.
-        if reader == out_start {
-            break;
-        }
-        reader -= 1;
-    }
-    // This must be done after previous loop to prevent overwriting data.
-    proc.data.set_out_char_at(writer, quote_to_encode);
-    proc.data.set_out_pos(post_end + 1);
-
-    Ok(AttrType::Quoted)
-}
--- a/src/code.rs
+++ b/src/code.rs
@ -0,0 +1,30 @@
+use std::ops::Range;
+
+// TODO Inline with proc.
+// Thin wrapper around the mutable byte buffer that is both read from and written to
+// (see the read_* and write_* methods on the impl).
+pub struct Code<'d> {
+    // The borrowed buffer; `'d` is the lifetime of that borrow.
+    pub data: &'d mut [u8],
+}
+
+impl<'d> Code<'d> {
+    /// Number of bytes in the underlying buffer.
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Byte at index `pos`. Panics if `pos` is out of bounds.
+    pub fn read_char(&self, pos: usize) -> u8 {
+        self.data[pos]
+    }
+
+    /// Bytes in `range` (start inclusive, end exclusive). Panics if out of bounds.
+    pub fn read_slice(&self, range: Range<usize>) -> &[u8] {
+        let Range { start, end } = range;
+        &self.data[start..end]
+    }
+
+    /// Copy the bytes in `src` so that they start at index `to`; the regions may overlap.
+    /// Panics if either region is out of bounds.
+    pub fn copy_within(&mut self, src: Range<usize>, to: usize) {
+        self.data.copy_within(src, to);
+    }
+
+    /// Overwrite the byte at index `pos` with `c`. Panics if `pos` is out of bounds.
+    pub fn write_char(&mut self, pos: usize, c: u8) {
+        self.data[pos] = c;
+    }
+
+    /// Overwrite the bytes starting at index `pos` with `s`. Panics if `s` does not fit.
+    pub fn write_slice(&mut self, pos: usize, s: &[u8]) {
+        let end = pos + s.len();
+        self.data[pos..end].copy_from_slice(s);
+    }
+}
--- a/src/code/inplace.rs
+++ b/src/code/inplace.rs
@ -1,10 +0,0 @@
-pub struct CodeInPlace<'data> {
-    data: &'data mut [u8],
-    read_next: usize,
-    // Offset of the next unwritten space.
-    write_next: usize,
-}
-
-impl Code for CodeInPlace {
-
-}
--- a/src/code/mod.rs
+++ b/src/code/mod.rs
@ -1,57 +0,0 @@
-use std::ops::Range;
-
-pub trait Code {
-    // Unsafe direct memory access.
-    // TODO Pos refers to index of next readable.
-    unsafe fn get_src_pos(&self) -> usize;
-    /// Does NOT check bounds (assumes already checked).
-    unsafe fn set_src_pos(&self, pos: usize) -> ();
-    unsafe fn get_src_char_at(&self, pos: usize) -> u8;
-    /// Get a slice from `start` (inclusive) to `end` (exclusive).
-    unsafe fn get_src_slice(&self, range: Range<usize>) -> &[u8];
-
-    // TODO Pos refers to index of next writable.
-    unsafe fn get_out_pos(&self) -> usize;
-    /// Does NOT check bounds (assumes already checked).
-    unsafe fn set_out_pos(&self, pos: usize) -> usize;
-    unsafe fn set_out_char_at(&self, pos: usize, c: u8) -> ();
-    unsafe fn get_out_mut_slice(&self, range: Range<usize>) -> &mut [u8];
-    unsafe fn replace_out_at(&self, pos: usize, s: &[u8]) -> ();
-
-    // Checking bounds.
-    fn in_bounds(&self, offset: usize) -> bool;
-    fn at_end(&self) -> bool {
-        !self.in_bounds(0)
-    }
-
-    // Reading.
-    /// Get the `offset` character from next.
-    /// When `offset` is 0, the next character is returned.
-    /// Panics. Does not check bounds for performance (e.g. already checked).
-    fn read(&self, offset: usize) -> u8 {
-        self.get_src_char_at(self.get_src_pos() + offset)
-    }
-    fn maybe_read(&self, offset: usize) -> Option<u8> {
-        if self.in_bounds(offset) {
-            Some(self.read(offset))
-        } else {
-            None
-        }
-    }
-    /// Get a slice of the next `count` characters from next.
-    /// Panics. Does not check bounds for performance (e.g. already checked).
-    fn read_slice(&self, count: usize) -> &[u8] {
-        self.get_src_slice(self.get_src_pos()..self.get_src_pos() + count)
-    }
-
-    // Writing.
-    /// Move next `amount` characters to output.
-    /// Panics. Does not check bounds for performance (e.g. already checked).
-    fn shift(&self, amount: usize) -> ();
-    fn write(&self, c: u8) -> ();
-    fn write_slice(&self, s: &[u8]) -> ();
-
-    // Skipping.
-    /// Panics. Does not check bounds for performance (e.g. already checked).
-    fn consume(&self, amount: usize) -> ();
-}
--- a/src/code/outofplace.rs
+++ b/src/code/outofplace.rs
@ -1,11 +0,0 @@
-pub struct CodeOutOfPlace<'src, 'out> {
-    src: &'src [u8],
-    src_next: usize,
-
-    out: &'out mut [u8],
-    out_next: usize,
-}
-
-impl Code for CodeOutOfPlace {
-
-}
--- a/src/err.rs
+++ b/src/err.rs
@ -1,3 +1,4 @@
+#[derive(Debug)]
 pub enum HbErr {
    ExpectedCharNotFound { expected: u8, got: u8 },
    ExpectedMatchNotFound(&'static [u8]),
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,12 +1,13 @@
+use crate::err::HbRes;
+use crate::proc::Processor;
+use crate::unit::content::process_content;
+
 mod code;
-mod err;
+pub mod err;
+#[macro_use]
 mod proc;
 mod spec;
-
-use err::HbRes;
-use crate::code::Code;
-use crate::proc::content::process_content;
-use crate::proc::Processor;
+mod unit;

 /**
 * Run hyperbuild on an input array and write to {@param output}. Output will be
@ -20,6 +21,8 @@ use crate::proc::Processor;
 * @param cfg configuration to use
 * @return result where to write any resulting error information
 */
-fn hyperbuild<T: Code>(code: &mut T) -> HbRes<()> {
-    process_content(&Processor { data: code }, None)
+pub fn hyperbuild<'d>(code: &'d mut [u8]) -> HbRes<usize> {
+    // The processor reads from and writes to `code` itself.
+    let mut proc = Processor::new(code);
+    match process_content(&mut proc, None) {
+        // On success, report how many bytes of `code` were written.
+        Ok(_) => Ok(proc.written_len()),
+        Err(err) => Err(err),
+    }
+}
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,27 @@
+use std::fs::File;
+use std::io::{Read, stdin, stdout, Write};
+use structopt::StructOpt;
+
+use hyperbuild::hyperbuild;
+
+// Command line arguments of the binary, parsed in `main` via `Cli::from_args()`.
+// NOTE: plain `//` comments are used on purpose; `///` doc comments on a StructOpt
+// struct are picked up as help text and would change the program's output.
+#[derive(StructOpt)]
+struct Cli {
+    // Path of the file that `main` opens and reads the source from.
+    #[structopt(short, long, parse(from_os_str))]
+    src: std::path::PathBuf,
+    // Path of the file that `main` creates and writes the result to.
+    #[structopt(short, long, parse(from_os_str))]
+    out: std::path::PathBuf,
+}
+
+// Reads the file at `--src` fully into memory, runs `hyperbuild` on the buffer in place, and
+// writes the first `result` bytes of the buffer to `--out`.
+// Panics with a short message if any I/O step or the processing itself fails.
+fn main() {
+    let args = Cli::from_args();
+    let mut vec = Vec::<u8>::new();
+    let mut src_file = File::open(args.src).expect("could not read source file");
+    // Do not ignore read errors: a failed read would otherwise process a partial or empty buffer.
+    src_file.read_to_end(&mut vec).expect("could not read source file");
+    let code = vec.as_mut_slice();
+    // TODO
+    // `result` is the number of bytes at the start of `code` that make up the output.
+    let result = hyperbuild(code).unwrap();
+    println!("{}", result);
+    let mut out_file = File::create(args.out).expect("could not open output file");
+    out_file.write_all(&code[..result]).expect("could not write to output file");
+    println!("Done!")
+}
--- a/src/proc.rs
+++ b/src/proc.rs
@ -0,0 +1,446 @@
+use std::ops::Index;
+
+use phf::Set;
+
+use crate::code::Code;
+use crate::err::{HbErr, HbRes};
+
+// Expands a chain of method calls written after a receiver identifier, e.g.
+// `cascade_return!(proc.a(x)?.b().c())`, into one statement per call, each invoked on the
+// receiver itself (not on the previous call's result). The whole macro evaluates to the
+// value of the last call in the chain. A `?` after a call is applied to that call's result.
+macro_rules! cascade_return {
+    // Entry point: expand every call as a statement, binding the final call's result to `last`.
+    ($proc:ident $($tail:tt)+) => ({
+        cascade_return!(@line $proc, last, $($tail)+);
+        last
+    });
+    // Match `?` operator before a call without `?`.
+    // Non-final call followed by `?`: its unwrapped result is discarded.
+    (@line $proc:ident, $last:ident, . $method:ident($($arg:expr),*)? $($tail:tt)+) => {
+        $proc.$method($($arg),*)?;
+        cascade_return!(@line $proc, $last, $($tail)*);
+    };
+    // Non-final call without `?`: its result is discarded.
+    (@line $proc:ident, $last:ident, . $method:ident($($arg:expr),*) $($tail:tt)+) => {
+        $proc.$method($($arg),*);
+        cascade_return!(@line $proc, $last, $($tail)*);
+    };
+    // Final call followed by `?`: its unwrapped result becomes the macro's value.
+    (@line $proc:ident, $last:ident, . $method:ident($($arg:expr),*)?) => {
+        let $last = $proc.$method($($arg),*)?;
+    };
+    // Final call without `?`: its result becomes the macro's value.
+    (@line $proc:ident, $last:ident, . $method:ident($($arg:expr),*)) => {
+        let $last = $proc.$method($($arg),*);
+    };
+}
+
+// Why a match was attempted; stored on the match so that `_require` can pick the error to
+// return when nothing was matched.
+#[derive(Copy, Clone)]
+pub enum RequireReason {
+    // No built-in description; the message is supplied by the caller of `require_with_reason`.
+    Custom,
+    // Recorded by `match_not_char`: the next character had to differ from this byte.
+    ExpectedNotChar(u8),
+    // A specific byte sequence was expected; reported as `HbErr::ExpectedMatchNotFound`.
+    ExpectedMatch(&'static [u8]),
+    // Recorded by `match_char`: the next character had to equal this byte.
+    ExpectedChar(u8),
+}
+
+// Result of the most recent `match_*` call on a `Processor`. A `count` of zero means that
+// nothing was matched.
+#[derive(Copy, Clone)]
+struct Match {
+    // Need to record start as we might get slice after keeping or skipping.
+    start: usize,
+    // Guaranteed amount of characters that exist from `start` at time of creation of this struct.
+    count: usize,
+    // Character matched, if any. Only exists for single-character matches and if matched.
+    char: Option<u8>,
+    // Why the match was attempted; selects the error produced when a required match is empty.
+    reason: RequireReason,
+}
+
+// Snapshot of a `Processor`'s read and write positions, created by `Processor::checkpoint`
+// and consumed by methods such as `restore`, `write_skipped` and `erase_written`.
+#[derive(Copy, Clone)]
+pub struct Checkpoint {
+    // Value of the processor's `read_next` when the checkpoint was taken.
+    read_next: usize,
+    // Value of the processor's `write_next` when the checkpoint was taken.
+    write_next: usize,
+}
+
+// Range of positions in a `Processor`'s code, as returned by `Processor::range` for the
+// current match. A `Processor` can be indexed with it to get the corresponding bytes.
+#[derive(Copy, Clone)]
+pub struct ProcessorRange {
+    // Index of the first byte (inclusive).
+    start: usize,
+    // Index one past the last byte (exclusive).
+    end: usize,
+}
+
+// Processing state of a file. Most fields are used internally and set during
+// processing. Single use only; create one per processing.
+pub struct Processor<'d> {
+    // The buffer being processed; it is read from and written to in place.
+    code: Code<'d>,
+    // Most recent match made by a `match_*` method; `None` until the first match is made.
+    m: Option<Match>,
+    // Index of the next character to read.
+    read_next: usize,
+    // Index of the next unwritten space.
+    write_next: usize,
+}
+
+// Index of the first occurrence of `c` in `s` at or after index `from`, if any.
+// Returns `None` when `from` is at or beyond the end of `s`.
+fn index_of(s: &'static [u8], c: u8, from: usize) -> Option<usize> {
+    s.iter()
+        .enumerate()
+        .skip(from)
+        .find(|&(_, &byte)| byte == c)
+        .map(|(pos, _)| pos)
+}
+
+// For fast not-matching, ensure that it's possible to continue directly to next character in string
+// when searching for first substring matching pattern in string and only partially matching pattern.
+// For example, given string "abcdabc" and pattern "abcde", normal substring searching would match
+// "abcd", fail, and then start searching from 'b' at index 1. We want to be able to continue searching
+// from 'a' at index 4.
+// Concretely, the assertion (debug builds only) checks that the pattern is not empty and that
+// its first byte does not occur again anywhere later in the pattern.
+macro_rules! debug_assert_fast_pattern {
+    ($x:expr) => {
+        debug_assert!($x.len() > 0 && index_of($x, $x[0], 1) == None);
+    }
+}
+
+// Allows `processor[range]` to borrow the bytes of the code covered by a `ProcessorRange`.
+impl<'d> Index<ProcessorRange> for Processor<'d> {
+    type Output = [u8];
+
+    // Panics if the range does not lie within the code.
+    fn index(&self, index: ProcessorRange) -> &Self::Output {
+        let ProcessorRange { start, end } = index;
+        self.code.read_slice(start..end)
+    }
+}
+
+// For consistency and improvement of internal API, only write public functions using internal APIs.
+// Do not call other public Processor methods.
+//
+// Typical usage is to make a match with one of the `match_*` methods, optionally assert it with
+// `require*`/`expect`, query it (`matched`, `char`, `slice`, ...), and commit it with `keep` (copy
+// to output) or `discard` (drop from output). All query/assert/commit methods panic if no match
+// has been made yet.
+impl<'d> Processor<'d> {
+    // INTERNAL APIs.
+    // Checking bounds.
+    /// Whether the character `offset` places after the next unread character exists.
+    fn in_bounds(&self, offset: usize) -> bool {
+        self.read_next + offset < self.code.len()
+    }
+
+    // Reading.
+    /// Get the `offset` character from next.
+    /// When `offset` is 0, the next character is returned.
+    /// Panics. Does not check bounds for performance (e.g. already checked).
+    fn read(&self, offset: usize) -> u8 {
+        self.code.read_char(self.read_next + offset)
+    }
+    /// Bounds-checked variant of `read`; returns `None` if the character does not exist.
+    fn maybe_read(&self, offset: usize) -> Option<u8> {
+        if self.in_bounds(offset) {
+            Some(self.read(offset))
+        } else {
+            None
+        }
+    }
+
+    // Writing.
+    /// Move next `amount` characters to output, advancing both the read and write positions.
+    /// Panics. Does not check bounds for performance (e.g. already checked).
+    fn shift(&mut self, amount: usize) {
+        self.code.copy_within(self.read_next..self.read_next + amount, self.write_next);
+        self.read_next += amount;
+        // The copied characters are now part of the output, so the write position must move too;
+        // otherwise the next write would overwrite them and `written_len` would stay at zero.
+        self.write_next += amount;
+    }
+
+    // Skipping.
+    /// Advance the read position by `amount` without writing anything.
+    /// Does not check bounds for performance (e.g. already checked).
+    fn consume(&mut self, amount: usize) {
+        self.read_next += amount;
+    }
+
+    /// Create a processor that reads from and writes to `code` in place, starting at index 0.
+    pub fn new(code: &mut [u8]) -> Processor {
+        Processor { write_next: 0, read_next: 0, code: Code { data: code }, m: None }
+    }
+
+    /// Whether all characters have been read.
+    pub fn at_end(&self) -> bool {
+        !self.in_bounds(0)
+    }
+    /// Number of characters written to the output so far.
+    pub fn written_len(&self) -> usize {
+        self.write_next
+    }
+
+    // Use match
+
+    // Query
+    /// Whether the last match matched at least one character.
+    pub fn matched(&self) -> bool {
+        self.m.unwrap().count > 0
+    }
+    /// Number of characters matched by the last match.
+    pub fn length(&self) -> usize {
+        self.m.unwrap().count
+    }
+    /// Character matched by the last single-character match. Panics if there is none.
+    pub fn char(&self) -> u8 {
+        self.m.unwrap().char.unwrap()
+    }
+    /// Character matched by the last single-character match, if any.
+    pub fn maybe_char(&self) -> Option<u8> {
+        self.m.unwrap().char
+    }
+    /// Range of source positions covered by the last match.
+    pub fn range(&self) -> ProcessorRange {
+        let m = self.m.unwrap();
+        ProcessorRange { start: m.start, end: m.start + m.count }
+    }
+    /// Characters currently stored at the positions covered by the last match.
+    pub fn slice(&self) -> &[u8] {
+        let m = self.m.unwrap();
+        self.code.read_slice(m.start..m.start + m.count)
+    }
+
+    // Assert
+    /// Return an error describing the last match if it did not match anything.
+    /// Panics if the match has a custom reason but `custom_reason` is `None`.
+    fn _require(&self, custom_reason: Option<&'static str>) -> HbRes<()> {
+        let m = self.m.unwrap();
+        if m.count > 0 {
+            return Ok(());
+        }
+        // `m.char` is only set on success, so it cannot be used to report what was found instead.
+        // A failed match covers no characters, so the offending character (if any) is at `m.start`.
+        let got = if m.start < self.code.len() {
+            Some(self.code.read_char(m.start))
+        } else {
+            None
+        };
+        Err(match (m.reason, got) {
+            (RequireReason::Custom, _) => HbErr::ExpectedNotFound(custom_reason.unwrap()),
+            (RequireReason::ExpectedMatch(pat), _) => HbErr::ExpectedMatchNotFound(pat),
+            // Single-character matches fail at the end of the code because there is no character.
+            (RequireReason::ExpectedChar(_), None) | (RequireReason::ExpectedNotChar(_), None) => HbErr::UnexpectedEnd,
+            (RequireReason::ExpectedChar(expected), Some(got)) => HbErr::ExpectedCharNotFound { expected, got },
+            // The only way for a "not `c`" match to fail on an existing character is finding `c`.
+            (RequireReason::ExpectedNotChar(c), Some(_)) => HbErr::UnexpectedCharFound(c),
+        })
+    }
+    /// Return an error if the last match did not match anything.
+    /// Panics if the match has no built-in reason; use `require_with_reason` for those.
+    pub fn require(&self) -> HbRes<()> {
+        self._require(None)
+    }
+    /// Like `require`, but `reason` describes what was expected for matches without built-in reason.
+    pub fn require_with_reason(&self, reason: &'static str) -> HbRes<()> {
+        self._require(Some(reason))
+    }
+    // TODO Document
+    /// Panic if the last match did not match anything.
+    pub fn expect(&self) {
+        // TODO Maybe debug_assert?
+        assert!(self.m.unwrap().count > 0);
+    }
+
+    // Commit.
+    // Note that m.count has already been verified to be valid, so don't need to bounds check again.
+    /// Copy the characters of the last match to the output.
+    pub fn keep(&mut self) {
+        let count = self.m.unwrap().count;
+        self.shift(count);
+    }
+    /// Skip the characters of the last match without writing them to the output.
+    pub fn discard(&mut self) {
+        let m = self.m.unwrap();
+        self.read_next = m.start + m.count;
+    }
+
+    // Helper internal functions for match_* API.
+    /// Record a match of `count` characters starting at the current read position.
+    fn _new_match(&mut self, count: usize, char: Option<u8>, reason: RequireReason) {
+        // Don't assert match doesn't exist, as otherwise we would need to clear match on every use
+        // which would slow down performance and require mutable methods for querying match.
+        let start = self.read_next;
+        self.m = Some(Match { start, count, char, reason });
+    }
+    /// Match the next character if it exists and satisfies `cond`.
+    fn _match_one<C: FnOnce(u8) -> bool>(&mut self, cond: C, reason: RequireReason) {
+        match self.maybe_read(0).filter(|n| cond(*n)) {
+            Some(c) => self._new_match(1, Some(c), reason),
+            None => self._new_match(0, None, reason),
+        }
+    }
+    /// Match as many of the next characters as satisfy `cond` (possibly none).
+    fn _match_greedy<C: Fn(u8) -> bool>(&mut self, cond: C) {
+        let mut count = 0usize;
+        while self.in_bounds(count) && cond(self.read(count)) {
+            count += 1;
+        }
+        self._new_match(count, None, RequireReason::Custom)
+    }
+
+    // Single-char matching API.
+    pub fn match_char(&mut self, c: u8) {
+        self._match_one(|n| n == c, RequireReason::ExpectedChar(c))
+    }
+    pub fn match_not_char(&mut self, c: u8) {
+        self._match_one(|n| n != c, RequireReason::ExpectedNotChar(c))
+    }
+    pub fn match_member(&mut self, set: Set<u8>) {
+        self._match_one(|n| set.contains(&n), RequireReason::Custom)
+    }
+    pub fn match_not_member(&mut self, set: Set<u8>) {
+        self._match_one(|n| !set.contains(&n), RequireReason::Custom)
+    }
+    pub fn match_pred(&mut self, pred: fn(u8) -> bool) {
+        self._match_one(|n| pred(n), RequireReason::Custom)
+    }
+    pub fn match_not_pred(&mut self, pred: fn(u8) -> bool) {
+        self._match_one(|n| !pred(n), RequireReason::Custom)
+    }
+
+    // Match a sequence of characters.
+    /// Match `pat` if the next characters are exactly `pat`; otherwise match nothing.
+    pub fn match_seq(&mut self, pat: &'static [u8]) {
+        debug_assert_fast_pattern!(pat);
+        // For faster short-circuiting matching, compare char-by-char instead of slices.
+        let len = pat.len();
+        let is_match = len > 0
+            && self.in_bounds(len - 1)
+            && pat.iter().enumerate().all(|(i, &c)| self.read(i) == c);
+        let count = if is_match { len } else { 0 };
+        // Record the expected sequence so that a plain `require()` reports it instead of panicking
+        // for lack of a custom reason.
+        self._new_match(count, None, RequireReason::ExpectedMatch(pat))
+    }
+    /// Match a line terminator: "\n", "\r\n", or a lone "\r".
+    pub fn match_line_terminator(&mut self) {
+        let count = match self.maybe_read(0) {
+            Some(b'\n') => 1,
+            Some(b'\r') => 1 + self.maybe_read(1).filter(|c| *c == b'\n').is_some() as usize,
+            _ => 0,
+        };
+        self._new_match(count, None, RequireReason::Custom)
+    }
+
+    // Multi-char matching API.
+    pub fn match_while_char(&mut self, c: u8) {
+        self._match_greedy(|n| n == c)
+    }
+    pub fn match_while_not_char(&mut self, c: u8) {
+        self._match_greedy(|n| n != c)
+    }
+    pub fn match_while_member(&mut self, set: Set<u8>) {
+        self._match_greedy(|n| set.contains(&n))
+    }
+    pub fn match_while_not_member(&mut self, set: Set<u8>) {
+        self._match_greedy(|n| !set.contains(&n))
+    }
+    pub fn match_while_pred(&mut self, pred: fn(u8) -> bool) {
+        self._match_greedy(pred)
+    }
+    /// Match all characters up to (but excluding) the first occurrence of `s`, or up to the end
+    /// of the code if `s` does not occur. `s` must not be empty and its first character must not
+    /// occur again in `s` (checked in debug builds only).
+    pub fn match_while_not_seq(&mut self, s: &'static [u8]) {
+        debug_assert_fast_pattern!(s);
+        // TODO Test
+        let mut count = 0usize;
+        let mut srcpos = 0usize;
+        // Next character in pattern to match.
+        // For example, if `patpos` is 2, we've matched 2 characters so far and need to match character at index 2 in pattern with character `srcpos` in code.
+        let mut patpos = 0usize;
+        let mut found = false;
+        while self.in_bounds(srcpos) {
+            if self.read(srcpos) == s[patpos] {
+                if patpos == s.len() - 1 {
+                    // Matched last character in pattern i.e. whole pattern.
+                    found = true;
+                    break;
+                }
+                srcpos += 1;
+                patpos += 1;
+            } else {
+                // The partially matched characters turned out not to be the start of `s`.
+                count += patpos;
+                if patpos == 0 {
+                    count += 1;
+                    srcpos += 1;
+                } else {
+                    // Retry the current character against the start of the pattern.
+                    patpos = 0;
+                }
+            }
+        }
+        if !found {
+            // Reached the end in the middle of the pattern: the partially matched characters are
+            // not an occurrence of `s`, so they belong to the match as well.
+            count += patpos;
+        }
+        self._new_match(count, None, RequireReason::Custom)
+    }
+
+    /// Record the current read and write positions.
+    pub fn checkpoint(&self) -> Checkpoint {
+        Checkpoint {
+            read_next: self.read_next,
+            write_next: self.write_next,
+        }
+    }
+
+    /// Reset the read and write positions to those recorded in `checkpoint`.
+    pub fn restore(&mut self, checkpoint: Checkpoint) {
+        self.read_next = checkpoint.read_next;
+        self.write_next = checkpoint.write_next;
+    }
+
+    /// Write characters skipped from source since checkpoint. Must not have written anything since checkpoint.
+    pub fn write_skipped(&mut self, checkpoint: Checkpoint) {
+        // Make sure that nothing has been written since checkpoint (which would be lost).
+        debug_assert_eq!(self.write_next, checkpoint.write_next);
+        let skipped = self.read_next - checkpoint.read_next;
+        // Get src code from checkpoint until last consumed character (inclusive).
+        self.code.copy_within(checkpoint.read_next..self.read_next, checkpoint.write_next);
+        // The copied characters are now part of the output.
+        self.write_next = checkpoint.write_next + skipped;
+    }
+
+    /// Discard characters written since checkpoint but keep source position.
+    pub fn erase_written(&mut self, checkpoint: Checkpoint) {
+        self.write_next = checkpoint.write_next;
+    }
+
+    /// Number of characters read since `checkpoint`.
+    pub fn consumed_count(&self, checkpoint: Checkpoint) -> usize {
+        self.read_next - checkpoint.read_next
+    }
+
+    /// Number of characters written since `checkpoint`.
+    pub fn written_count(&self, checkpoint: Checkpoint) -> usize {
+        self.write_next - checkpoint.write_next
+    }
+
+    /// Get the `offset` character from next.
+    /// When `offset` is 0, the next character is returned.
+    pub fn peek_offset_eof(&self, offset: usize) -> Option<u8> {
+        self.maybe_read(offset)
+    }
+    /// Like `peek_offset_eof`, but a missing character is an `UnexpectedEnd` error.
+    pub fn peek_offset(&self, offset: usize) -> HbRes<u8> {
+        self.maybe_read(offset).ok_or(HbErr::UnexpectedEnd)
+    }
+    /// Get the next character, if any, without consuming it.
+    pub fn peek_eof(&self) -> Option<u8> {
+        self.maybe_read(0)
+    }
+    /// Like `peek_eof`, but a missing character is an `UnexpectedEnd` error.
+    pub fn peek(&self) -> HbRes<u8> {
+        self.maybe_read(0).ok_or(HbErr::UnexpectedEnd)
+    }
+
+    /// Skip the next `count` characters (can be zero).
+    /// Will result in an error if exceeds bounds.
+    pub fn skip_amount(&mut self, count: usize) -> HbRes<()> {
+        // Check for zero to prevent underflow as type is usize.
+        if count == 0 || self.in_bounds(count - 1) {
+            self.consume(count);
+            Ok(())
+        } else {
+            Err(HbErr::UnexpectedEnd)
+        }
+    }
+    /// Skip and return the next character.
+    /// Will result in an error if exceeds bounds.
+    pub fn skip(&mut self) -> HbRes<u8> {
+        if !self.at_end() {
+            let c = self.read(0);
+            self.consume(1);
+            Ok(c)
+        } else {
+            Err(HbErr::UnexpectedEnd)
+        }
+    }
+
+    /// Write `c` to output. Will panic if exceeds bounds.
+    pub fn write(&mut self, c: u8) {
+        self.code.write_char(self.write_next, c);
+        // Advance so that subsequent writes do not overwrite `c`.
+        self.write_next += 1;
+    }
+    /// Write `s` to output. Will panic if exceeds bounds.
+    pub fn write_slice(&mut self, s: &[u8]) {
+        self.code.write_slice(self.write_next, s);
+        // Advance so that subsequent writes do not overwrite `s`.
+        self.write_next += s.len();
+    }
+    /// Write `c` to output encoded as UTF-8.
+    /// Does not check if `c` is a valid Unicode code point, but panics if `c` exceeds 0x10FFFF.
+    pub fn write_utf8(&mut self, c: u32) {
+        // Don't use char::encode_utf8 as it requires a valid code point,
+        // and requires passing a [u8, 4] which might be heap-allocated.
+        if c <= 0x7F {
+            // Plain ASCII.
+            self.write(c as u8);
+        } else if c <= 0x07FF {
+            // 2-byte UTF-8.
+            self.write((((c >> 6) & 0x1F) | 0xC0) as u8);
+            self.write(((c & 0x3F) | 0x80) as u8);
+        } else if c <= 0xFFFF {
+            // 3-byte UTF-8.
+            self.write((((c >> 12) & 0x0F) | 0xE0) as u8);
+            self.write((((c >> 6) & 0x3F) | 0x80) as u8);
+            self.write(((c & 0x3F) | 0x80) as u8);
+        } else if c <= 0x10FFFF {
+            // 4-byte UTF-8.
+            self.write((((c >> 18) & 0x07) | 0xF0) as u8);
+            self.write((((c >> 12) & 0x3F) | 0x80) as u8);
+            self.write((((c >> 6) & 0x3F) | 0x80) as u8);
+            self.write(((c & 0x3F) | 0x80) as u8);
+        } else {
+            unreachable!();
+        }
+    }
+
+    /// Copy the next character to the output and return it.
+    /// Will result in an error if exceeds bounds.
+    pub fn accept(&mut self) -> HbRes<u8> {
+        if !self.at_end() {
+            let c = self.read(0);
+            self.shift(1);
+            Ok(c)
+        } else {
+            Err(HbErr::UnexpectedEnd)
+        }
+    }
+    /// Copy the next `count` characters (can be zero) to the output.
+    /// Will result in an error if exceeds bounds.
+    pub fn accept_amount(&mut self, count: usize) -> HbRes<()> {
+        // Check for zero to prevent underflow as type is usize.
+        if count == 0 || self.in_bounds(count - 1) {
+            self.shift(count);
+            Ok(())
+        } else {
+            Err(HbErr::UnexpectedEnd)
+        }
+    }
+}
--- a/src/proc/attr/mod.rs
+++ b/src/proc/attr/mod.rs
@ -1,48 +0,0 @@
-use crate::proc::Processor;
-use crate::err::HbRes;
-use crate::spec::codepoint::is_control;
-use crate::code::Code;
-use crate::proc::attr::quoted::{is_attr_quote, process_quoted_val};
-use crate::proc::attr::unquoted::process_attr_unquoted_val;
-
-mod quoted;
-mod unquoted;
-
-pub enum AttrType {
-    // Special value for hb_unit_tag.
-    None,
-
-    Quoted,
-    Unquoted,
-    NoValue,
-}
-
-// Characters allowed in an attribute name.
-// NOTE: Unicode noncharacters not tested.
-// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name for spec.
-fn is_name_char(c: u8) -> bool {
-    match c {
-        b' ' | b'"' | b'\'' | b'>' | b'/' | b'=' => false,
-        c => !is_control(c),
-    }
-}
-
-pub fn process_attr<D: Code>(proc: &Processor<D>) -> HbRes<AttrType> {
-    let name = proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().slice();
-
-    let should_collapse_and_trim_value_ws = name.eq_ignore_ascii_case(b"class");
-    let has_value = proc.match_char(b'=').keep().matched();
-
-    if !has_value {
-        Ok(AttrType::NoValue)
-    } else {
-        if proc.match_pred(is_attr_quote).matched() {
-            // Quoted attribute value.
-            process_quoted_val(proc, should_collapse_and_trim_value_ws)
-        } else {
-            // Unquoted attribute value.
-            process_attr_unquoted_val(proc)?;
-            Ok(AttrType::Unquoted)
-        }
-    }
-}
--- a/src/proc/attr/unquoted.rs
+++ b/src/proc/attr/unquoted.rs
@ -1,36 +0,0 @@
-use crate::proc::Processor;
-use crate::err::{HbRes, HbErr};
-use crate::spec::codepoint::is_whitespace;
-use crate::code::Code;
-use crate::proc::entity::process_entity;
-
-// Characters not allowed in an unquoted attribute value.
-// See https://html.spec.whatwg.org/multipage/syntax.html#unquoted for spec.
-fn is_valid_unquoted_value_char(c: u8) -> bool {
-    match c {
-        b'"' | b'\'' | b'`' | b'=' | b'<' | b'>' => true,
-        c => !is_whitespace(c),
-    }
-}
-
-// TODO Unquoted could be optimised to quoted if used entities to encode illegal chars.
-pub fn process_attr_unquoted_val<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    let mut at_least_one_char = false;
-
-    loop {
-        if proc.match_char(b'&').matched() {
-            // Process entity.
-            // TODO Entity could decode to illegal character.
-            process_entity(proc);
-        } else if !proc.match_pred(is_valid_unquoted_value_char).keep().matched() {
-            break;
-        }
-        at_least_one_char = true;
-    }
-
-    if !at_least_one_char {
-        Err(HbErr::ExpectedNotFound("Expected unquoted attribute value"))
-    } else {
-        Ok(())
-    }
-}
--- a/src/proc/bang.rs
+++ b/src/proc/bang.rs
@ -1,13 +0,0 @@
-use crate::proc::Processor;
-use crate::code::Code;
-use crate::err::HbRes;
-
-pub fn process_bang<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    proc.match_seq(b"<!").require()?.keep();
-
-    proc.match_while_not_char(b'>').keep();
-
-    proc.match_char(b'>').require()?.keep();
-
-    Ok(())
-}
--- a/src/proc/comment.rs
+++ b/src/proc/comment.rs
@ -1,14 +0,0 @@
-use crate::proc::Processor;
-use crate::code::Code;
-use crate::err::HbRes;
-
-pub fn process_comment<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    proc.match_seq(b"<!--").expect().discard();
-
-    // TODO Cannot use this pattern
-    proc.match_while_not_seq(b"-->").discard();
-
-    proc.match_seq(b"-->").require_with_reason("comment end")?.discard();
-
-    Ok(())
-}
--- a/src/proc/content.rs
+++ b/src/proc/content.rs
@ -1,156 +0,0 @@
-use crate::code::Code;
-use crate::proc::Processor;
-use crate::spec::codepoint::is_whitespace;
-use crate::proc::comment::process_comment;
-use crate::proc::bang::process_bang;
-use crate::proc::entity::process_entity;
-use crate::proc::tag::process_tag;
-use crate::err::HbRes;
-use crate::spec::tag::wss::WSS_TAGS;
-use crate::spec::tag::content::CONTENT_TAGS;
-use crate::spec::tag::formatting::FORMATTING_TAGS;
-
-#[derive(PartialEq)]
-enum State {
-	Comment,
-	Bang,
-	OpeningTag,
-
-	Start,
-	End,
-	Entity,
-	Whitespace,
-	Text,
-}
-
-impl State {
-	fn is_comment_bang_opening_tag(&self) -> bool {
-		match self {
-			State::Comment | State::Bang | State::OpeningTag => true,
-			_ => false,
-		}
-	}
-
-	fn next_state<D: Code>(proc: &Processor<D>) -> State {
-		// TODO Optimise to trie.
-
-		if proc.data.at_end() || proc.match_seq(b"</").matched() {
-			return State::End;
-		}
-
-		if proc.match_pred(is_whitespace).matched() {
-			return State::Whitespace;
-		}
-
-		if proc.match_seq(b"<!--").matched() {
-			return State::Comment;
-		}
-
-		// Check after comment
-		if proc.match_seq(b"<!").matched() {
-			return State::Bang;
-		};
-
-		// Check after comment and bang
-		if proc.match_char(b'<').matched() {
-			return State::OpeningTag;
-		};
-
-		if proc.match_char(b'&').matched() {
-			return State::Entity;
-		};
-
-		return State::Text;
-	}
-}
-
-/*
- * Whitespace handling is the trickiest part of this function.
- * There are three potential minification settings that affect whitespace
- * handling:
- *   - collapse
- *   - destroy whole
- *   - trim
- * What whitespace to minify depends on the parent and configured settings.
- * We want to prevent memory allocation and use only one pass, but whitespace
- * handling often involves looking ahead.
- */
-pub fn process_content<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes<()> {
-	let should_collapse_whitespace = parent.filter(|p| !WSS_TAGS.contains(p)).is_some();
-	let should_destroy_whole_whitespace = parent.filter(|p| !WSS_TAGS.contains(p) && !CONTENT_TAGS.contains(p) && !FORMATTING_TAGS.contains(p)).is_some();
-	let should_trim_whitespace = parent.filter(|p| !WSS_TAGS.contains(p) && !FORMATTING_TAGS.contains(p)).is_some();
-
-	// Trim leading whitespace if configured to do so.
-	if should_trim_whitespace {
-		proc.match_while_pred(is_whitespace).discard();
-	};
-
-	let mut last_state = State::Start;
-	// Whether or not currently in whitespace.
-	let mut whitespace_start = None;
-	// If currently in whitespace, whether or not current contiguous
-	// whitespace started after a bang, comment, or tag.
-	let mut whitespace_started_after_cbot = false;
-
-	loop {
-		let next_state = State::next_state(proc);
-
-		if next_state == State::Whitespace {
-			// Whitespace is always buffered and then processed
-			// afterwards, even if not minifying.
-			proc.skip();
-
-			if last_state != State::Whitespace {
-				// This is the start of one or more whitespace
-				// characters, so start a view of this
-				// contiguous whitespace and don't write any
-				// characters that are part of it yet.
-				whitespace_start = Some(proc.start_read_slice());
-				whitespace_started_after_cbot = last_state.is_comment_bang_opening_tag();
-			} else {
-				// This is part of a contiguous whitespace, but
-				// not the start of, so simply ignore.
-			}
-		} else {
-			// Next character is not whitespace, so handle any
-			// previously buffered whitespace.
-			if let Some(whitespace_buffered) = whitespace_start {
-				if should_destroy_whole_whitespace && whitespace_started_after_cbot && next_state.is_comment_bang_opening_tag() {
-					// Whitespace is between two tags, comments, or bangs.
-					// destroy_whole_whitespace is on, so don't write it.
-				} else if should_trim_whitespace && next_state == State::End {
-					// Whitespace is trailing.
-					// should_trim_whitespace is on, so don't write it.
-				} else if should_collapse_whitespace {
-					// Current contiguous whitespace needs to be reduced to a single space character.
-					proc.write(b' ');
-				} else {
-					// Whitespace cannot be minified, so
-					// write in entirety.
-					proc.write_slice(proc.get_slice(whitespace_buffered));
-				}
-
-				// Reset whitespace buffer.
-				whitespace_start = None;
-			};
-
-			// Process and consume next character(s).
-			match next_state {
-				State::Comment => process_comment(proc),
-				State::Bang => process_bang(proc),
-				State::OpeningTag => process_tag(proc, parent),
-				State::End => (),
-				State::Entity => process_entity(proc),
-				State::Text => proc.accept(),
-				_ => unreachable!(),
-			};
-		};
-
-		last_state = next_state;
-		if next_state == State::End {
-			break;
-		};
-	};
-
-	Ok(())
-}
--- a/src/proc/mod.rs
+++ b/src/proc/mod.rs
@ -1,368 +0,0 @@
-use crate::err::{HbErr, HbRes};
-use phf::Set;
-use crate::code::Code;
-
-pub mod attr;
-pub mod bang;
-pub mod comment;
-pub mod content;
-pub mod entity;
-pub mod script;
-pub mod style;
-pub mod tag;
-
-pub enum RequireReason {
-    Custom,
-    ExpectedNotChar(u8),
-    ExpectedMatch(&'static [u8]),
-    ExpectedChar(u8),
-}
-
-struct Match<'d, D: Code> {
-    data: &'d mut D,
-    // Need to record start as we might get slice after keeping or skipping.
-    start: usize,
-    // Guaranteed amount of characters that exist from `start` at time of creation of this struct.
-    count: usize,
-    // Character matched, if any. Only exists for single-character matches and if matched.
-    char: Option<u8>,
-    reason: RequireReason,
-}
-
-impl<D: Code> Match<'_, D> {
-    // Query
-    /// Whether at least one character was matched.
-    pub fn matched(&self) -> bool {
-        self.count > 0
-    }
-    /// Number of characters matched.
-    pub fn length(&self) -> usize {
-        self.count
-    }
-    /// The matched character. Panics unless this is a successful single-character match.
-    pub fn char(&self) -> u8 {
-        self.char.unwrap()
-    }
-    /// The matched character, if this is a successful single-character match.
-    pub fn maybe_char(&self) -> Option<u8> {
-        self.char
-    }
-    /// The matched source characters, starting at the position recorded on creation.
-    pub fn slice(&self) -> &[u8] {
-        self.data.get_src_slice(self.start..self.start + self.count)
-    }
-
-    // Assert
-    /// Returns `self` if something was matched, otherwise the error that
-    /// corresponds to the reason this match was created with.
-    fn _require(&self, custom_reason: Option<&'static str>) -> HbRes<&Self> {
-        if self.count > 0 {
-            Ok(self)
-        } else {
-            // `self.char` is only set for successful matches, so the offending
-            // character has to be read from the source rather than unwrapped.
-            match self.reason {
-                RequireReason::Custom => Err(HbErr::ExpectedNotFound(custom_reason.unwrap())),
-                // A "not `c`" match failed: either `c` was found or the source ended.
-                RequireReason::ExpectedNotChar(c) => match self.data.maybe_read(0) {
-                    Some(_) => Err(HbErr::UnexpectedCharFound(c)),
-                    None => Err(HbErr::UnexpectedEnd),
-                },
-                // A "`c`" match failed: either another character was found or the source ended.
-                RequireReason::ExpectedChar(c) => match self.data.maybe_read(0) {
-                    Some(got) => Err(HbErr::ExpectedCharNotFound { expected: c, got }),
-                    None => Err(HbErr::UnexpectedEnd),
-                },
-                RequireReason::ExpectedMatch(m) => Err(HbErr::ExpectedMatchNotFound(m)),
-            }
-        }
-    }
-    /// Fails with the built-in reason if nothing was matched.
-    pub fn require(&self) -> HbRes<&Self> {
-        self._require(None)
-    }
-    /// Fails with `reason` if nothing was matched and the match has a custom reason.
-    pub fn require_with_reason(&self, reason: &'static str) -> HbRes<&Self> {
-        self._require(Some(reason))
-    }
-    // TODO Document
-    /// Panics if nothing was matched.
-    pub fn expect(&self) -> &Self {
-        // TODO Maybe debug_assert?
-        assert!(self.count > 0);
-        self
-    }
-
-    // Commit.
-    // Note that self.count has already been verified to be valid, so don't need to bounds check again.
-    pub fn keep(&self) -> &Self {
-        self.data.shift(self.count);
-        self
-    }
-    pub fn discard(&self) -> &Self {
-        // `set_src_pos` takes an absolute position (see `Checkpoint::restore`),
-        // so move to just past the matched characters rather than to `count`.
-        self.data.set_src_pos(self.start + self.count);
-        self
-    }
-}
-
-struct Checkpoint<'d, D: Code> {
-    data: &'d mut D,
-    src_pos: usize,
-    out_pos: usize,
-}
-
-impl<D: Code> Checkpoint<'_, D> {
-    pub fn restore(&self) -> () {
-        self.data.set_src_pos(self.src_pos);
-        self.data.set_out_pos(self.out_pos);
-    }
-
-    /// Write characters skipped from source since checkpoint. Must not have written anything since checkpoint.
-    pub fn write_skipped(&self) -> () {
-        // Make sure that nothing has been written since checkpoint (which would be lost).
-        debug_assert_eq!(self.data.get_out_pos(), self.out_pos);
-        // Get src code from checkpoint until last consumed character (inclusive).
-        let skipped = self.data.get_src_slice(self.src_pos..self.data.get_src_pos());
-        self.data.write_slice(skipped);
-    }
-
-    /// Discard characters written since checkpoint but keep source position.
-    pub fn erase_written(&self) -> () {
-        self.data.set_out_pos(self.out_pos);
-    }
-
-    pub fn consumed_count(&self) -> usize {
-        self.data.get_src_pos() - self.src_pos
-    }
-
-    pub fn written_count(&self) -> usize {
-        self.data.get_out_pos() - self.out_pos
-    }
-}
-
-// Processing state of a file. Most fields are used internally and set during
-// processing. Single use only; create one per processing.
-pub struct Processor<'data, D: Code> {
-    pub data: &'data mut D,
-}
-
-fn index_of(s: &'static [u8], c: u8, from: usize) -> Option<usize> {
-    for i in from..s.len() {
-        if s[i] == c {
-            return Some(i);
-        };
-    };
-    None
-}
-
-// For fast not-matching, ensure that it's possible to continue directly to next character in string
-// when searching for first substring matching pattern in string and only partially matching pattern.
-// For example, given string "abcdabc" and pattern "abcde", normal substring searching would match
-// "abcd", fail, and then start searching from 'b' at index 1. We want to be able to continue searching
-// from 'a' at index 4.
-macro_rules! debug_assert_fast_pattern {
-    ($x:expr) => {
-        debug_assert!($x.len() > 0 && index_of($x, $x[0], 1) == None);
-    }
-}
-
-// For consistency and improvement of underlying API, only write methods in terms of the underlying API (Code methods). Do not call other Proc methods.
-// TODO Return refs for matches.
-impl<D: Code> Processor<'_, D> {
-    // Helper internal functions for match_* API.
-    fn _new_match(&self, count: usize, char: Option<u8>, reason: RequireReason) -> Match<D> {
-        Match {
-            data: self.data,
-            start: self.data.get_src_pos(),
-            count,
-            char,
-            reason,
-        }
-    }
-    fn _match_one<C: FnOnce(u8) -> bool>(&self, cond: C, reason: RequireReason) -> Match<D> {
-        let m = self.data.maybe_read(0).filter(|n| cond(*n));
-        self._new_match(m.is_some() as usize, m, reason)
-    }
-    fn _match_greedy<C: FnOnce(u8) -> bool>(&self, cond: C) -> Match<D> {
-        let mut count = 0usize;
-        while self.data.in_bounds(count) && cond(self.data.read(count)) {
-            count += 1;
-        };
-        self._new_match(count, None, RequireReason::Custom)
-    }
-
-    // Single-char matching API.
-    pub fn match_char(&self, c: u8) -> Match<D> {
-        self._match_one(|n| n == c, RequireReason::ExpectedChar(c))
-    }
-    pub fn match_not_char(&self, c: u8) -> Match<D> {
-        self._match_one(|n| n != c, RequireReason::ExpectedNotChar(c))
-    }
-    pub fn match_member(&self, set: Set<u8>) -> Match<D> {
-        self._match_one(|n| set.contains(&n), RequireReason::Custom)
-    }
-    pub fn match_not_member(&self, set: Set<u8>) -> Match<D> {
-        self._match_one(|n| !set.contains(&n), RequireReason::Custom)
-    }
-    pub fn match_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
-        self._match_one(|n| pred(n), RequireReason::Custom)
-    }
-    pub fn match_not_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
-        self._match_one(|n| !pred(n), RequireReason::Custom)
-    }
-
-    // Match a sequence of characters.
-    pub fn match_seq(&self, pat: &'static [u8]) -> Match<D> {
-        debug_assert_fast_pattern!(pat);
-        // For faster short-circuiting matching, compare char-by-char instead of slices.
-        let len = pat.len();
-        let mut count = 0;
-        if len > 0 && self.data.in_bounds(len - 1) {
-            for i in 0..len {
-                if self.data.read(i) != pat[i] {
-                    count = 0;
-                    break;
-                };
-                count += 1;
-            };
-        };
-        self._new_match(count, None, RequireReason::Custom)
-    }
-    pub fn match_line_terminator(&self) -> Match<D> {
-        self._new_match(match self.data.maybe_read(0) {
-            Some(b'\n') => 1,
-            Some(b'\r') => 1 + self.data.maybe_read(1).filter(|c| *c == b'\n').is_some() as usize,
-            _ => 0,
-        }, None, RequireReason::Custom)
-    }
-
-    // Multi-char matching API.
-    pub fn match_while_char(&self, c: u8) -> Match<D> {
-        self._match_greedy(|n| n == c)
-    }
-    pub fn match_while_not_char(&self, c: u8) -> Match<D> {
-        self._match_greedy(|n| n != c)
-    }
-    pub fn match_while_member(&self, set: Set<u8>) -> Match<D> {
-        self._match_greedy(|n| set.contains(&n))
-    }
-    pub fn match_while_not_member(&self, set: Set<u8>) -> Match<D> {
-        self._match_greedy(|n| !set.contains(&n))
-    }
-    pub fn match_while_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
-        self._match_greedy(pred)
-    }
-    pub fn match_while_not_seq(&self, s: &'static [u8]) -> Match<D> {
-        debug_assert_fast_pattern!(s);
-        // TODO Test
-        // TODO Document
-        let mut count = 0usize;
-        let mut srcpos = 0usize;
-        // Next character in pattern to match.
-        // For example, if `patpos` is 2, we've matched 2 characters so far and need to match character at index 2 in pattern with character `srcpos` in code.
-        let mut patpos = 0usize;
-        while self.data.in_bounds(srcpos) {
-            if self.data.read(srcpos) == s[patpos] {
-                if patpos == s.len() - 1 {
-                    // Matched last character in pattern i.e. whole pattern.
-                    break;
-                } else {
-                    srcpos += 1;
-                    patpos += 1;
-                }
-            } else {
-                count += patpos;
-                if patpos == 0 {
-                    count += 1;
-                    srcpos += 1;
-                } else {
-                    patpos = 0;
-                };
-            };
-        };
-        self._new_match(count, None, RequireReason::Custom)
-    }
-
-    pub fn checkpoint(&self) -> Checkpoint<D> {
-        Checkpoint {
-            data: self.data,
-            src_pos: self.data.get_src_pos(),
-            out_pos: self.data.get_out_pos(),
-        }
-    }
-
-    /// Get the `offset` character from next.
-    /// When `offset` is 0, the next character is returned.
-    pub fn peek_offset_eof(&self, offset: usize) -> Option<u8> {
-        self.data.maybe_read(offset)
-    }
-    pub fn peek_offset(&self, offset: usize) -> HbRes<u8> {
-        self.data.maybe_read(offset).ok_or(HbErr::UnexpectedEnd)
-    }
-    pub fn peek_eof(&self) -> Option<u8> {
-        self.data.maybe_read(0)
-    }
-    pub fn peek(&self) -> HbRes<u8> {
-        self.data.maybe_read(0).ok_or(HbErr::UnexpectedEnd)
-    }
-
-    /// Skip the next `count` characters (can be zero).
-    /// Will result in an error if exceeds bounds.
-    pub fn skip_amount(&self, count: usize) -> HbRes<()> {
-        // Check for zero to prevent underflow as type is usize.
-        if count == 0 || self.data.in_bounds(count - 1) {
-            self.data.consume(count);
-            Ok(())
-        } else {
-            Err(HbErr::UnexpectedEnd)
-        }
-    }
-    /// Skip and return the next character.
-    /// Will result in an error if exceeds bounds.
-    pub fn skip(&self) -> HbRes<u8> {
-        if !self.data.at_end() {
-            let c = self.data.read(0);
-            self.data.consume(1);
-            Ok(c)
-        } else {
-            Err(HbErr::UnexpectedEnd)
-        }
-    }
-
-    /// Write `c` to output. Will panic if exceeds bounds.
-    pub fn write(&self, c: u8) -> () {
-        self.data.write(c)
-    }
-    /// Write `s` to output. Will panic if exceeds bounds.
-    pub fn write_slice(&self, s: &[u8]) -> () {
-        self.data.write_slice(s)
-    }
-    /// Does not check if `c` is a valid Unicode code point.
-    pub fn write_utf8(&self, c: u32) -> () {
-        // Don't use char::encode_utf8 as it requires a valid code point,
-        // and requires passing a [u8, 4] which might be heap-allocated.
-        if c <= 0x7F {
-            // Plain ASCII.
-            self.data.write(c as u8);
-        } else if c <= 0x07FF {
-            // 2-byte UTF-8.
-            self.data.write((((c >> 6) & 0x1F) | 0xC0) as u8);
-            self.data.write((((c >> 0) & 0x3F) | 0x80) as u8);
-        } else if c <= 0xFFFF {
-            // 3-byte UTF-8.
-            self.data.write((((c >> 12) & 0x0F) | 0xE0) as u8);
-            self.data.write((((c >> 6) & 0x3F) | 0x80) as u8);
-            self.data.write((((c >> 0) & 0x3F) | 0x80) as u8);
-        } else if c <= 0x10FFFF {
-            // 4-byte UTF-8.
-            self.data.write((((c >> 18) & 0x07) | 0xF0) as u8);
-            self.data.write((((c >> 12) & 0x3F) | 0x80) as u8);
-            self.data.write((((c >> 6) & 0x3F) | 0x80) as u8);
-            self.data.write((((c >> 0) & 0x3F) | 0x80) as u8);
-        } else {
-            unreachable!();
-        }
-    }
-
-    pub fn accept(&self) -> HbRes<u8> {
-        if !self.data.at_end() {
-            let c = self.data.read(0);
-            self.data.shift(1);
-            Ok(c)
-        } else {
-            Err(HbErr::UnexpectedEnd)
-        }
-    }
-    pub fn accept_amount(&self, count: usize) -> HbRes<()> {
-        // Check for zero to prevent underflow as type is usize.
-        if count == 0 || self.data.in_bounds(count - 1) {
-            self.data.shift(count);
-            Ok(())
-        } else {
-            Err(HbErr::UnexpectedEnd)
-        }
-    }
-}
--- a/src/unit/attr/mod.rs
+++ b/src/unit/attr/mod.rs
@ -0,0 +1,46 @@
+use crate::proc::Processor;
+use crate::err::HbRes;
+use crate::spec::codepoint::is_control;
+use phf::{Set, phf_set};
+use crate::unit::attr::value::process_attr_value;
+
+mod value;
+
+static COLLAPSIBLE_AND_TRIMMABLE_ATTRS: Set<&'static [u8]> = phf_set! {
+    b"class",
+};
+// Attribute kinds; `process_attr` returns `NoValue` when the name is not followed by `=`.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub enum AttrType {
+    // Special value for `process_tag`.
+    None,
+
+    Quoted,
+    Unquoted,
+    NoValue,
+}
+
+// Returns true if `c` may be part of an attribute name.
+// NOTE: Unicode noncharacters not tested.
+// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name for spec.
+fn is_name_char(c: u8) -> bool {
+    // Space, quotes, `>`, `/` and `=` are never part of a name; any other
+    // character is accepted unless it is a control character.
+    let is_excluded = c == b' ' || c == b'"' || c == b'\'' || c == b'>' || c == b'/' || c == b'=';
+    !is_excluded && !is_control(c)
+}
+
+pub fn process_attr<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<AttrType> {
+    // `process_attr` is expected to be called at an attribute, so the name match is `expect`ed.
+    let attr_name = cascade_return!(proc.match_while_pred(is_name_char).expect().keep().slice());
+
+    // TODO DOC Attr must be case sensitive
+    let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(attr_name);
+
+    // An `=` directly after the name means a value follows.
+    if cascade_return!(proc.match_char(b'=').keep().matched()) {
+        return process_attr_value(proc, should_collapse_and_trim_value_ws);
+    };
+
+    Ok(AttrType::NoValue)
+}
--- a/src/proc/attr/quoted.rs
+++ b/src/proc/attr/quoted.rs
@ -1,11 +1,10 @@
-use crate::proc::{Processor, Match};
-use crate::proc::attr::AttrType;
-use crate::code::Code;
-use crate::spec::codepoint::is_whitespace;
-use crate::proc::entity::{process_entity, parse_entity};
+use phf::{Map, phf_map};
+
 use crate::err::HbRes;
-use phf::Map;
-use std::thread::current;
+use crate::proc::Processor;
+use crate::spec::codepoint::is_whitespace;
+use crate::unit::attr::AttrType;
+use crate::unit::entity::{parse_entity, process_entity};

 pub fn is_double_quote(c: u8) -> bool {
    c == b'"'
@ -31,14 +30,14 @@ static ENCODED: Map<u8, &'static [u8]> = phf_map! {
    b'"' => b"&#34;",
    b'>' => b"&gt;",
    // Whitespace characters as defined by spec in crate::spec::codepoint::is_whitespace.
-    0x09 => b"&#9;",
-    0x0a => b"&#10;",
-    0x0c => b"&#12;",
-    0x0d => b"&#13;",
-    0x20 => b"&#32;",
+    b'\x09' => b"&#9;",
+    b'\x0a' => b"&#10;",
+    b'\x0c' => b"&#12;",
+    b'\x0d' => b"&#13;",
+    b'\x20' => b"&#32;",
 };

-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Eq, PartialEq)]
 enum CharType {
    End,
    MalformedEntity,
@ -58,12 +57,12 @@ impl CharType {
            b'"' => CharType::DoubleQuote,
            b'\'' => CharType::SingleQuote,
            b'>' => CharType::RightChevron,
-            c => if is_whitespace(c) { CharType::Whitespace(c) } else { CharType::Normal },
+            c => if is_whitespace(c) { CharType::Whitespace(c) } else { CharType::Normal(c) },
        }
    }
 }

-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Eq, PartialEq)]
 enum DelimiterType {
    Double,
    Single,
@ -91,14 +90,14 @@ impl Metrics {
        match char_type {
            CharType::Whitespace(c) => {
                self.count_whitespace += 1;
-                self.total_whitespace_encoded_length += ENCODED[c].len();
+                self.total_whitespace_encoded_length += ENCODED[&c].len();
            }
            CharType::SingleQuote => self.count_single_quotation += 1,
            CharType::DoubleQuote => self.count_double_quotation += 1,
            _ => (),
        };

-        if self.first_char_type == None {
+        if let None = self.first_char_type {
            self.first_char_type = Some(char_type);
        };
        self.last_char_type = Some(char_type);
@ -110,13 +109,13 @@ impl Metrics {
        // NOTE: Don't need to consider whitespace for either as all whitespace will be encoded and counts as part of `total_whitespace_encoded_length`.
        let first_char_encoding_cost = match self.first_char_type {
            // WARNING: Change `first_char_is_quote_encoded` if changing here.
-            Some(CharType::DoubleQuote) => ENCODED[b'"'].len(),
-            Some(CharType::SingleQuote) => ENCODED[b'\''].len(),
+            Some(CharType::DoubleQuote) => ENCODED[&b'"'].len(),
+            Some(CharType::SingleQuote) => ENCODED[&b'\''].len(),
            _ => 0,
        };
        let first_char_is_quote_encoded = first_char_encoding_cost > 0;
-        let last_char_encoding_cost = match last_char_type {
-            Some(CharType::RightChevron) => ENCODED[b'>'].len(),
+        let last_char_encoding_cost = match self.last_char_type {
+            Some(CharType::RightChevron) => ENCODED[&b'>'].len(),
            _ => 0,
        };

@ -131,11 +130,11 @@ impl Metrics {
    }

    fn single_quoted_cost(&self) -> usize {
-        self.count_single_quotation * ENCODED[b'\''].len() + self.count_double_quotation + self.count_whitespace
+        self.count_single_quotation * ENCODED[&b'\''].len() + self.count_double_quotation + self.count_whitespace
    }

    fn double_quoted_cost(&self) -> usize {
-        self.count_double_quotation * ENCODED[b'"'].len() + self.count_single_quotation + self.count_whitespace
+        self.count_double_quotation * ENCODED[&b'"'].len() + self.count_single_quotation + self.count_whitespace
    }

    fn get_optimal_delimiter_type(&self) -> DelimiterType {
@ -156,61 +155,59 @@ impl Metrics {
    }
 }

-fn consume_attr_value<D: Code>(
-    proc: &Processor<D>,
-    should_collapse_and_trim_ws: bool,
-    delimiter_pred: fn(u8) -> bool,
-    on_entity: fn(&Processor<D>) -> HbRes<Option<u32>>,
-    on_char: fn(char_type: CharType, char_no: usize) -> (),
-) -> HbRes<()> {
-    // Set to true when one or more immediately previous characters were whitespace and deferred for processing after the contiguous whitespace.
-    // NOTE: Only used if `should_collapse_and_trim_ws`.
-    let mut currently_in_whitespace = false;
-    let mut char_no = 0;
-    loop {
-        let char_type = if proc.match_pred(delimiter_pred).matched() {
-            // DO NOT BREAK HERE. More processing is done afterwards upon reaching end.
-            CharType::End
-        } else if proc.match_char(b'&').matched() {
-            match on_entity(proc)? {
-                Some(e) => if e <= 0x7f { CharType::from_char(e as u8) } else { CharType::DecodedNonAscii },
-                None => CharType::MalformedEntity,
-            }
-        } else {
-            CharType::from_char(proc.skip()?)
-        };
+macro_rules! consume_attr_value_chars {
+    ($proc:ident, $should_collapse_and_trim_ws:ident, $delimiter_pred:ident, $entity_processor:ident, $out_char_type:ident, $on_char:block) => {
+        // Set to true when one or more immediately previous characters were whitespace and deferred for processing after the contiguous whitespace.
+        // NOTE: Only used if `should_collapse_and_trim_ws`.
+        let mut currently_in_whitespace = false;
+        // Needed to check if at beginning of value so that leading whitespace can be trimmed instead of collapsed.
+        // NOTE: Only used if `should_collapse_and_trim_ws`.
+        let mut currently_first_char = true;

-        if should_collapse_and_trim_ws {
-            if let CharType::Whitespace(_) = char_type {
-                // Ignore this whitespace character, but mark the fact that we are currently in contiguous whitespace.
-                currently_in_whitespace = true;
-                continue;
+        loop {
+            let char_type = if cascade_return!($proc.match_pred($delimiter_pred).matched()) {
+                // DO NOT BREAK HERE. More processing is done afterwards upon reaching end.
+                CharType::End
+            } else if cascade_return!($proc.match_char(b'&').matched()) {
+                match $entity_processor($proc)? {
+                    Some(e) => if e <= 0x7f { CharType::from_char(e as u8) } else { CharType::DecodedNonAscii },
+                    None => CharType::MalformedEntity,
+                }
            } else {
-                // Now past whitespace (e.g. moved to non-whitespace char or end of attribute value). Either:
-                // - ignore contiguous whitespace (i.e. do nothing) if we are currently at beginning or end of value; or
-                // - collapse contiguous whitespace (i.e. count as one whitespace char) otherwise.
-                if currently_in_whitespace && first_char_type != None && char_type != CharType::End {
-                    // Collect current collapsed contiguous whitespace that was ignored previously.
-                    on_char(CharType::Whitespace(b' '), char_no);
-                    char_no += 1;
+                CharType::from_char($proc.skip()?)
+            };
+
+            if $should_collapse_and_trim_ws {
+                if let CharType::Whitespace(_) = char_type {
+                    // Ignore this whitespace character, but mark the fact that we are currently in contiguous whitespace.
+                    currently_in_whitespace = true;
+                    continue;
+                } else {
+                    // Now past whitespace (e.g. moved to non-whitespace char or end of attribute value). Either:
+                    // - ignore contiguous whitespace (i.e. do nothing) if we are currently at beginning or end of value; or
+                    // - collapse contiguous whitespace (i.e. count as one whitespace char) otherwise.
+                    if currently_in_whitespace && !currently_first_char && char_type != CharType::End {
+                        // Collect current collapsed contiguous whitespace that was ignored previously.
+                        $out_char_type = CharType::Whitespace(b' ');
+                        $on_char;
+                    };
+                    currently_in_whitespace = false;
                };
-                currently_in_whitespace = false;
+            };
+
+            match char_type {
+                CharType::End => break,
+                char_type => {
+                    $out_char_type = char_type;
+                    $on_char;
+                    currently_first_char = false;
+                }
            };
        };
-
-        if char_type == CharType::End {
-            break;
-        } else {
-            on_char(char_type, char_no);
-            char_no += 1;
-        };
    };
-
-    Ok(())
 }

-// TODO Might encounter danger if Unicode whitespace is considered as whitespace.
-pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim_ws: bool) -> HbRes<AttrType> {
+pub fn process_attr_value<'d, 'p>(proc: &'p mut Processor<'d>, should_collapse_and_trim_ws: bool) -> HbRes<AttrType> {
    // Processing a quoted attribute value is tricky, due to the fact that
    // it's not possible to know whether or not to unquote the value until
    // the value has been processed. For example, decoding an entity could
@ -227,7 +224,7 @@ pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim
    // 4. Post-process the output by adding delimiter quotes and encoding
    // quotes in values. This does mean that the output is written to twice.

-    let src_delimiter = proc.match_pred(is_attr_quote).discard().maybe_char();
+    let src_delimiter = cascade_return!(proc.match_pred(is_attr_quote).discard().maybe_char());
    let src_delimiter_pred = match src_delimiter {
        Some(b'"') => is_double_quote,
        Some(b'\'') => is_single_quote,
@ -246,16 +243,13 @@ pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim
        last_char_type: None,
        collected_count: 0,
    };
-    consume_attr_value(
-        proc,
-        should_collapse_and_trim_ws,
-        src_delimiter_pred,
-        parse_entity,
-        |char_type, _| metrics.collect_char_type(char_type),
-    )?;
+    let mut char_type;
+    consume_attr_value_chars!(proc, should_collapse_and_trim_ws, src_delimiter_pred, parse_entity, char_type, {
+        metrics.collect_char_type(char_type);
+    });

    // Stage 2: optimally minify attribute value using metrics.
-    value_start_checkpoint.restore();
+    proc.restore(value_start_checkpoint);
    let optimal_delimiter = metrics.get_optimal_delimiter_type();
    let optimal_delimiter_char = match optimal_delimiter {
        DelimiterType::Double => Some(b'"'),
@ -266,48 +260,47 @@ pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim
    if let Some(c) = optimal_delimiter_char {
        proc.write(c);
    }
-    consume_attr_value(
-        proc,
-        should_collapse_and_trim_ws,
-        src_delimiter_pred,
-        process_entity,
-        |char_type, char_no| match char_type {
+    let mut char_type;
+    let mut char_no = 0;
+    consume_attr_value_chars!(proc, should_collapse_and_trim_ws, src_delimiter_pred, process_entity, char_type, {
+        match char_type {
            // This should never happen.
            CharType::End => unreachable!(),

-            // Ignore these; already written by process_entity.
+            // Ignore these; already written by `process_entity`.
            CharType::MalformedEntity => {}
            CharType::DecodedNonAscii => {}

            CharType::Normal(c) => proc.write(c),
            // If unquoted, encode any whitespace anywhere.
            CharType::Whitespace(c) => match optimal_delimiter {
-                DelimiterType::Unquoted => proc.write(ENCODED[c]),
+                DelimiterType::Unquoted => proc.write_slice(ENCODED[&c]),
                _ => proc.write(c),
            },
            // If single quoted, encode any single quote anywhere.
            // If unquoted, encode single quote if first character.
            CharType::SingleQuote => match (optimal_delimiter, char_no) {
-                (DelimiterType::Single, _) | (DelimiterType::Unquoted, 0) => proc.write(ENCODED[b'\'']),
-                _ => proc.write(c),
+                (DelimiterType::Single, _) | (DelimiterType::Unquoted, 0) => proc.write_slice(ENCODED[&b'\'']),
+                _ => proc.write(b'\''),
            },
            // If double quoted, encode any double quote anywhere.
            // If unquoted, encode double quote if first character.
            CharType::DoubleQuote => match (optimal_delimiter, char_no) {
-                (DelimiterType::Double, _) | (DelimiterType::Unquoted, 0) => proc.write(ENCODED[b'"']),
-                _ => proc.write(c),
+                (DelimiterType::Double, _) | (DelimiterType::Unquoted, 0) => proc.write_slice(ENCODED[&b'"']),
+                _ => proc.write(b'"'),
            },
            // If unquoted, encode right chevron if last character.
            CharType::RightChevron => if optimal_delimiter == DelimiterType::Unquoted && char_no == metrics.collected_count - 1 {
-                proc.write(ENCODED[b'>']);
+                proc.write_slice(ENCODED[&b'>']);
            } else {
                proc.write(b'>');
            },
-        },
-    );
+        };
+        char_no += 1;
+    });
    // Ensure closing delimiter in src has been matched and discarded, if any.
    if let Some(c) = src_delimiter {
-        proc.match_char(c).expect().discard();
+        cascade_return!(proc.match_char(c).expect().discard());
    }
    // Write closing delimiter, if any.
    if let Some(c) = optimal_delimiter_char {
--- a/src/unit/bang.rs
+++ b/src/unit/bang.rs
@ -0,0 +1,12 @@
+use crate::proc::Processor;
+use crate::err::HbRes;
+
+/// Processes a bang construct such as `<!DOCTYPE html>`.
+///
+/// Matches `<!`, then every byte up to (not including) the next `>`, then the `>` itself, and
+/// calls `keep()` on each match. Returns an error (via `?` on `require()`) when the leading `<!`
+/// or the closing `>` is not present at the current position.
+pub fn process_bang<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    // Opening `<!` is required; failure is propagated to the caller.
+    cascade_return!(proc.match_seq(b"<!").require()?.keep());
+
+    // Everything before the closing chevron.
+    cascade_return!(proc.match_while_not_char(b'>').keep());
+
+    // Closing `>` is required; failure is propagated to the caller.
+    cascade_return!(proc.match_char(b'>').require()?.keep());
+
+    Ok(())
+}
--- a/src/unit/comment.rs
+++ b/src/unit/comment.rs
@ -0,0 +1,13 @@
+use crate::proc::Processor;
+use crate::err::HbRes;
+
+/// Processes an HTML comment (`<!-- ... -->`), calling `discard()` on every part of it.
+///
+/// Returns an error with the reason "comment end" when the closing `-->` is not found.
+pub fn process_comment<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    // Uses `expect()` rather than `require()`: presumably the caller has already checked that
+    // `<!--` is at the current position (as `ContentType::derive_next` does) -- TODO confirm.
+    cascade_return!(proc.match_seq(b"<!--").expect().discard());
+
+    // TODO Cannot use this pattern
+    cascade_return!(proc.match_while_not_seq(b"-->").discard());
+
+    // The terminator is mandatory; a missing one is reported to the caller.
+    cascade_return!(proc.match_seq(b"-->").require_with_reason("comment end")?.discard());
+
+    Ok(())
+}
--- a/src/unit/content.rs
+++ b/src/unit/content.rs
@ -0,0 +1,147 @@
+use crate::err::HbRes;
+use crate::proc::{Checkpoint, Processor, ProcessorRange};
+use crate::spec::codepoint::is_whitespace;
+use crate::spec::tag::content::CONTENT_TAGS;
+use crate::spec::tag::formatting::FORMATTING_TAGS;
+use crate::spec::tag::wss::WSS_TAGS;
+use crate::unit::bang::process_bang;
+use crate::unit::comment::process_comment;
+use crate::unit::entity::process_entity;
+use crate::unit::tag::process_tag;
+
+/// Kind of content found at the current position, as classified by `ContentType::derive_next`.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+enum ContentType {
+    /// Next bytes are `<!--`.
+    Comment,
+    /// Next bytes are `<!` (checked after `<!--`).
+    Bang,
+    /// Next byte is `<` (checked after `</`, `<!--` and `<!`).
+    OpeningTag,
+
+    /// Never returned by `derive_next`; `process_content` uses it as the initial "previous" type.
+    Start,
+    /// Input is at its end, or next bytes are `</`.
+    End,
+    /// Next byte is `&`.
+    Entity,
+    /// Next byte satisfies `is_whitespace`.
+    Whitespace,
+    /// None of the above.
+    Text,
+}
+
+impl ContentType {
+    /// Whether this is one of `Comment`, `Bang` or `OpeningTag`.
+    fn is_comment_bang_opening_tag(&self) -> bool {
+        *self == ContentType::Comment
+            || *self == ContentType::Bang
+            || *self == ContentType::OpeningTag
+    }
+
+    /// Classifies whatever sits at the current position of `proc`.
+    ///
+    /// Only `matched()` is queried on each match; no `keep()`/`discard()` is called here.
+    /// The order of the checks matters: `<!--` must be tested before `<!`, and both before `<`.
+    fn derive_next<'d, 'p>(proc: &'p mut Processor<'d>) -> ContentType {
+        // TODO Optimise to trie.
+
+        if proc.at_end() || cascade_return!(proc.match_seq(b"</").matched()) {
+            ContentType::End
+        } else if cascade_return!(proc.match_pred(is_whitespace).matched()) {
+            ContentType::Whitespace
+        } else if cascade_return!(proc.match_seq(b"<!--").matched()) {
+            ContentType::Comment
+        } else if cascade_return!(proc.match_seq(b"<!").matched()) {
+            // Reached only when the comment check above failed.
+            ContentType::Bang
+        } else if cascade_return!(proc.match_char(b'<').matched()) {
+            // Reached only when both the comment and bang checks above failed.
+            ContentType::OpeningTag
+        } else if cascade_return!(proc.match_char(b'&').matched()) {
+            ContentType::Entity
+        } else {
+            ContentType::Text
+        }
+    }
+}
+
+/// Processes the content inside a tag (or the whole document when `parent` is `None`),
+/// minifying whitespace according to the kind of parent tag.
+///
+/// `parent` is the range of the parent tag's name within `proc`, or `None` for root content.
+/// The loop stops once `ContentType::derive_next` reports `End` (end of input or a `</`
+/// sequence). Errors from nested comments, bangs, tags, entities and text bytes are propagated.
+pub fn process_content<'d, 'p>(proc: &'p mut Processor<'d>, parent: Option<ProcessorRange>) -> HbRes<()> {
+    let should_collapse_whitespace = match parent {
+        Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]),
+        // Should collapse whitespace for root content.
+        None => true,
+    };
+    let should_destroy_whole_whitespace = match parent {
+        Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]) && !CONTENT_TAGS.contains(&proc[tag_name]) && !FORMATTING_TAGS.contains(&proc[tag_name]),
+        // Should destroy whole whitespace for root content.
+        None => true,
+    };
+    let should_trim_whitespace = match parent {
+        Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]) && !FORMATTING_TAGS.contains(&proc[tag_name]),
+        // Should trim whitespace for root content.
+        None => true,
+    };
+
+    // Trim leading whitespace if configured to do so.
+    if should_trim_whitespace {
+        cascade_return!(proc.match_while_pred(is_whitespace).discard());
+    }
+
+    let mut last_non_whitespace_content_type = ContentType::Start;
+    // `Some` while inside a run of contiguous whitespace; holds the position where the run began.
+    let mut whitespace_checkpoint: Option<Checkpoint> = None;
+
+    loop {
+        let next_content_type = ContentType::derive_next(proc);
+
+        if next_content_type == ContentType::Whitespace {
+            if whitespace_checkpoint.is_none() {
+                // This is the start of one or more whitespace characters. Take the checkpoint
+                // BEFORE skipping so that the first whitespace character is part of the run and
+                // is included if the run is later written back with `write_skipped`.
+                whitespace_checkpoint = Some(proc.checkpoint());
+            }
+            // Whitespace is always ignored and then processed afterwards, even if not minifying.
+            proc.skip();
+            continue;
+        }
+
+        // Next character is not whitespace, so handle any previously ignored whitespace.
+        if let Some(whitespace_start) = whitespace_checkpoint {
+            if should_destroy_whole_whitespace && last_non_whitespace_content_type.is_comment_bang_opening_tag() && next_content_type.is_comment_bang_opening_tag() {
+                // Whitespace is between two tags, comments, or bangs.
+                // destroy_whole_whitespace is on, so don't write it.
+            } else if should_trim_whitespace && (next_content_type == ContentType::End || last_non_whitespace_content_type == ContentType::Start) {
+                // Whitespace is leading or trailing.
+                // should_trim_whitespace is on, so don't write it.
+            } else if should_collapse_whitespace {
+                // Current contiguous whitespace needs to be reduced to a single space character.
+                proc.write(b' ');
+            } else {
+                // Whitespace cannot be minified, so write in entirety.
+                proc.write_skipped(whitespace_start);
+            }
+
+            // Reset whitespace buffer.
+            whitespace_checkpoint = None;
+        }
+
+        // Process and consume next character(s).
+        match next_content_type {
+            ContentType::Comment => { process_comment(proc)?; }
+            ContentType::Bang => { process_bang(proc)?; }
+            ContentType::OpeningTag => { process_tag(proc)?; }
+            ContentType::End => (),
+            ContentType::Entity => { process_entity(proc)?; }
+            ContentType::Text => { proc.accept()?; }
+            // Whitespace was handled by the `continue` above; `derive_next` never yields Start.
+            ContentType::Whitespace | ContentType::Start => unreachable!(),
+        }
+
+        last_non_whitespace_content_type = next_content_type;
+        if next_content_type == ContentType::End {
+            break;
+        }
+    }
+
+    Ok(())
+}
--- a/src/unit/entity.rs
+++ b/src/unit/entity.rs
@ -43,10 +43,10 @@ use crate::proc::Processor;
 use crate::spec::codepoint::{is_digit, is_upper_hex_digit, is_lower_hex_digit, is_hex_digit};
 use crate::spec::entity::{ENTITY_REFERENCES, is_valid_entity_reference_name_char};
 use crate::err::HbRes;
-use crate::code::Code;

 const MAX_UNICODE_CODE_POINT: u32 = 0x10FFFF;

+#[derive(Clone, Copy, Eq, PartialEq)]
 enum Type {
    Malformed,
    Name,
@ -57,39 +57,39 @@ enum Type {
+/// Folds `slice` as base-10 digits into a code point value.
+///
+/// Returns `None` when the value exceeds `MAX_UNICODE_CODE_POINT`. Bytes are not validated here;
+/// presumably the caller only passes ASCII digits -- TODO confirm.
+/// NOTE(review): nothing here bounds the slice length, so a long input could overflow `u32`;
+/// presumably the caller enforces a maximum length -- verify.
 fn parse_decimal(slice: &[u8]) -> Option<u32> {
    let mut val = 0u32;
    for c in slice {
-        val = val * 10 + (c - b'0');
+        val = val * 10 + (c - b'0') as u32;
    }
    if val > MAX_UNICODE_CODE_POINT {
        None
    } else {
-        val
+        Some(val)
    }
 }

+/// Folds `slice` as base-16 digits (either case) into a code point value.
+///
+/// Returns `None` when the value exceeds `MAX_UNICODE_CODE_POINT`. Hits `unreachable!()` on any
+/// byte that is not a decimal digit or an upper/lower case hex letter, so callers must pass
+/// only hexadecimal digits.
+/// NOTE(review): nothing here bounds the slice length, so a long input could overflow `u32`;
+/// presumably the caller enforces a maximum length -- verify.
 fn parse_hexadecimal(slice: &[u8]) -> Option<u32> {
    let mut val = 0u32;
    for c in slice {
-        let digit: u32 = if is_digit(c) {
+        let digit = if is_digit(*c) {
            c - b'0'
-        } else if is_upper_hex_digit(c) {
+        } else if is_upper_hex_digit(*c) {
            c - b'A' + 10
-        } else if is_lower_hex_digit(c) {
+        } else if is_lower_hex_digit(*c) {
            c - b'a' + 10
        } else {
            unreachable!();
        };
-        val = val * 16 + digit;
-    }
+        val = val * 16 + digit as u32;
+    };
    if val > MAX_UNICODE_CODE_POINT {
        None
    } else {
-        val
+        Some(val)
    }
 }

 // This will parse and skip characters. Set a checkpoint to later write skipped, or to ignore results and reset to previous position.
-pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
-    proc.match_char(b'&').expect().discard();
+pub fn parse_entity<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<Option<u32>> {
+    cascade_return!(proc.match_char(b'&').expect().discard());

    // The input can end at any time after initial ampersand.
    // Examples of valid complete source code: "&", "&a", "&#", "&#09",
@ -113,21 +113,21 @@ pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {

    // First stage: determine the type of entity.
    let predicate: fn(u8) -> bool;
-    let entity_type: Type;
+    let mut entity_type: Type;
    let min_len: usize;
    let max_len: usize;

-    if proc.match_seq(b"#x").discard().matched() {
+    if cascade_return!(proc.match_seq(b"#x").discard().matched()) {
        predicate = is_hex_digit;
        entity_type = Type::Hexadecimal;
        min_len = 1;
        max_len = 6;
-    } else if proc.match_char(b'#').discard().matched() {
+    } else if cascade_return!(proc.match_char(b'#').discard().matched()) {
        predicate = is_digit;
        entity_type = Type::Decimal;
        min_len = 1;
        max_len = 7;
-    } else if proc.match_pred(is_valid_entity_reference_name_char).matched() {
+    } else if cascade_return!(proc.match_pred(is_valid_entity_reference_name_char).matched()) {
        predicate = is_valid_entity_reference_name_char;
        entity_type = Type::Name;
        min_len = 2;
@ -136,14 +136,15 @@ pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
        return Ok(None);
    }

-    // Second stage: try to parse a well formed entity.
-    // Malformed entity could be last few characters in code, so allow EOF during entity.
-    let data = proc.match_while_pred(predicate).discard().slice();
-    if data.len() < min_len || data.len() > max_len {
+    // Try consuming semicolon before getting data as slice to prevent issues with borrowing.
+    if !cascade_return!(proc.match_char(b';').discard().matched()) {
        entity_type = Type::Malformed;
    };
-    // Don't try to consume semicolon if entity is not well formed already.
-    if entity_type != Type::Malformed && !proc.match_char(b';').discard().matched() {
+
+    // Second stage: try to parse a well formed entity.
+    // Malformed entity could be last few characters in code, so allow EOF during entity.
+    let data = cascade_return!(proc.match_while_pred(predicate).discard().slice());
+    if data.len() < min_len || data.len() > max_len {
        entity_type = Type::Malformed;
    };

@ -162,7 +163,7 @@ pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
 * @return Unicode code point of the entity, or HB_UNIT_ENTITY_NONE if the
 * entity is malformed or invalid
 */
-pub fn process_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
+pub fn process_entity<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<Option<u32>> {
    let checkpoint = proc.checkpoint();
    let parsed = parse_entity(proc)?;

@ -170,7 +171,7 @@ pub fn process_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
        proc.write_utf8(cp);
    } else {
        // Write discarded characters that could not form a well formed entity.
-        checkpoint.write_skipped();
+        proc.write_skipped(checkpoint);
    };

    Ok(parsed)
--- a/src/unit/mod.rs
+++ b/src/unit/mod.rs
@ -0,0 +1,8 @@
+pub mod attr;
+pub mod bang;
+pub mod comment;
+pub mod content;
+pub mod entity;
+pub mod script;
+pub mod style;
+pub mod tag;
--- a/src/unit/script.rs
+++ b/src/unit/script.rs
@ -1,19 +1,18 @@
 use crate::err::{HbRes, HbErr};
 use crate::proc::{Processor};
-use crate::code::Code;

+/// Whether `c` is a double or single quote byte. Backticks are not included; `process_script`
+/// checks for them separately.
 fn is_string_delimiter(c: u8) -> bool {
    c == b'"' || c == b'\''
 }

-fn parse_comment_single<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    proc.match_seq(b"//").expect().keep();
+fn parse_comment_single<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    cascade_return!(proc.match_seq(b"//").expect().keep());

    // Comment can end at closing </script>.
    // WARNING: Closing tag must not contain whitespace.
    // TODO Optimise
-    while !proc.match_line_terminator().keep().matched() {
-        if proc.match_seq_i(b"</script>").matched() {
+    while !cascade_return!(proc.match_line_terminator().keep().matched()) {
+        if cascade_return!(proc.match_seq(b"</script>").matched()) {
            break;
        }

@ -23,14 +22,14 @@ fn parse_comment_single<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }

-fn parse_comment_multi<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    proc.match_seq(b"/*").expect().keep();
+fn parse_comment_multi<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    cascade_return!(proc.match_seq(b"/*").expect().keep());

    // Comment can end at closing </script>.
    // WARNING: Closing tag must not contain whitespace.
    // TODO Optimise
-    while !proc.match_seq(b"*/").keep().matched() {
-        if proc.match_seq_i(b"</script>").matched() {
+    while !cascade_return!(proc.match_seq(b"*/").keep().matched()) {
+        if cascade_return!(proc.match_seq(b"</script>").matched()) {
            break;
        }

@ -40,8 +39,8 @@ fn parse_comment_multi<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }

-fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    let delim = proc.match_pred(is_string_delimiter).expect().keep().char();
+fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    let delim = cascade_return!(proc.match_pred(is_string_delimiter).expect().keep().char());

    let mut escaping = false;

@ -57,7 +56,7 @@ fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
            break;
        }

-        if proc.match_line_terminator().keep().matched() {
+        if cascade_return!(proc.match_line_terminator().keep().matched()) {
            if !escaping {
                return Err(HbErr::ExpectedNotFound("Unterminated JavaScript string"));
            }
@ -69,8 +68,8 @@ fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }

-fn parse_template<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    proc.match_char(b'`').expect().keep();
+fn parse_template<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    cascade_return!(proc.match_char(b'`').expect().keep());

    let mut escaping = false;

@ -92,15 +91,15 @@ fn parse_template<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }

-pub fn process_script<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    while !proc.match_seq(b"</").matched() {
-        if proc.match_seq(b"//").matched() {
+pub fn process_script<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    while !cascade_return!(proc.match_seq(b"</").matched()) {
+        if cascade_return!(proc.match_seq(b"//").matched()) {
            parse_comment_single(proc)?;
-        } else if proc.match_seq(b"/*").matched() {
+        } else if cascade_return!(proc.match_seq(b"/*").matched()) {
            parse_comment_multi(proc)?;
-        } else if proc.match_pred(is_string_delimiter).matched() {
+        } else if cascade_return!(proc.match_pred(is_string_delimiter).matched()) {
            parse_string(proc)?;
-        } else if proc.match_char(b'`').matched() {
+        } else if cascade_return!(proc.match_char(b'`').matched()) {
            parse_template(proc)?;
        } else {
            proc.accept()?;
--- a/src/unit/style.rs
+++ b/src/unit/style.rs
@ -1,6 +1,5 @@
 use crate::proc::Processor;
 use crate::err::{HbRes, HbErr};
-use crate::code::Code;

 fn is_string_delimiter(c: u8) -> bool {
    match c {
@ -9,19 +8,19 @@ fn is_string_delimiter(c: u8) -> bool {
    }
 }

-fn parse_comment<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    proc.match_seq(b"/*").expect().keep();
+/// Processes a CSS comment (`/* ... */`), keeping every byte of it.
+///
+/// Returns an error if `accept()` fails before the closing `*/` is found.
+fn parse_comment<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    cascade_return!(proc.match_seq(b"/*").expect().keep());

    // Unlike script tags, style comments do NOT end at closing tag.
-    while !proc.match_seq(b"*/").keep().matched() {
+    while !cascade_return!(proc.match_seq(b"*/").keep().matched()) {
-        proc.accept();
+        // Propagate failures instead of discarding them, as every other `accept()` call site
+        // does. NOTE(review): if `accept()` fails without advancing (e.g. at end of input),
+        // ignoring the error would keep this loop spinning on an unterminated comment.
+        proc.accept()?;
    };

    Ok(())
 }

-fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    let delim = proc.match_pred(is_string_delimiter).expect().keep().char();
+fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    let delim = cascade_return!(proc.match_pred(is_string_delimiter).expect().keep().char());

    let mut escaping = false;

@ -37,7 +36,7 @@ fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
            break;
        }

-        if proc.match_line_terminator().keep().matched() {
+        if cascade_return!(proc.match_line_terminator().keep().matched()) {
            if !escaping {
                // TODO Use better error type.
                return Err(HbErr::ExpectedNotFound("Unterminated CSS string"));
@ -50,11 +49,11 @@ fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }

-pub fn process_style<D: Code>(proc: &Processor<D>) -> HbRes<()> {
-    while !proc.match_seq(b"</").matched() {
-        if proc.match_seq(b"/*").matched() {
+pub fn process_style<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    while !cascade_return!(proc.match_seq(b"</").matched()) {
+        if cascade_return!(proc.match_seq(b"/*").matched()) {
            parse_comment(proc)?;
-        } else if proc.match_pred(is_string_delimiter).matched() {
+        } else if cascade_return!(proc.match_pred(is_string_delimiter).matched()) {
            parse_string(proc)?;
        } else {
            proc.accept()?;
--- a/src/unit/tag.rs
+++ b/src/unit/tag.rs
@ -1,12 +1,11 @@
-use crate::proc::attr::{AttrType, process_attr};
-use crate::err::{HbRes, HbErr};
+use crate::err::{HbErr, HbRes};
 use crate::proc::Processor;
 use crate::spec::codepoint::{is_alphanumeric, is_whitespace};
-use crate::proc::content::process_content;
-use crate::proc::script::process_script;
-use crate::proc::style::process_style;
 use crate::spec::tag::void::VOID_TAGS;
-use crate::code::Code;
+use crate::unit::attr::{AttrType, process_attr};
+use crate::unit::content::process_content;
+use crate::unit::script::process_script;
+use crate::unit::style::process_style;

 // Tag names may only use ASCII alphanumerics. However, some people also use `:` and `-`.
 // See https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name for spec.
@ -14,13 +13,12 @@ fn is_valid_tag_name_char(c: u8) -> bool {
    is_alphanumeric(c) || c == b':' || c == b'-'
 }

-fn process_tag_name<'d, D: Code>(proc: &Processor<'d, D>) -> HbRes<&'d [u8]> {
-    Ok(proc.while_pred(is_valid_tag_name_char).require_reason("tag name")?.accept().slice())
-}
-
-pub fn process_tag<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes<()> {
-    proc.is('<').require().accept();
-    let name = process_tag_name(proc)?;
+pub fn process_tag<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
+    // Expect to be currently at an opening tag.
+    cascade_return!(proc.match_char(b'<').expect().keep())
+    ;
+    // May not be valid tag name at current position, so require instead of expect.
+    let name_token = cascade_return!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("tag name")?.keep().range());

    let mut last_attr_type = AttrType::None;
    let mut self_closing = false;
@ -29,14 +27,15 @@ pub fn process_tag<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes
        // At the beginning of this loop, the last parsed unit was
        // either the tag name or an attribute (including its value, if
        // it had one).
-        let ws_accepted = proc.match_while_pred(is_whitespace).discard().count();
+        let ws_accepted = cascade_return!(proc.match_while_pred(is_whitespace).discard().matched());

-        if proc.match_char(b'>').keep().matched() {
+        if cascade_return!(proc.match_char(b'>').keep().matched()) {
            // End of tag.
            break;
        }

-        if self_closing = proc.match_seq(b"/>").keep().matched() {
+        self_closing = cascade_return!(proc.match_seq(b"/>").keep().matched());
+        if self_closing {
            break;
        }

@ -52,28 +51,29 @@ pub fn process_tag<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes
        }

        last_attr_type = process_attr(proc)?;
-    }
+    };

-    if self_closing || VOID_TAGS.contains(&name) {
+    if self_closing || VOID_TAGS.contains(&proc[name_token]) {
        return Ok(());
-    }
+    };

    // TODO WARNING: Tags must be case sensitive.
-    match name {
+    match &proc[name_token] {
        b"script" => process_script(proc)?,
        b"style" => process_style(proc)?,
-        _ => process_content(proc, Some(name))?,
-    }
+        _ => process_content(proc, Some(name_token))?,
+        _ => unreachable!(),
+    };

    // Require closing tag for non-void.
-    proc.match_seq(b"</").require_with_reason("closing tag")?.keep();
-    let closing_name = process_tag_name(proc)?;
-    if name != closing_name {
+    cascade_return!(proc.match_seq(b"</").require_with_reason("closing tag")?.keep());
+    let closing_name = cascade_return!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("closing tag name")?.keep().slice());
+    if &proc[name_token] != closing_name {
        // TODO Find a way to cleanly provide opening and closing tag
        // names (which are views) into error message without leaking
        // memory.
        return Err(HbErr::UnclosedTag);
-    }
-    proc.match_char(b'>').require_with_reason("closing tag")?.keep();
+    };
+    cascade_return!(proc.match_char(b'>').require_with_reason("closing tag")?.keep());
    Ok(())
 }