Complete initial migration to Rust

2019-12-25 20:44:51 +11:00 · 2019-12-25 20:44:51 +11:00 · 806560dd94
parent d75d62883b
commit 806560dd94
26 changed files with 911 additions and 1027 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -6,3 +6,5 @@ edition = "2018"
 [dependencies]
 phf = { version = "0.8.0", features = ["macros"] }
 cascade = "0.1.4"
 structopt = "0.3.5"
--- a/archive/quoted.rs
+++ b/archive/quoted.rs
@ -1,130 +0,0 @@
 // Archived draft of quoted attribute value processing.
 // NOTE(review): as written this function references names that are not declared inside it
 // (`c`, `proc`, `delimiter`, `should_collapse_and_trim_ws`, the quotation counters, ...) and its
 // declared return type `()` does not match the `Ok(AttrType::...)` values it returns.
 fn tmp() -> () {
    // TODO
    // NOTE(review): this loop contains no `break` or `return`, so the code after it is
    // unreachable as written.
    loop {
        let is_whitespace = is_whitespace(c);
        if should_collapse_and_trim_ws && is_whitespace {
            // Character, after any entity decoding, is whitespace.
            // Don't write whitespace.
            // In order to collapse whitespace, only write one space
            // character once the first non-whitespace character
            // after a sequence of whitespace characters is reached.
            last_char_was_whitespace = true;
            proc.skip();
        } else {
            // Character, after any entity decoding, is not whitespace.
            if last_char_was_whitespace {
                // This is the first non-whitespace character after one or more whitespace
                // character(s), so collapse whitespace by writing only one space.
                proc.write(b' ');
                has_whitespace_after_processing = true;
                last_char_was_whitespace = false;
            };
            if c == b'"' {
                count_double_quotation += 1;
            } else if c == b'\'' {
                count_single_quotation += 1;
            } else if is_whitespace {
                // `should_collapse_and_trim_ws` is false, so
                // whitespace is written.
                has_whitespace_after_processing = true;
            };
            increment_count(c);
            if !processed_entity {
                // Don't need to accept if hb_unit_entity has
                // already been called.
                proc.accept();
            };
        };
    }
    // Since it's not possible to optimise the delimiter quotes without
    // knowing the complete value, mark the processed value in the output
    // for post-processing later.
    let proc_value_start = proc.data.get_out_pos();
    let mut is_first_char = true;
    // NOTE(review): the only way out of this loop is the `?` on `process_entity`.
    loop {
        let processed_entity = c == b'&';
        if processed_entity {
            // Characters will be consumed by hb_unit_entity, but they will never be '\'', '"', or
            // whitespace, as the function only consumes characters that could form a  well formed
            // entity. See the function for more details.
            // TODO Handle bad char
            let decoded = process_entity(proc)?;
            // Decoded values above 0x7f, or no decoded value at all, are replaced by 0xff.
            match decoded {
                Some(e) => if e <= 0x7f { c = e as u8; } else { c = 0xff; },
                None => c = 0xff,
            };
        }
        is_first_char = false;
    };
    let proc_length = proc.data.get_out_pos() + 1 - proc_value_start;
    proc.match_char(delimiter).require()?.discard();
    // Technically, the specification states that values may only be
    // unquoted if they don't contain ["'`=<>]. However, browsers seem to
    // interpret characters after `=` and before the nearest whitespace as
    // an unquoted value, so long as no quote immediately follows `=`. If a
    // value cannot be unquoted, use the one that appears the least and
    // therefore requires the least amount of encoding. Prefer double quotes
    // to single quotes if it's a tie.
    let quote_to_encode;
    let quote_encoded;
    let amount_of_quotes_to_encode;
    if proc_length > 0 && !has_whitespace_after_processing && !starts_with_quote {
        // No need to do any further processing; processed value is
        // already in unquoted form.
        return Ok(AttrType::Unquoted);
    } else if count_single_quotation < count_double_quotation {
        quote_to_encode = b'\'';
        quote_encoded = ENCODED_SINGLE_QUOTE;
        amount_of_quotes_to_encode = count_single_quotation;
    } else {
        quote_to_encode = b'"';
        quote_encoded = ENCODED_DOUBLE_QUOTE;
        amount_of_quotes_to_encode = count_double_quotation;
    }
    // TODO Improve; avoid direct memory access; clean API.
    // Two delimiter quotes, plus the value with every encoded quote expanded to `quote_encoded`.
    let post_length = 2 + proc_length - amount_of_quotes_to_encode + (amount_of_quotes_to_encode * quote_encoded.len());
    // Where the post-processed output should start in the output array.
    let out_start = proc_value_start;
    let proc_end = out_start + proc_length - 1;
    let post_end = out_start + post_length - 1;
    let mut reader = proc_end;
    let mut writer = post_end;
    // Closing delimiter goes in the last slot.
    proc.data.set_out_char_at(writer, quote_to_encode);
    writer -= 1;
    // To prevent overwriting data when encoding quotes, post-process output
    // in reverse. Loop condition is checked at end of loop instead of
    // before to prevent underflow. WARNING: This code directly uses and
    // manipulates struct members of `proc`, which in general should be
    // avoided.
    loop {
        // NOTE(review): `reader` is derived from output positions but is read via
        // `get_src_char_at` — confirm source and output share a buffer here.
        let c = proc.data.get_src_char_at(reader);
        if c == quote_to_encode {
            writer -= quote_encoded.len();
            proc.data.replace_out_slice(writer + 1, quote_encoded);
        } else {
            proc.data.set_out_char_at(writer, c);
            writer -= 1;
        }
        // Break before decrementing to prevent underflow.
        if reader == out_start {
            break;
        }
        reader -= 1;
    }
    // This must be done after previous loop to prevent overwriting data.
    proc.data.set_out_char_at(writer, quote_to_encode);
    proc.data.set_out_pos(post_end + 1);
    Ok(AttrType::Quoted)
 }
--- a/src/code.rs
+++ b/src/code.rs
@ -0,0 +1,30 @@
 use std::ops::Range;
 // TODO Inline with proc.
 /// Thin wrapper over the mutable byte buffer that is read from and written to by position.
 pub struct Code<'d> {
    /// The wrapped buffer.
    pub data: &'d mut [u8],
 }
 impl<'d> Code<'d> {
    /// Number of bytes in the buffer.
    pub fn len(&self) -> usize {
        let buf: &[u8] = self.data;
        buf.len()
    }
    /// Byte at `pos`. Panics if `pos` is out of bounds.
    pub fn read_char(&self, pos: usize) -> u8 {
        let byte = self.data[pos];
        byte
    }
    /// Borrow the bytes in `range`. Panics if `range` is out of bounds.
    pub fn read_slice(&self, range: Range<usize>) -> &[u8] {
        let Range { start, end } = range;
        &self.data[start..end]
    }
    /// Copy the bytes in `src` so that they start at `to`; the regions may overlap.
    pub fn copy_within(&mut self, src: Range<usize>, to: usize) {
        let buf = &mut *self.data;
        buf.copy_within(src, to);
    }
    /// Overwrite the byte at `pos` with `c`. Panics if `pos` is out of bounds.
    pub fn write_char(&mut self, pos: usize, c: u8) {
        let slot = &mut self.data[pos];
        *slot = c;
    }
    /// Overwrite `s.len()` bytes starting at `pos` with `s`. Panics if that exceeds the buffer.
    pub fn write_slice(&mut self, pos: usize, s: &[u8]) {
        let end = pos + s.len();
        self.data[pos..end].copy_from_slice(s);
    }
 }
--- a/src/code/inplace.rs
+++ b/src/code/inplace.rs
@ -1,10 +0,0 @@
 // Code storage where a single buffer is both the source and the output.
 pub struct CodeInPlace<'data> {
    // Buffer that is read from and written to.
    data: &'data mut [u8],
    // NOTE(review): presumably the offset of the next byte to read — confirm.
    read_next: usize,
    // Offset of the next unwritten space.
    write_next: usize,
 }
 // Empty stub: none of the required `Code` methods are provided here.
 // NOTE(review): the `'data` lifetime parameter is not named in this impl header.
 impl Code for CodeInPlace {
 }
--- a/src/code/mod.rs
+++ b/src/code/mod.rs
@ -1,57 +0,0 @@
 use std::ops::Range;
 // Abstraction over a source to read from and an output to write to, each with its own position.
 // NOTE(review): the provided methods `read` and `read_slice` call `unsafe fn`s without an
 // `unsafe` block, and every method takes `&self` even where it sets or writes state.
 pub trait Code {
    // Unsafe direct memory access.
    // TODO Pos refers to index of next readable.
    unsafe fn get_src_pos(&self) -> usize;
    /// Does NOT check bounds (assumes already checked).
    unsafe fn set_src_pos(&self, pos: usize) -> ();
    unsafe fn get_src_char_at(&self, pos: usize) -> u8;
    /// Get a slice from `start` (inclusive) to `end` (exclusive).
    unsafe fn get_src_slice(&self, range: Range<usize>) -> &[u8];
    // TODO Pos refers to index of next writable.
    unsafe fn get_out_pos(&self) -> usize;
    /// Does NOT check bounds (assumes already checked).
    unsafe fn set_out_pos(&self, pos: usize) -> usize;
    unsafe fn set_out_char_at(&self, pos: usize, c: u8) -> ();
    unsafe fn get_out_mut_slice(&self, range: Range<usize>) -> &mut [u8];
    unsafe fn replace_out_at(&self, pos: usize, s: &[u8]) -> ();
    // Checking bounds.
    fn in_bounds(&self, offset: usize) -> bool;
    // At the end when not even offset 0 is in bounds.
    fn at_end(&self) -> bool {
        !self.in_bounds(0)
    }
    // Reading.
    /// Get the `offset` character from next.
    /// When `offset` is 0, the next character is returned.
    /// Panics. Does not check bounds for performance (e.g. already checked).
    fn read(&self, offset: usize) -> u8 {
        self.get_src_char_at(self.get_src_pos() + offset)
    }
    // Bounds-checked variant of `read`: `None` when `offset` is not in bounds.
    fn maybe_read(&self, offset: usize) -> Option<u8> {
        if self.in_bounds(offset) {
            Some(self.read(offset))
        } else {
            None
        }
    }
    /// Get a slice of the next `count` characters from next.
    /// Panics. Does not check bounds for performance (e.g. already checked).
    fn read_slice(&self, count: usize) -> &[u8] {
        self.get_src_slice(self.get_src_pos()..self.get_src_pos() + count)
    }
    // Writing.
    /// Move next `amount` characters to output.
    /// Panics. Does not check bounds for performance (e.g. already checked).
    fn shift(&self, amount: usize) -> ();
    fn write(&self, c: u8) -> ();
    fn write_slice(&self, s: &[u8]) -> ();
    // Skipping.
    /// Panics. Does not check bounds for performance (e.g. already checked).
    fn consume(&self, amount: usize) -> ();
 }
--- a/src/code/outofplace.rs
+++ b/src/code/outofplace.rs
@ -1,11 +0,0 @@
 // Code storage with a read-only source buffer and a separate mutable output buffer.
 pub struct CodeOutOfPlace<'src, 'out> {
    // Source bytes.
    src: &'src [u8],
    // NOTE(review): presumably the offset of the next source byte to read — confirm.
    src_next: usize,
    // Output bytes.
    out: &'out mut [u8],
    // NOTE(review): presumably the offset of the next output byte to write — confirm.
    out_next: usize,
 }
 // Empty stub: none of the required `Code` methods are provided here.
 // NOTE(review): the `'src`/`'out` lifetime parameters are not named in this impl header.
 impl Code for CodeOutOfPlace {
 }
--- a/src/err.rs
+++ b/src/err.rs
@ -1,3 +1,4 @@
 #[derive(Debug)]
 pub enum HbErr {
    ExpectedCharNotFound { expected: u8, got: u8 },
    ExpectedMatchNotFound(&'static [u8]),
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,12 +1,13 @@
 use crate::err::HbRes;
 use crate::proc::Processor;
 use crate::unit::content::process_content;
 mod code;
-mod err;
+pub mod err;
 #[macro_use]
 mod proc;
 mod spec;
-
+mod unit;
 use err::HbRes;
 use crate::code::Code;
 use crate::proc::content::process_content;
 use crate::proc::Processor;
 /**
 * Run hyperbuild on an input array and write to {@param output}. Output will be
@ -20,6 +21,8 @@ use crate::proc::Processor;
 * @param cfg configuration to use
 * @return result where to write any resulting error information
 */
-fn hyperbuild<T: Code>(code: &mut T) -> HbRes<()> {
+pub fn hyperbuild<'d>(code: &'d mut [u8]) -> HbRes<usize> {
-    process_content(&Processor { data: code }, None)
+    let mut p = Processor::new(code);
    process_content(&mut p, None)?;
    Ok(p.written_len())
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,27 @@
 use std::fs::File;
 use std::io::{Read, stdin, stdout, Write};
 use structopt::StructOpt;
 use hyperbuild::hyperbuild;
 // Command-line arguments, parsed by structopt.
 // Plain `//` comments are used on purpose: `///` doc comments would become CLI help text.
 #[derive(StructOpt)]
 struct Cli {
    // Path of the file whose contents are read and processed.
    #[structopt(short, long, parse(from_os_str))]
    src: std::path::PathBuf,
    // Path of the file the processed bytes are written to.
    #[structopt(short, long, parse(from_os_str))]
    out: std::path::PathBuf,
 }
 /// Read the source file into memory, process it in place with `hyperbuild`, and write the
 /// processed prefix of the buffer to the output file.
 ///
 /// Panics with a descriptive message on any I/O failure or processing error.
 fn main() {
    let args = Cli::from_args();
    let mut vec = Vec::<u8>::new();
    let mut src_file = File::open(args.src).expect("could not read source file");
    // A failed or partial read must not be silently processed as if it were the whole file.
    src_file.read_to_end(&mut vec).expect("could not read source file");
    let code = vec.as_mut_slice();
    // TODO
    // `result` is the number of bytes of `code` that hold the output.
    let result = hyperbuild(code).unwrap();
    println!("{}", result);
    let mut out_file = File::create(args.out).expect("could not open output file");
    out_file.write_all(&code[..result]).expect("could not write to output file");
    println!("Done!")
 }
--- a/src/proc.rs
+++ b/src/proc.rs
@ -0,0 +1,446 @@
 use std::ops::Index;
 use phf::Set;
 use crate::code::Code;
 use crate::err::{HbErr, HbRes};
 // Runs a chain of method calls on `$proc` as separate statements and evaluates to the value of
 // the final call. Each call may be followed by `?`, which is applied to that call's result.
 // Example shape: `cascade_return!(proc.a(x)?.b().c())` expands to `proc.a(x)?; proc.b();` and
 // yields `proc.c()`.
 macro_rules! cascade_return {
    // Entry point: expand the chain, binding the final call's value to `last`.
    ($proc:ident $($tail:tt)+) => ({
        cascade_return!(@line $proc, last, $($tail)+);
        last
    });
    // Match `?` operator before a call without `?`.
    // Non-final call followed by `?`: run it, propagate errors, continue with the rest.
    (@line $proc:ident, $last:ident, . $method:ident($($arg:expr),*)? $($tail:tt)+) => {
        $proc.$method($($arg),*)?;
        cascade_return!(@line $proc, $last, $($tail)*);
    };
    // Non-final call without `?`: run it, discard its value, continue with the rest.
    (@line $proc:ident, $last:ident, . $method:ident($($arg:expr),*) $($tail:tt)+) => {
        $proc.$method($($arg),*);
        cascade_return!(@line $proc, $last, $($tail)*);
    };
    // Final call followed by `?`: bind its unwrapped value to `$last`.
    (@line $proc:ident, $last:ident, . $method:ident($($arg:expr),*)?) => {
        let $last = $proc.$method($($arg),*)?;
    };
    // Final call without `?`: bind its value to `$last`.
    (@line $proc:ident, $last:ident, . $method:ident($($arg:expr),*)) => {
        let $last = $proc.$method($($arg),*);
    };
 }
 // What a match was looking for. Stored on the match and used by `Processor::_require` to pick
 // the error to return when nothing was matched.
 #[derive(Copy, Clone)]
 pub enum RequireReason {
    // No specific expectation recorded; the error text comes from the `require_with_reason` caller.
    Custom,
    // Set by `match_not_char`: any character other than this one was wanted.
    ExpectedNotChar(u8),
    // A specific byte sequence was wanted.
    ExpectedMatch(&'static [u8]),
    // Set by `match_char`: exactly this character was wanted.
    ExpectedChar(u8),
 }
 // Result of the most recent `match_*` call on a `Processor`. A `count` of zero means the match
 // did not match anything.
 #[derive(Copy, Clone)]
 struct Match {
    // Need to record start as we might get slice after keeping or skipping.
    start: usize,
    // Guaranteed amount of characters that exist from `start` at time of creation of this struct.
    count: usize,
    // Character matched, if any. Only exists for single-character matches and if matched.
    char: Option<u8>,
    // What the match was looking for; used to build the error when a match is required.
    reason: RequireReason,
 }
 // Snapshot of a `Processor`'s read and write positions, taken by `Processor::checkpoint`.
 #[derive(Copy, Clone)]
 pub struct Checkpoint {
    // Read position at the time of the snapshot.
    read_next: usize,
    // Write position at the time of the snapshot.
    write_next: usize,
 }
 // TODO DOC
 // Half-open range of positions in a `Processor`'s buffer: `start` inclusive, `end` exclusive.
 // Produced by `Processor::range` and usable to index a `Processor` to get the bytes back.
 #[derive(Copy, Clone)]
 pub struct ProcessorRange {
    start: usize,
    end: usize,
 }
 // Processing state of a file. Most fields are used internally and set during
 // processing. Single use only; create one per processing.
 pub struct Processor<'d> {
    // Buffer that is both read from and written to.
    code: Code<'d>,
    // Most recent match, replaced by every `match_*` call; `None` until the first one.
    m: Option<Match>,
    // Index of the next character to read.
    read_next: usize,
    // Index of the next unwritten space.
    write_next: usize,
 }
 /// Index of the first occurrence of `c` in `s` at or after position `from`, if there is one.
 fn index_of(s: &'static [u8], c: u8, from: usize) -> Option<usize> {
    s.iter()
        .skip(from)
        .position(|&byte| byte == c)
        // `position` counts from the first non-skipped byte; shift back to an index into `s`.
        .map(|offset| from + offset)
 }
 // For fast not-matching, ensure that it's possible to continue directly to next character in string
 // when searching for first substring matching pattern in string and only partially matching pattern.
 // For example, given string "abcdabc" and pattern "abcde", normal substring searching would match
 // "abcd", fail, and then start searching from 'b' at index 1. We want to be able to continue searching
 // from 'a' at index 4.
 // Concretely, this checks (in debug builds only) that the pattern is non-empty and that its
 // first byte does not occur again anywhere later in the pattern.
 macro_rules! debug_assert_fast_pattern {
    ($x:expr) => {
        debug_assert!($x.len() > 0 && index_of($x, $x[0], 1) == None);
    }
 }
 impl<'d> Index<ProcessorRange> for Processor<'d> {
    type Output = [u8];
    fn index(&self, index: ProcessorRange) -> &Self::Output {
        self.code.read_slice(index.start..index.end)
    }
 }
 // For consistency and improvement of internal API, only write public functions using internal APIs.
 // Do not call other public Processor methods.
 impl<'d> Processor<'d> {
    // INTERNAL APIs.
    // Checking bounds.
    /// Whether the character `offset` positions after the next one to read exists.
    fn in_bounds(&self, offset: usize) -> bool {
        let pos = self.read_next + offset;
        pos < self.code.len()
    }
    // Reading.
    /// Get the character `offset` positions after the next one to read.
    /// An `offset` of 0 yields the next character itself.
    /// Panics. Does not check bounds for performance (e.g. already checked).
    fn read(&self, offset: usize) -> u8 {
        let pos = self.read_next + offset;
        self.code.read_char(pos)
    }
    /// Bounds-checked `read`: `None` when `offset` is past the end.
    fn maybe_read(&self, offset: usize) -> Option<u8> {
        if !self.in_bounds(offset) {
            return None;
        }
        Some(self.read(offset))
    }
    // Writing.
    /// Move next `amount` characters to output, advancing both the read and write positions.
    /// Panics. Does not check bounds for performance (e.g. already checked).
    fn shift(&mut self, amount: usize) {
        let src = self.read_next..self.read_next + amount;
        self.code.copy_within(src, self.write_next);
        self.read_next += amount;
        // The output grew by `amount`. Without this the written length stays at zero and every
        // later write lands on top of this one.
        self.write_next += amount;
    }
    // Skipping.
    /// Advance the read position by `amount` without writing anything.
    /// Does not check bounds for performance (e.g. already checked).
    fn consume(&mut self, amount: usize) {
        self.read_next = self.read_next + amount;
    }
    /// Create a processor over `code` with the read and write positions at the start and no
    /// match recorded.
    pub fn new(code: &mut [u8]) -> Processor {
        let code = Code { data: code };
        Processor { code, m: None, read_next: 0, write_next: 0 }
    }
    /// Whether there is no next character to read.
    pub fn at_end(&self) -> bool {
        let has_next = self.in_bounds(0);
        !has_next
    }
    /// Current write position, i.e. the index of the next unwritten space.
    pub fn written_len(&self) -> usize {
        let Processor { write_next, .. } = self;
        *write_next
    }
    // Use match
    // Query
    pub fn matched(&self) -> bool {
        self.m.unwrap().count > 0
    }
    pub fn length(&self) -> usize {
        self.m.unwrap().count
    }
    pub fn char(&self) -> u8 {
        self.m.unwrap().char.unwrap()
    }
    pub fn maybe_char(&self) -> Option<u8> {
        self.m.unwrap().char
    }
    pub fn range(&self) -> ProcessorRange {
        let m = self.m.unwrap();
        ProcessorRange { start: m.start, end: m.start + m.count }
    }
    pub fn slice(&self) -> &[u8] {
        let m = self.m.unwrap();
        self.code.read_slice(m.start..m.start + m.count)
    }
    // Assert
    /// Return `Ok` if the last match matched at least one character, otherwise the error that
    /// describes what the match was looking for.
    ///
    /// `custom_reason` is the text used for matches recorded with `RequireReason::Custom`; a
    /// generic text is used when none is supplied. Panics if no match was made yet.
    fn _require(&self, custom_reason: Option<&'static str>) -> HbRes<()> {
        let m = self.m.unwrap();
        if m.count > 0 {
            return Ok(());
        }
        // A failed match records no character, so look at what is actually at the match start.
        let found = if m.start < self.code.len() {
            Some(self.code.read_char(m.start))
        } else {
            None
        };
        Err(match (m.reason, found) {
            (RequireReason::Custom, _) => HbErr::ExpectedNotFound(custom_reason.unwrap_or("match")),
            (RequireReason::ExpectedMatch(pat), _) => HbErr::ExpectedMatchNotFound(pat),
            // Wanted exactly `expected` but something else was there.
            (RequireReason::ExpectedChar(expected), Some(got)) => HbErr::ExpectedCharNotFound { expected, got },
            // Wanted anything but `c`, so the character found must have been `c`.
            (RequireReason::ExpectedNotChar(c), Some(_)) => HbErr::UnexpectedCharFound(c),
            // A single-character match at the end of input failed because nothing was left.
            (_, None) => HbErr::UnexpectedEnd,
        })
    }
    pub fn require(&self) -> HbRes<()> {
        self._require(None)
    }
    pub fn require_with_reason(&self, reason: &'static str) -> HbRes<()> {
        self._require(Some(reason))
    }
    // TODO Document
    pub fn expect(&self) -> () {
        // TODO Maybe debug_assert?
        assert!(self.m.unwrap().count > 0);
    }
    // Commit.
    // Note that m.count has already been verified to be valid, so don't need to bounds check again.
    pub fn keep(&mut self) -> () {
        self.shift(self.m.unwrap().count);
    }
    pub fn discard(&mut self) -> () {
        self.read_next = self.m.unwrap().start + self.m.unwrap().count;
    }
    // Helper internal functions for match_* API.
    /// Record a match of `count` characters starting at the current read position.
    fn _new_match(&mut self, count: usize, char: Option<u8>, reason: RequireReason) {
        // A previous match is deliberately overwritten without any check: clearing it after
        // every use would cost performance and force the query methods to take `&mut self`.
        self.m = Some(Match { start: self.read_next, count, char, reason });
    }
    /// Match the next character if it exists and satisfies `cond`; otherwise record an empty match.
    fn _match_one<C: FnOnce(u8) -> bool>(&mut self, cond: C, reason: RequireReason) {
        let hit = self.maybe_read(0).filter(|&next| cond(next));
        let count = if hit.is_some() { 1 } else { 0 };
        self._new_match(count, hit, reason)
    }
    /// Match as many consecutive characters satisfying `cond` as possible (possibly none).
    fn _match_greedy<C: Fn(u8) -> bool>(&mut self, cond: C) {
        let mut count = 0usize;
        loop {
            if !self.in_bounds(count) || !cond(self.read(count)) {
                break;
            }
            count += 1;
        }
        self._new_match(count, None, RequireReason::Custom)
    }
    // Single-char matching API.
    /// Match the next character if it equals `c`.
    pub fn match_char(&mut self, c: u8) {
        let reason = RequireReason::ExpectedChar(c);
        self._match_one(|next| next == c, reason)
    }
    /// Match the next character if it differs from `c`.
    pub fn match_not_char(&mut self, c: u8) {
        let reason = RequireReason::ExpectedNotChar(c);
        self._match_one(|next| next != c, reason)
    }
    /// Match the next character if it is contained in `set`.
    pub fn match_member(&mut self, set: Set<u8>) {
        let in_set = |next: u8| set.contains(&next);
        self._match_one(in_set, RequireReason::Custom)
    }
    /// Match the next character if it is not contained in `set`.
    pub fn match_not_member(&mut self, set: Set<u8>) {
        let not_in_set = |next: u8| !set.contains(&next);
        self._match_one(not_in_set, RequireReason::Custom)
    }
    /// Match the next character if `pred` returns true for it.
    pub fn match_pred(&mut self, pred: fn(u8) -> bool) {
        self._match_one(pred, RequireReason::Custom)
    }
    /// Match the next character if `pred` returns false for it.
    pub fn match_not_pred(&mut self, pred: fn(u8) -> bool) {
        let rejected = |next: u8| !pred(next);
        self._match_one(rejected, RequireReason::Custom)
    }
    // Match a sequence of characters.
    /// Match the exact byte sequence `pat` at the current read position.
    ///
    /// The match is either all of `pat` or empty. The match records `pat` as what was expected,
    /// so requiring a failed match reports `pat` rather than needing a custom reason.
    pub fn match_seq(&mut self, pat: &'static [u8]) {
        // This comparison is anchored at the read position and never skips ahead, so the
        // "fast pattern" restriction does not apply here; the pattern only has to be non-empty.
        debug_assert!(!pat.is_empty());
        let len = pat.len();
        // For faster short-circuiting matching, compare char-by-char instead of slices.
        let is_match = len > 0
            && self.in_bounds(len - 1)
            && (0..len).all(|i| self.read(i) == pat[i]);
        let count = if is_match { len } else { 0 };
        self._new_match(count, None, RequireReason::ExpectedMatch(pat))
    }
    /// Match one line terminator at the read position: `\n`, `\r\n`, or a lone `\r`.
    pub fn match_line_terminator(&mut self) {
        let count = match self.maybe_read(0) {
            Some(b'\n') => 1,
            Some(b'\r') => {
                // A `\r` directly followed by `\n` counts as a single two-byte terminator.
                if self.maybe_read(1) == Some(b'\n') { 2 } else { 1 }
            }
            _ => 0,
        };
        self._new_match(count, None, RequireReason::Custom)
    }
    // Multi-char matching API.
    /// Match the longest run of characters equal to `c`.
    pub fn match_while_char(&mut self, c: u8) {
        let is_c = |next: u8| next == c;
        self._match_greedy(is_c)
    }
    /// Match the longest run of characters different from `c`.
    pub fn match_while_not_char(&mut self, c: u8) {
        let is_not_c = |next: u8| next != c;
        self._match_greedy(is_not_c)
    }
    /// Match the longest run of characters contained in `set`.
    pub fn match_while_member(&mut self, set: Set<u8>) {
        let in_set = |next: u8| set.contains(&next);
        self._match_greedy(in_set)
    }
    /// Match the longest run of characters not contained in `set`.
    pub fn match_while_not_member(&mut self, set: Set<u8>) {
        let not_in_set = |next: u8| !set.contains(&next);
        self._match_greedy(not_in_set)
    }
    /// Match the longest run of characters for which `pred` returns true.
    pub fn match_while_pred(&mut self, pred: fn(u8) -> bool) {
        self._match_greedy(|next| pred(next))
    }
    /// Match every character from the read position up to, but not including, the first
    /// occurrence of the sequence `s`. If `s` never occurs, everything up to the end is matched.
    ///
    /// `s` must be non-empty and its first byte must not occur again later in `s` (checked in
    /// debug builds), which lets the search continue from the mismatching character after a
    /// partial match instead of backtracking.
    pub fn match_while_not_seq(&mut self, s: &'static [u8]) {
        debug_assert_fast_pattern!(s);
        // TODO Test
        // Number of characters known not to be part of an occurrence of `s`.
        let mut count = 0usize;
        // Offset from the read position of the character being examined.
        let mut srcpos = 0usize;
        // Next character in pattern to match.
        // For example, if `patpos` is 2, we've matched 2 characters so far and need to match character at index 2 in pattern with character `srcpos` in code.
        let mut patpos = 0usize;
        let mut found = false;
        while self.in_bounds(srcpos) {
            if self.read(srcpos) == s[patpos] {
                if patpos == s.len() - 1 {
                    // Matched last character in pattern i.e. whole pattern.
                    found = true;
                    break;
                }
                srcpos += 1;
                patpos += 1;
            } else if patpos == 0 {
                // Not even the start of the pattern; this character belongs to the match.
                count += 1;
                srcpos += 1;
            } else {
                // Partial match failed: its characters belong to the match, and the current
                // character is re-examined against the start of the pattern.
                count += patpos;
                patpos = 0;
            }
        }
        if !found {
            // Input ended in the middle of a partial match, so those characters are not an
            // occurrence of `s` and must be counted too.
            count += patpos;
        }
        self._new_match(count, None, RequireReason::Custom)
    }
    pub fn checkpoint(&self) -> Checkpoint {
        Checkpoint {
            read_next: self.read_next,
            write_next: self.write_next,
        }
    }
    pub fn restore(&mut self, checkpoint: Checkpoint) -> () {
        self.read_next = checkpoint.read_next;
        self.write_next = checkpoint.write_next;
    }
    /// Write characters skipped from source since checkpoint and advance the write position
    /// past them. Must not have written anything since checkpoint.
    pub fn write_skipped(&mut self, checkpoint: Checkpoint) {
        // Make sure that nothing has been written since checkpoint (which would be lost).
        debug_assert_eq!(self.write_next, checkpoint.write_next);
        // Get src code from checkpoint until last consumed character (inclusive).
        let skipped = checkpoint.read_next..self.read_next;
        self.code.copy_within(skipped, checkpoint.write_next);
        // The copied characters are now part of the output; without this they would be
        // overwritten by the next write and never counted in the written length.
        self.write_next += self.read_next - checkpoint.read_next;
    }
    /// Discard characters written since checkpoint but keep source position.
    pub fn erase_written(&mut self, checkpoint: Checkpoint) -> () {
        self.write_next = checkpoint.write_next;
    }
    pub fn consumed_count(&self, checkpoint: Checkpoint) -> usize {
        self.read_next - checkpoint.read_next
    }
    pub fn written_count(&self, checkpoint: Checkpoint) -> usize {
        self.write_next - checkpoint.write_next
    }
    /// Get the character `offset` positions after the next one, or `None` past the end.
    /// An `offset` of 0 yields the next character itself.
    pub fn peek_offset_eof(&self, offset: usize) -> Option<u8> {
        let peeked = self.maybe_read(offset);
        peeked
    }
    /// Like `peek_offset_eof`, but being past the end is an `UnexpectedEnd` error.
    pub fn peek_offset(&self, offset: usize) -> HbRes<u8> {
        match self.maybe_read(offset) {
            Some(c) => Ok(c),
            None => Err(HbErr::UnexpectedEnd),
        }
    }
    /// Get the next character, or `None` at the end.
    pub fn peek_eof(&self) -> Option<u8> {
        let peeked = self.maybe_read(0);
        peeked
    }
    /// Get the next character; being at the end is an `UnexpectedEnd` error.
    pub fn peek(&self) -> HbRes<u8> {
        match self.maybe_read(0) {
            Some(c) => Ok(c),
            None => Err(HbErr::UnexpectedEnd),
        }
    }
    /// Advance past the next `count` characters (zero is allowed) without writing them.
    /// Returns `UnexpectedEnd` if fewer than `count` characters remain.
    pub fn skip_amount(&mut self, count: usize) -> HbRes<()> {
        // `count - 1` is only evaluated for a non-zero `count`, so it cannot underflow.
        if count != 0 && !self.in_bounds(count - 1) {
            return Err(HbErr::UnexpectedEnd);
        }
        self.consume(count);
        Ok(())
    }
    /// Advance past the next character without writing it, and return it.
    /// Returns `UnexpectedEnd` if there is no next character.
    pub fn skip(&mut self) -> HbRes<u8> {
        if self.at_end() {
            return Err(HbErr::UnexpectedEnd);
        }
        let skipped = self.read(0);
        self.consume(1);
        Ok(skipped)
    }
    /// Write `c` to output and advance the write position by one. Will panic if exceeds bounds.
    pub fn write(&mut self, c: u8) {
        self.code.write_char(self.write_next, c);
        // Move past the byte just written so that the next write does not overwrite it.
        self.write_next += 1;
    }
    /// Write `s` to output and advance the write position by `s.len()`. Will panic if exceeds
    /// bounds.
    pub fn write_slice(&mut self, s: &[u8]) {
        self.code.write_slice(self.write_next, s);
        // Move past the bytes just written so that the next write does not overwrite them.
        self.write_next += s.len();
    }
    /// Write the UTF-8 encoding of `c` to the output, one byte at a time.
    /// Does not check if `c` is a valid Unicode code point, but panics if `c` exceeds 0x10FFFF.
    pub fn write_utf8(&mut self, c: u32) {
        // Encoded by hand rather than through `char::encode_utf8`, which needs a valid code
        // point and a caller-provided buffer.
        // Continuation byte carrying the six bits of `c` that start at bit `shift`.
        let continuation = |shift: u32| (((c >> shift) & 0x3F) | 0x80) as u8;
        match c {
            // Plain ASCII.
            0..=0x7F => {
                self.write(c as u8);
            }
            // 2-byte UTF-8.
            0x80..=0x07FF => {
                self.write((((c >> 6) & 0x1F) | 0xC0) as u8);
                self.write(continuation(0));
            }
            // 3-byte UTF-8.
            0x0800..=0xFFFF => {
                self.write((((c >> 12) & 0x0F) | 0xE0) as u8);
                self.write(continuation(6));
                self.write(continuation(0));
            }
            // 4-byte UTF-8.
            0x1_0000..=0x10_FFFF => {
                self.write((((c >> 18) & 0x07) | 0xF0) as u8);
                self.write(continuation(12));
                self.write(continuation(6));
                self.write(continuation(0));
            }
            _ => unreachable!(),
        }
    }
    /// Move the next character to the output and return it.
    /// Returns `UnexpectedEnd` if there is no next character.
    pub fn accept(&mut self) -> HbRes<u8> {
        if self.at_end() {
            return Err(HbErr::UnexpectedEnd);
        }
        let accepted = self.read(0);
        self.shift(1);
        Ok(accepted)
    }
    /// Move the next `count` characters (zero is allowed) to the output.
    /// Returns `UnexpectedEnd` if fewer than `count` characters remain.
    pub fn accept_amount(&mut self, count: usize) -> HbRes<()> {
        // `count - 1` is only evaluated for a non-zero `count`, so it cannot underflow.
        if count != 0 && !self.in_bounds(count - 1) {
            return Err(HbErr::UnexpectedEnd);
        }
        self.shift(count);
        Ok(())
    }
 }
--- a/src/proc/attr/mod.rs
+++ b/src/proc/attr/mod.rs
@ -1,48 +0,0 @@
 use crate::proc::Processor;
 use crate::err::HbRes;
 use crate::spec::codepoint::is_control;
 use crate::code::Code;
 use crate::proc::attr::quoted::{is_attr_quote, process_quoted_val};
 use crate::proc::attr::unquoted::process_attr_unquoted_val;
 mod quoted;
 mod unquoted;
 // Form of an attribute's value, as reported by `process_attr`.
 pub enum AttrType {
    // Special value for hb_unit_tag.
    None,
    // Value was processed as a quoted value.
    Quoted,
    // Value was processed as an unquoted value.
    Unquoted,
    // Attribute name was not followed by `=`.
    NoValue,
 }
 // Whether `c` may appear in an attribute name: anything except space, `"`, `'`, `>`, `/`, `=`
 // and control characters.
 // NOTE: Unicode noncharacters not tested.
 // See https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name for spec.
 fn is_name_char(c: u8) -> bool {
    let is_delimiter = c == b' '
        || c == b'"'
        || c == b'\''
        || c == b'>'
        || c == b'/'
        || c == b'=';
    !is_delimiter && !is_control(c)
 }
 // Processes one attribute: its name, an optional `=`, and then a quoted or unquoted value.
 // Returns which form the value took. Fails if no attribute name is found or if value
 // processing fails.
 pub fn process_attr<D: Code>(proc: &Processor<D>) -> HbRes<AttrType> {
    let name = proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().slice();
    // Only `class` values get their whitespace collapsed and trimmed.
    let collapse_and_trim_ws = name.eq_ignore_ascii_case(b"class");
    if !proc.match_char(b'=').keep().matched() {
        return Ok(AttrType::NoValue);
    }
    if proc.match_pred(is_attr_quote).matched() {
        // Quoted attribute value.
        return process_quoted_val(proc, collapse_and_trim_ws);
    }
    // Unquoted attribute value.
    process_attr_unquoted_val(proc)?;
    Ok(AttrType::Unquoted)
 }
--- a/src/proc/attr/unquoted.rs
+++ b/src/proc/attr/unquoted.rs
@ -1,36 +0,0 @@
 use crate::proc::Processor;
 use crate::err::{HbRes, HbErr};
 use crate::spec::codepoint::is_whitespace;
 use crate::code::Code;
 use crate::proc::entity::process_entity;
 // Whether `c` may appear in an unquoted attribute value.
 // Not allowed are whitespace and any of: " ' ` = < >
 // See https://html.spec.whatwg.org/multipage/syntax.html#unquoted for spec.
 fn is_valid_unquoted_value_char(c: u8) -> bool {
    match c {
        // These terminate or invalidate an unquoted value. Treating `>` as valid would make the
        // value swallow the end of the tag.
        b'"' | b'\'' | b'`' | b'=' | b'<' | b'>' => false,
        c => !is_whitespace(c),
    }
 }
 // TODO Unquoted could be optimised to quoted if used entities to encode illegal chars.
 // Processes an unquoted attribute value: a run of entities and valid unquoted-value characters.
 // Fails if the value is empty or if processing an entity fails.
 pub fn process_attr_unquoted_val<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    let mut at_least_one_char = false;
    loop {
        if proc.match_char(b'&').matched() {
            // Process entity.
            // TODO Entity could decode to illegal character.
            // Propagate entity errors instead of silently dropping them.
            process_entity(proc)?;
        } else if !proc.match_pred(is_valid_unquoted_value_char).keep().matched() {
            break;
        }
        at_least_one_char = true;
    }
    if at_least_one_char {
        Ok(())
    } else {
        Err(HbErr::ExpectedNotFound("Expected unquoted attribute value"))
    }
 }
--- a/src/proc/bang.rs
+++ b/src/proc/bang.rs
@ -1,13 +0,0 @@
 use crate::proc::Processor;
 use crate::code::Code;
 use crate::err::HbRes;
 // Processes a `<!...>` construct, keeping all of it.
 // Fails if the input does not start with `<!` or if no closing `>` follows.
 pub fn process_bang<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    // Opening `<!` is required and kept.
    proc.match_seq(b"<!").require()?.keep();
    // Everything up to, but not including, the next `>` is kept.
    proc.match_while_not_char(b'>').keep();
    // Closing `>` is required and kept.
    proc.match_char(b'>').require()?.keep();
    Ok(())
 }
--- a/src/proc/comment.rs
+++ b/src/proc/comment.rs
@ -1,14 +0,0 @@
 use crate::proc::Processor;
 use crate::code::Code;
 use crate::err::HbRes;
 // Processes a `<!-- ... -->` comment, discarding all of it.
 // Fails with reason "comment end" if the closing `-->` is not found.
 pub fn process_comment<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    // The caller is expected to have already seen the opening `<!--`, hence `expect`.
    proc.match_seq(b"<!--").expect().discard();
    // TODO Cannot use this pattern
    // NOTE(review): the first byte of `-->` repeats within the pattern.
    proc.match_while_not_seq(b"-->").discard();
    proc.match_seq(b"-->").require_with_reason("comment end")?.discard();
    Ok(())
 }
--- a/src/proc/content.rs
+++ b/src/proc/content.rs
@ -1,156 +0,0 @@
 use crate::code::Code;
 use crate::proc::Processor;
 use crate::spec::codepoint::is_whitespace;
 use crate::proc::comment::process_comment;
 use crate::proc::bang::process_bang;
 use crate::proc::entity::process_entity;
 use crate::proc::tag::process_tag;
 use crate::err::HbRes;
 use crate::spec::tag::wss::WSS_TAGS;
 use crate::spec::tag::content::CONTENT_TAGS;
 use crate::spec::tag::formatting::FORMATTING_TAGS;
 // Kind of content found at the current position while processing the
 // children of an element. Produced by `State::next_state`, except `Start`.
 #[derive(PartialEq)]
 enum State {
 	// Next is `<!--`.
 	Comment,
 	// Next is `<!` (and not `<!--`).
 	Bang,
 	// Next is `<` (and not `</`, `<!--` or `<!`).
 	OpeningTag,
 	// Initial value of the previous state, before anything has been processed.
 	Start,
 	// At end of data, or next is `</`.
 	End,
 	// Next is `&`.
 	Entity,
 	// Next character satisfies `is_whitespace`.
 	Whitespace,
 	// Anything else.
 	Text,
 }
 impl State {
 	// Whether this state is a comment, a bang, or an opening tag.
 	fn is_comment_bang_opening_tag(&self) -> bool {
 		*self == State::Comment || *self == State::Bang || *self == State::OpeningTag
 	}
 	// Classifies what comes next in the source without consuming anything.
 	// The order of the checks matters: longer prefixes sharing a first
 	// character (`</`, `<!--`, `<!`) are tested before shorter ones (`<`).
 	fn next_state<D: Code>(proc: &Processor<D>) -> State {
 		// TODO Optimise to trie.
 		if proc.data.at_end() || proc.match_seq(b"</").matched() {
 			State::End
 		} else if proc.match_pred(is_whitespace).matched() {
 			State::Whitespace
 		} else if proc.match_seq(b"<!--").matched() {
 			State::Comment
 		} else if proc.match_seq(b"<!").matched() {
 			// Checked after comment.
 			State::Bang
 		} else if proc.match_char(b'<').matched() {
 			// Checked after comment and bang.
 			State::OpeningTag
 		} else if proc.match_char(b'&').matched() {
 			State::Entity
 		} else {
 			State::Text
 		}
 	}
 }
 /*
 * Processes the content (children) of the element named `parent`, or content
 * without a parent when `parent` is `None`, until end of data or a closing
 * tag (`</`) is reached. Errors from nested processors are propagated.
 *
 * Whitespace handling is the trickiest part of this function.
 * There are three potential minification settings that affect whitespace
 * handling:
 *   - collapse
 *   - destroy whole
 *   - trim
 * What whitespace to minify depends on the parent and configured settings.
 * We want to prevent memory allocation and use only one pass, but whitespace
 * handling often involves looking ahead.
 */
 pub fn process_content<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes<()> {
 	// All three settings are off when there is no parent.
 	let should_collapse_whitespace = parent.filter(|p| !WSS_TAGS.contains(p)).is_some();
 	let should_destroy_whole_whitespace = parent.filter(|p| !WSS_TAGS.contains(p) && !CONTENT_TAGS.contains(p) && !FORMATTING_TAGS.contains(p)).is_some();
 	let should_trim_whitespace = parent.filter(|p| !WSS_TAGS.contains(p) && !FORMATTING_TAGS.contains(p)).is_some();
 	// Trim leading whitespace if configured to do so.
 	if should_trim_whitespace {
 		proc.match_while_pred(is_whitespace).discard();
 	};
 	let mut last_state = State::Start;
 	// Whether or not currently in whitespace.
 	let mut whitespace_start = None;
 	// If currently in whitespace, whether or not current contiguous
 	// whitespace started after a bang, comment, or tag.
 	let mut whitespace_started_after_cbot = false;
 	loop {
 		let next_state = State::next_state(proc);
 		if next_state == State::Whitespace {
 			// Whitespace is always buffered and then processed
 			// afterwards, even if not minifying.
 			proc.skip()?;
 			if last_state != State::Whitespace {
 				// This is the start of one or more whitespace
 				// characters, so start a view of this
 				// contiguous whitespace and don't write any
 				// characters that are part of it yet.
 				// NOTE(review): the view is started after the first whitespace
 				// character has been skipped; confirm `start_read_slice` still
 				// covers that character.
 				whitespace_start = Some(proc.start_read_slice());
 				whitespace_started_after_cbot = last_state.is_comment_bang_opening_tag();
 			}
 			// Otherwise this is a continuation of contiguous whitespace
 			// and there is nothing more to do.
 		} else {
 			// Next character is not whitespace, so handle any
 			// previously buffered whitespace (and reset the buffer).
 			if let Some(whitespace_buffered) = whitespace_start.take() {
 				if should_destroy_whole_whitespace && whitespace_started_after_cbot && next_state.is_comment_bang_opening_tag() {
 					// Whitespace is between two tags, comments, or bangs.
 					// destroy_whole_whitespace is on, so don't write it.
 				} else if should_trim_whitespace && next_state == State::End {
 					// Whitespace is trailing.
 					// should_trim_whitespace is on, so don't write it.
 				} else if should_collapse_whitespace {
 					// Current contiguous whitespace needs to be reduced to a single space character.
 					proc.write(b' ');
 				} else {
 					// Whitespace cannot be minified, so
 					// write in entirety.
 					proc.write_slice(proc.get_slice(whitespace_buffered));
 				}
 			};
 			// Process and consume next character(s), propagating failures.
 			match next_state {
 				State::Comment => {
 					process_comment(proc)?;
 				}
 				State::Bang => {
 					process_bang(proc)?;
 				}
 				State::OpeningTag => {
 					process_tag(proc, parent)?;
 				}
 				// Nothing to consume; the closing tag (if any) is left for the caller.
 				State::End => break,
 				State::Entity => {
 					process_entity(proc)?;
 				}
 				State::Text => {
 					proc.accept()?;
 				}
 				// Whitespace is handled above; Start is never returned by `next_state`.
 				State::Whitespace | State::Start => unreachable!(),
 			};
 		};
 		last_state = next_state;
 	};
 	Ok(())
 }
--- a/src/proc/mod.rs
+++ b/src/proc/mod.rs
@ -1,368 +0,0 @@
 use crate::err::{HbErr, HbRes};
 use phf::Set;
 use crate::code::Code;
 pub mod attr;
 pub mod bang;
 pub mod comment;
 pub mod content;
 pub mod entity;
 pub mod script;
 pub mod style;
 pub mod tag;
 /// What a match was looking for. Stored on every `Match` and consulted when a
 /// required match turns out to be missing, to pick the error to report.
 pub enum RequireReason {
    /// No specific expectation recorded; the caller supplies the reason text
    /// via `require_with_reason`.
    Custom,
    /// Attached by `match_not_char`: any character other than this one was expected.
    ExpectedNotChar(u8),
    /// This exact byte sequence was expected.
    ExpectedMatch(&'static [u8]),
    /// Attached by `match_char`: exactly this character was expected.
    ExpectedChar(u8),
 }
 /// Result of a `Processor::match_*` call: a (possibly empty) run of source
 /// characters starting at the position the processor was at when matching.
 ///
 /// Public because it is returned from the public `match_*` methods and used
 /// by other modules; its fields stay private.
 pub struct Match<'d, D: Code> {
    data: &'d mut D,
    // Need to record start as we might get slice after keeping or skipping.
    start: usize,
    // Guaranteed amount of characters that exist from `start` at time of creation of this struct.
    count: usize,
    // Character matched, if any. Only exists for single-character matches and if matched.
    char: Option<u8>,
    // What was being looked for; selects the error reported by `require`.
    reason: RequireReason,
 }
 impl<D: Code> Match<'_, D> {
    // Query
    pub fn matched(&self) -> bool {
        self.count > 0
    }
    pub fn length(&self) -> usize {
        self.count
    }
    pub fn char(&self) -> u8 {
        self.char.unwrap()
    }
    pub fn maybe_char(&self) -> Option<u8> {
        self.char
    }
    pub fn slice(&self) -> &[u8] {
        self.data.get_src_slice(self.start..self.start + self.count)
    }
    // Assert
    fn _require(&self, custom_reason: Option<&'static str>) -> HbRes<&Self> {
        if self.count > 0 {
            Ok(self)
        } else {
            match self.reason {
                RequireReason::Custom => Err(HbErr::ExpectedNotFound(custom_reason.unwrap())),
                RequireReason::ExpectedNotChar(c) => Err(HbErr::ExpectedCharNotFound {
                    expected: c,
                    got: self.char.unwrap(),
                }),
                RequireReason::ExpectedChar(c) => Err(HbErr::UnexpectedCharFound(c)),
                RequireReason::ExpectedMatch(m) => Err(HbErr::ExpectedMatchNotFound(m)),
            }
        }
    }
    pub fn require(&self) -> HbRes<&Self> {
        self._require(None)
    }
    pub fn require_with_reason(&self, reason: &'static str) -> HbRes<&Self> {
        self._require(Some(reason))
    }
    // TODO Document
    pub fn expect(&self) -> &Self {
        // TODO Maybe debug_assert?
        assert!(self.count > 0);
        self
    }
    // Commit.
    // Note that self.count has already been verified to be valid, so don't need to bounds check again.
    pub fn keep(&self) -> &Self {
        self.data.shift(self.count);
        self
    }
    pub fn discard(&self) -> &Self {
        self.data.set_src_pos(self.count);
        self
    }
 }
 /// Snapshot of the source and output positions taken by `Processor::checkpoint`.
 ///
 /// Public because it is returned from a public method; its fields stay private.
 pub struct Checkpoint<'d, D: Code> {
    data: &'d mut D,
    // Source position at the time the checkpoint was taken.
    src_pos: usize,
    // Output position at the time the checkpoint was taken.
    out_pos: usize,
 }
 impl<D: Code> Checkpoint<'_, D> {
    /// Move both the source and the output position back to the checkpoint.
    pub fn restore(&self) {
        self.data.set_src_pos(self.src_pos);
        self.data.set_out_pos(self.out_pos);
    }
    /// Write characters skipped from source since checkpoint. Must not have written anything since checkpoint.
    pub fn write_skipped(&self) {
        // Anything written since the checkpoint would be lost, so there must be nothing.
        debug_assert_eq!(self.data.get_out_pos(), self.out_pos);
        // Source from the checkpoint up to (and including) the last consumed character.
        let consumed_range = self.src_pos..self.data.get_src_pos();
        let pending = self.data.get_src_slice(consumed_range);
        self.data.write_slice(pending);
    }
    /// Discard characters written since checkpoint but keep source position.
    pub fn erase_written(&self) {
        self.data.set_out_pos(self.out_pos);
    }
    /// Number of source characters consumed since the checkpoint.
    pub fn consumed_count(&self) -> usize {
        let current = self.data.get_src_pos();
        current - self.src_pos
    }
    /// Number of characters written to the output since the checkpoint.
    pub fn written_count(&self) -> usize {
        let current = self.data.get_out_pos();
        current - self.out_pos
    }
 }
 // Processing state of a file. Most fields are used internally and set during
 // processing. Single use only; create one per processing.
 pub struct Processor<'data, D: Code> {
    // The code being processed; all reading, matching, skipping and writing
    // done by the processor goes through this.
    pub data: &'data mut D,
 }
 // Index of the first occurrence of `c` in `s` at or after index `from`,
 // or `None` if there is none (including when `from` is past the end).
 fn index_of(s: &'static [u8], c: u8, from: usize) -> Option<usize> {
    s.iter()
        .enumerate()
        .skip(from)
        .find(|&(_, &byte)| byte == c)
        .map(|(pos, _)| pos)
 }
 // Debug-only check that a pattern is non-empty and that its first character
 // does not occur again later in the pattern.
 // This property allows a substring search to continue directly at the next
 // source character after a partial match fails. For example, given string
 // "abcdabc" and pattern "abcde", normal substring searching would match
 // "abcd", fail, and then start searching from 'b' at index 1. With the
 // property guaranteed, searching can continue from 'a' at index 4.
 macro_rules! debug_assert_fast_pattern {
    ($x:expr) => {
        debug_assert!(!$x.is_empty() && index_of($x, $x[0], 1).is_none());
    }
 }
 // For consistency and improvement of underlying API, only write methods in terms of the underlying API (Code methods). Do not call other Proc methods.
 // TODO Return refs for matches.
 impl<D: Code> Processor<'_, D> {
    // Helper internal functions for match_* API.
    /// Builds a `Match` of `count` characters starting at the current source position.
    fn _new_match(&self, count: usize, char: Option<u8>, reason: RequireReason) -> Match<D> {
        Match {
            data: self.data,
            start: self.data.get_src_pos(),
            count,
            char,
            reason,
        }
    }
    /// Matches the next character if there is one and it satisfies `cond`.
    fn _match_one<C: FnOnce(u8) -> bool>(&self, cond: C, reason: RequireReason) -> Match<D> {
        let m = self.data.maybe_read(0).filter(|n| cond(*n));
        self._new_match(m.is_some() as usize, m, reason)
    }
    /// Matches as many consecutive characters as satisfy `cond` (possibly none).
    // `cond` is evaluated once per character, so it must be callable repeatedly (`Fn`).
    fn _match_greedy<C: Fn(u8) -> bool>(&self, cond: C) -> Match<D> {
        let mut count = 0usize;
        while self.data.in_bounds(count) && cond(self.data.read(count)) {
            count += 1;
        };
        self._new_match(count, None, RequireReason::Custom)
    }
    // Single-char matching API.
    /// Matches the next character if it is `c`.
    pub fn match_char(&self, c: u8) -> Match<D> {
        self._match_one(|n| n == c, RequireReason::ExpectedChar(c))
    }
    /// Matches the next character if it is not `c`.
    pub fn match_not_char(&self, c: u8) -> Match<D> {
        self._match_one(|n| n != c, RequireReason::ExpectedNotChar(c))
    }
    /// Matches the next character if it is a member of `set`.
    pub fn match_member(&self, set: Set<u8>) -> Match<D> {
        self._match_one(|n| set.contains(&n), RequireReason::Custom)
    }
    /// Matches the next character if it is not a member of `set`.
    pub fn match_not_member(&self, set: Set<u8>) -> Match<D> {
        self._match_one(|n| !set.contains(&n), RequireReason::Custom)
    }
    /// Matches the next character if `pred` returns true for it.
    pub fn match_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
        self._match_one(pred, RequireReason::Custom)
    }
    /// Matches the next character if `pred` returns false for it.
    pub fn match_not_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
        self._match_one(|n| !pred(n), RequireReason::Custom)
    }
    // Match a sequence of characters.
    /// Matches `pat` if the source continues with exactly that sequence.
    /// The match is either all of `pat` or empty.
    pub fn match_seq(&self, pat: &'static [u8]) -> Match<D> {
        debug_assert!(!pat.is_empty());
        // This comparison is anchored at the current position and never resumes
        // after a partial match, so any non-empty pattern is fine (including
        // ones whose first character repeats, such as `-->`).
        let len = pat.len();
        // Bounds are checked once for the whole pattern, then compared
        // char-by-char so that a mismatch short-circuits.
        let matched = len > 0
            && self.data.in_bounds(len - 1)
            && pat.iter().enumerate().all(|(i, &expected)| self.data.read(i) == expected);
        let count = if matched { len } else { 0 };
        // Record the pattern so that `require()` can report what was expected.
        self._new_match(count, None, RequireReason::ExpectedMatch(pat))
    }
    /// Matches a line terminator: `\n`, `\r\n`, or a lone `\r`.
    pub fn match_line_terminator(&self) -> Match<D> {
        self._new_match(match self.data.maybe_read(0) {
            Some(b'\n') => 1,
            Some(b'\r') => 1 + self.data.maybe_read(1).filter(|c| *c == b'\n').is_some() as usize,
            _ => 0,
        }, None, RequireReason::Custom)
    }
    // Multi-char matching API.
    /// Matches as many consecutive `c` characters as possible.
    pub fn match_while_char(&self, c: u8) -> Match<D> {
        self._match_greedy(|n| n == c)
    }
    /// Matches all characters up to (not including) the next `c`, or to the end.
    pub fn match_while_not_char(&self, c: u8) -> Match<D> {
        self._match_greedy(|n| n != c)
    }
    /// Matches as many consecutive members of `set` as possible.
    pub fn match_while_member(&self, set: Set<u8>) -> Match<D> {
        self._match_greedy(|n| set.contains(&n))
    }
    /// Matches as many consecutive non-members of `set` as possible.
    pub fn match_while_not_member(&self, set: Set<u8>) -> Match<D> {
        self._match_greedy(|n| !set.contains(&n))
    }
    /// Matches as many consecutive characters satisfying `pred` as possible.
    pub fn match_while_pred(&self, pred: fn(u8) -> bool) -> Match<D> {
        self._match_greedy(pred)
    }
    /// Matches all characters up to (not including) the first occurrence of
    /// `s`, or all remaining characters if `s` does not occur.
    pub fn match_while_not_seq(&self, s: &'static [u8]) -> Match<D> {
        debug_assert!(!s.is_empty());
        // TODO Test
        // Try every offset in turn as the potential start of `s`. Restarting at
        // the very next offset after a partial match keeps this correct for
        // patterns whose first character repeats (e.g. `-->` in `--->`); patterns
        // are short, so the extra comparisons are negligible.
        let mut count = 0usize;
        while self.data.in_bounds(count) {
            let found_here = s.iter().enumerate().all(|(i, &expected)| {
                self.data.in_bounds(count + i) && self.data.read(count + i) == expected
            });
            if found_here {
                break;
            };
            count += 1;
        };
        self._new_match(count, None, RequireReason::Custom)
    }
    /// Records the current source and output positions.
    pub fn checkpoint(&self) -> Checkpoint<D> {
        Checkpoint {
            data: self.data,
            src_pos: self.data.get_src_pos(),
            out_pos: self.data.get_out_pos(),
        }
    }
    /// Get the `offset` character from next.
    /// When `offset` is 0, the next character is returned.
    /// Returns `None` if there is no such character.
    pub fn peek_offset_eof(&self, offset: usize) -> Option<u8> {
        self.data.maybe_read(offset)
    }
    /// Like `peek_offset_eof`, but a missing character is an error.
    pub fn peek_offset(&self, offset: usize) -> HbRes<u8> {
        self.data.maybe_read(offset).ok_or(HbErr::UnexpectedEnd)
    }
    /// Get the next character, or `None` at the end.
    pub fn peek_eof(&self) -> Option<u8> {
        self.data.maybe_read(0)
    }
    /// Get the next character; being at the end is an error.
    pub fn peek(&self) -> HbRes<u8> {
        self.data.maybe_read(0).ok_or(HbErr::UnexpectedEnd)
    }
    /// Skip the next `count` characters (can be zero).
    /// Will result in an error if exceeds bounds.
    pub fn skip_amount(&self, count: usize) -> HbRes<()> {
        // Check for zero to prevent underflow as type is usize.
        if count == 0 || self.data.in_bounds(count - 1) {
            self.data.consume(count);
            Ok(())
        } else {
            Err(HbErr::UnexpectedEnd)
        }
    }
    /// Skip and return the next character.
    /// Will result in an error if exceeds bounds.
    pub fn skip(&self) -> HbRes<u8> {
        if !self.data.at_end() {
            let c = self.data.read(0);
            self.data.consume(1);
            Ok(c)
        } else {
            Err(HbErr::UnexpectedEnd)
        }
    }
    /// Write `c` to output. Will panic if exceeds bounds.
    pub fn write(&self, c: u8) {
        self.data.write(c)
    }
    /// Write `s` to output. Will panic if exceeds bounds.
    pub fn write_slice(&self, s: &[u8]) {
        self.data.write_slice(s)
    }
    /// Write the UTF-8 encoding of `c` to output.
    /// Does not check if `c` is a valid Unicode code point.
    ///
    /// Panics if `c` is greater than 0x10FFFF.
    pub fn write_utf8(&self, c: u32) {
        // Don't use char::encode_utf8 as it requires a valid code point,
        // and requires passing a [u8, 4] which might be heap-allocated.
        if c <= 0x7F {
            // Plain ASCII.
            self.data.write(c as u8);
        } else if c <= 0x07FF {
            // 2-byte UTF-8.
            self.data.write((((c >> 6) & 0x1F) | 0xC0) as u8);
            self.data.write(((c & 0x3F) | 0x80) as u8);
        } else if c <= 0xFFFF {
            // 3-byte UTF-8.
            self.data.write((((c >> 12) & 0x0F) | 0xE0) as u8);
            self.data.write((((c >> 6) & 0x3F) | 0x80) as u8);
            self.data.write(((c & 0x3F) | 0x80) as u8);
        } else if c <= 0x10FFFF {
            // 4-byte UTF-8.
            self.data.write((((c >> 18) & 0x07) | 0xF0) as u8);
            self.data.write((((c >> 12) & 0x3F) | 0x80) as u8);
            self.data.write((((c >> 6) & 0x3F) | 0x80) as u8);
            self.data.write(((c & 0x3F) | 0x80) as u8);
        } else {
            unreachable!();
        }
    }
    /// Shift and return the next character.
    /// Will result in an error if already at the end.
    pub fn accept(&self) -> HbRes<u8> {
        if !self.data.at_end() {
            let c = self.data.read(0);
            self.data.shift(1);
            Ok(c)
        } else {
            Err(HbErr::UnexpectedEnd)
        }
    }
    /// Shift the next `count` characters (can be zero).
    /// Will result in an error if exceeds bounds.
    pub fn accept_amount(&self, count: usize) -> HbRes<()> {
        // Check for zero to prevent underflow as type is usize.
        if count == 0 || self.data.in_bounds(count - 1) {
            self.data.shift(count);
            Ok(())
        } else {
            Err(HbErr::UnexpectedEnd)
        }
    }
 }
--- a/src/unit/attr/mod.rs
+++ b/src/unit/attr/mod.rs
@ -0,0 +1,46 @@
 use crate::proc::Processor;
 use crate::err::HbRes;
 use crate::spec::codepoint::is_control;
 use phf::{Set, phf_set};
 use crate::unit::attr::value::process_attr_value;
 mod value;
 // Names of attributes for which `process_attr` asks `process_attr_value` to
 // collapse and trim whitespace in the value. Lookup uses the attribute name
 // exactly as it appears in the source.
 static COLLAPSIBLE_AND_TRIMMABLE_ATTRS: Set<&'static [u8]> = phf_set! {
    b"class",
 };
 // How an attribute ended up being represented; returned by `process_attr`.
 #[derive(Clone, Copy, Eq, PartialEq)]
 pub enum AttrType {
    // Special value for `process_tag`.
    None,
    // NOTE(review): presumably the value was written with quote delimiters; set by `process_attr_value` — confirm.
    Quoted,
    // NOTE(review): presumably the value was written without delimiters; set by `process_attr_value` — confirm.
    Unquoted,
    // The attribute name was not followed by `=`, so there is no value.
    NoValue,
 }
 // Whether `c` may be part of an attribute name: anything except space,
 // `"`, `'`, `>`, `/`, `=` and control characters.
 // NOTE: Unicode noncharacters not tested.
 // See https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name for spec.
 fn is_name_char(c: u8) -> bool {
    let is_forbidden_punctuation = match c {
        b' ' | b'"' | b'\'' | b'>' | b'/' | b'=' => true,
        _ => false,
    };
    // Control characters are only checked for when not already ruled out.
    !(is_forbidden_punctuation || is_control(c))
 }
 // Processes one attribute: keeps its name, then an optional `=` followed by
 // the value. Returns `AttrType::NoValue` when there is no `=`; otherwise
 // returns whatever `process_attr_value` reports for the value.
 pub fn process_attr<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<AttrType> {
    // Expect `process_attr` to be called at an attribute.
    let attr_name = cascade_return!(proc.match_while_pred(is_name_char).expect().keep().slice());
    // TODO DOC Attr must be case sensitive
    let collapse_and_trim = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(attr_name);
    let followed_by_equals = cascade_return!(proc.match_char(b'=').keep().matched());
    if followed_by_equals {
        return process_attr_value(proc, collapse_and_trim);
    }
    Ok(AttrType::NoValue)
 }
--- a/src/proc/attr/quoted.rs
+++ b/src/proc/attr/quoted.rs
@ -1,11 +1,10 @@
-use crate::proc::{Processor, Match};
+use phf::{Map, phf_map};
-use crate::proc::attr::AttrType;
+
 use crate::code::Code;
 use crate::spec::codepoint::is_whitespace;
 use crate::proc::entity::{process_entity, parse_entity};
 use crate::err::HbRes;
-use phf::Map;
+use crate::proc::Processor;
-use std::thread::current;
+use crate::spec::codepoint::is_whitespace;
 use crate::unit::attr::AttrType;
 use crate::unit::entity::{parse_entity, process_entity};
 pub fn is_double_quote(c: u8) -> bool {
    c == b'"'
@ -31,14 +30,14 @@ static ENCODED: Map<u8, &'static [u8]> = phf_map! {
    b'"' => b"&#34;",
    b'>' => b"&gt;",
    // Whitespace characters as defined by spec in crate::spec::codepoint::is_whitespace.
-    0x09 => b"&#9;",
+    b'\x09' => b"&#9;",
-    0x0a => b"&#10;",
+    b'\x0a' => b"&#10;",
-    0x0c => b"&#12;",
+    b'\x0c' => b"&#12;",
-    0x0d => b"&#13;",
+    b'\x0d' => b"&#13;",
-    0x20 => b"&#32;",
+    b'\x20' => b"&#32;",
 };
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Eq, PartialEq)]
 enum CharType {
    End,
    MalformedEntity,
@ -58,12 +57,12 @@ impl CharType {
            b'"' => CharType::DoubleQuote,
            b'\'' => CharType::SingleQuote,
            b'>' => CharType::RightChevron,
-            c => if is_whitespace(c) { CharType::Whitespace(c) } else { CharType::Normal },
+            c => if is_whitespace(c) { CharType::Whitespace(c) } else { CharType::Normal(c) },
        }
    }
 }
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Eq, PartialEq)]
 enum DelimiterType {
    Double,
    Single,
@ -91,14 +90,14 @@ impl Metrics {
        match char_type {
            CharType::Whitespace(c) => {
                self.count_whitespace += 1;
-                self.total_whitespace_encoded_length += ENCODED[c].len();
+                self.total_whitespace_encoded_length += ENCODED[&c].len();
            }
            CharType::SingleQuote => self.count_single_quotation += 1,
            CharType::DoubleQuote => self.count_double_quotation += 1,
            _ => (),
        };
-        if self.first_char_type == None {
+        if let None = self.first_char_type {
            self.first_char_type = Some(char_type);
        };
        self.last_char_type = Some(char_type);
@ -110,13 +109,13 @@ impl Metrics {
        // NOTE: Don't need to consider whitespace for either as all whitespace will be encoded and counts as part of `total_whitespace_encoded_length`.
        let first_char_encoding_cost = match self.first_char_type {
            // WARNING: Change `first_char_is_quote_encoded` if changing here.
-            Some(CharType::DoubleQuote) => ENCODED[b'"'].len(),
+            Some(CharType::DoubleQuote) => ENCODED[&b'"'].len(),
-            Some(CharType::SingleQuote) => ENCODED[b'\''].len(),
+            Some(CharType::SingleQuote) => ENCODED[&b'\''].len(),
            _ => 0,
        };
        let first_char_is_quote_encoded = first_char_encoding_cost > 0;
-        let last_char_encoding_cost = match last_char_type {
+        let last_char_encoding_cost = match self.last_char_type {
-            Some(CharType::RightChevron) => ENCODED[b'>'].len(),
+            Some(CharType::RightChevron) => ENCODED[&b'>'].len(),
            _ => 0,
        };
@ -131,11 +130,11 @@ impl Metrics {
    }
    fn single_quoted_cost(&self) -> usize {
-        self.count_single_quotation * ENCODED[b'\''].len() + self.count_double_quotation + self.count_whitespace
+        self.count_single_quotation * ENCODED[&b'\''].len() + self.count_double_quotation + self.count_whitespace
    }
    fn double_quoted_cost(&self) -> usize {
-        self.count_double_quotation * ENCODED[b'"'].len() + self.count_single_quotation + self.count_whitespace
+        self.count_double_quotation * ENCODED[&b'"'].len() + self.count_single_quotation + self.count_whitespace
    }
    fn get_optimal_delimiter_type(&self) -> DelimiterType {
@ -156,61 +155,59 @@ impl Metrics {
    }
 }
-fn consume_attr_value<D: Code>(
+macro_rules! consume_attr_value_chars {
-    proc: &Processor<D>,
+    ($proc:ident, $should_collapse_and_trim_ws:ident, $delimiter_pred:ident, $entity_processor:ident, $out_char_type:ident, $on_char:block) => {
-    should_collapse_and_trim_ws: bool,
+        // Set to true when one or more immediately previous characters were whitespace and deferred for processing after the contiguous whitespace.
-    delimiter_pred: fn(u8) -> bool,
+        // NOTE: Only used if `should_collapse_and_trim_ws`.
-    on_entity: fn(&Processor<D>) -> HbRes<Option<u32>>,
+        let mut currently_in_whitespace = false;
-    on_char: fn(char_type: CharType, char_no: usize) -> (),
+        // Needed to check if at beginning of value so that leading whitespace can be trimmed instead of collapsed.
-) -> HbRes<()> {
+        // NOTE: Only used if `should_collapse_and_trim_ws`.
-    // Set to true when one or more immediately previous characters were whitespace and deferred for processing after the contiguous whitespace.
+        let mut currently_first_char = true;
    // NOTE: Only used if `should_collapse_and_trim_ws`.
    let mut currently_in_whitespace = false;
    let mut char_no = 0;
    loop {
        let char_type = if proc.match_pred(delimiter_pred).matched() {
            // DO NOT BREAK HERE. More processing is done afterwards upon reaching end.
            CharType::End
        } else if proc.match_char(b'&').matched() {
            match on_entity(proc)? {
                Some(e) => if e <= 0x7f { CharType::from_char(e as u8) } else { CharType::DecodedNonAscii },
                None => CharType::MalformedEntity,
            }
        } else {
            CharType::from_char(proc.skip()?)
        };
-        if should_collapse_and_trim_ws {
+        loop {
-            if let CharType::Whitespace(_) = char_type {
+            let char_type = if cascade_return!($proc.match_pred($delimiter_pred).matched()) {
-                // Ignore this whitespace character, but mark the fact that we are currently in contiguous whitespace.
+                // DO NOT BREAK HERE. More processing is done afterwards upon reaching end.
-                currently_in_whitespace = true;
+                CharType::End
-                continue;
+            } else if cascade_return!($proc.match_char(b'&').matched()) {
                match $entity_processor($proc)? {
                    Some(e) => if e <= 0x7f { CharType::from_char(e as u8) } else { CharType::DecodedNonAscii },
                    None => CharType::MalformedEntity,
                }
            } else {
-                // Now past whitespace (e.g. moved to non-whitespace char or end of attribute value). Either:
+                CharType::from_char($proc.skip()?)
-                // - ignore contiguous whitespace (i.e. do nothing) if we are currently at beginning or end of value; or
+            };
-                // - collapse contiguous whitespace (i.e. count as one whitespace char) otherwise.
+
-                if currently_in_whitespace && first_char_type != None && char_type != CharType::End {
+            if $should_collapse_and_trim_ws {
-                    // Collect current collapsed contiguous whitespace that was ignored previously.
+                if let CharType::Whitespace(_) = char_type {
-                    on_char(CharType::Whitespace(b' '), char_no);
+                    // Ignore this whitespace character, but mark the fact that we are currently in contiguous whitespace.
-                    char_no += 1;
+                    currently_in_whitespace = true;
                    continue;
                } else {
                    // Now past whitespace (e.g. moved to non-whitespace char or end of attribute value). Either:
                    // - ignore contiguous whitespace (i.e. do nothing) if we are currently at beginning or end of value; or
                    // - collapse contiguous whitespace (i.e. count as one whitespace char) otherwise.
                    if currently_in_whitespace && !currently_first_char && char_type != CharType::End {
                        // Collect current collapsed contiguous whitespace that was ignored previously.
                        $out_char_type = CharType::Whitespace(b' ');
                        $on_char;
                    };
                    currently_in_whitespace = false;
                };
-                currently_in_whitespace = false;
+            };
            match char_type {
                CharType::End => break,
                char_type => {
                    $out_char_type = char_type;
                    $on_char;
                    currently_first_char = false;
                }
            };
        };
        if char_type == CharType::End {
            break;
        } else {
            on_char(char_type, char_no);
            char_no += 1;
        };
    };
    Ok(())
 }
-// TODO Might encounter danger if Unicode whitespace is considered as whitespace.
+pub fn process_attr_value<'d, 'p>(proc: &'p mut Processor<'d>, should_collapse_and_trim_ws: bool) -> HbRes<AttrType> {
 pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim_ws: bool) -> HbRes<AttrType> {
    // Processing a quoted attribute value is tricky, due to the fact that
    // it's not possible to know whether or not to unquote the value until
    // the value has been processed. For example, decoding an entity could
@ -227,7 +224,7 @@ pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim
    // 4. Post-process the output by adding delimiter quotes and encoding
    // quotes in values. This does mean that the output is written to twice.
-    let src_delimiter = proc.match_pred(is_attr_quote).discard().maybe_char();
+    let src_delimiter = cascade_return!(proc.match_pred(is_attr_quote).discard().maybe_char());
    let src_delimiter_pred = match src_delimiter {
        Some(b'"') => is_double_quote,
        Some(b'\'') => is_single_quote,
@ -246,16 +243,13 @@ pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim
        last_char_type: None,
        collected_count: 0,
    };
-    consume_attr_value(
+    let mut char_type;
-        proc,
+    consume_attr_value_chars!(proc, should_collapse_and_trim_ws, src_delimiter_pred, parse_entity, char_type, {
-        should_collapse_and_trim_ws,
+        metrics.collect_char_type(char_type);
-        src_delimiter_pred,
+    });
        parse_entity,
        |char_type, _| metrics.collect_char_type(char_type),
    )?;
    // Stage 2: optimally minify attribute value using metrics.
-    value_start_checkpoint.restore();
+    proc.restore(value_start_checkpoint);
    let optimal_delimiter = metrics.get_optimal_delimiter_type();
    let optimal_delimiter_char = match optimal_delimiter {
        DelimiterType::Double => Some(b'"'),
@ -266,48 +260,47 @@ pub fn process_quoted_val<D: Code>(proc: &Processor<D>, should_collapse_and_trim
    if let Some(c) = optimal_delimiter_char {
        proc.write(c);
    }
-    consume_attr_value(
+    let mut char_type;
-        proc,
+    let mut char_no = 0;
-        should_collapse_and_trim_ws,
+    consume_attr_value_chars!(proc, should_collapse_and_trim_ws, src_delimiter_pred, process_entity, char_type, {
-        src_delimiter_pred,
+        match char_type {
        process_entity,
        |char_type, char_no| match char_type {
            // This should never happen.
            CharType::End => unreachable!(),
-            // Ignore these; already written by process_entity.
+            // Ignore these; already written by `process_entity`.
            CharType::MalformedEntity => {}
            CharType::DecodedNonAscii => {}
            CharType::Normal(c) => proc.write(c),
            // If unquoted, encode any whitespace anywhere.
            CharType::Whitespace(c) => match optimal_delimiter {
-                DelimiterType::Unquoted => proc.write(ENCODED[c]),
+                DelimiterType::Unquoted => proc.write_slice(ENCODED[&c]),
                _ => proc.write(c),
            },
            // If single quoted, encode any single quote anywhere.
            // If unquoted, encode single quote if first character.
            CharType::SingleQuote => match (optimal_delimiter, char_no) {
-                (DelimiterType::Single, _) | (DelimiterType::Unquoted, 0) => proc.write(ENCODED[b'\'']),
+                (DelimiterType::Single, _) | (DelimiterType::Unquoted, 0) => proc.write_slice(ENCODED[&b'\'']),
-                _ => proc.write(c),
+                _ => proc.write(b'\''),
            },
            // If double quoted, encode any double quote anywhere.
            // If unquoted, encode double quote if first character.
            CharType::DoubleQuote => match (optimal_delimiter, char_no) {
-                (DelimiterType::Double, _) | (DelimiterType::Unquoted, 0) => proc.write(ENCODED[b'"']),
+                (DelimiterType::Double, _) | (DelimiterType::Unquoted, 0) => proc.write_slice(ENCODED[&b'"']),
-                _ => proc.write(c),
+                _ => proc.write(b'"'),
            },
            // If unquoted, encode right chevron if last character.
            CharType::RightChevron => if optimal_delimiter == DelimiterType::Unquoted && char_no == metrics.collected_count - 1 {
-                proc.write(ENCODED[b'>']);
+                proc.write_slice(ENCODED[&b'>']);
            } else {
                proc.write(b'>');
            },
-        },
+        };
-    );
+        char_no += 1;
    });
    // Ensure closing delimiter in src has been matched and discarded, if any.
    if let Some(c) = src_delimiter {
-        proc.match_char(c).expect().discard();
+        cascade_return!(proc.match_char(c).expect().discard());
    }
    // Write closing delimiter, if any.
    if let Some(c) = optimal_delimiter_char {
--- a/src/unit/bang.rs
+++ b/src/unit/bang.rs
@ -0,0 +1,12 @@
 use crate::proc::Processor;
 use crate::err::HbRes;
 /// Processes a bang construct: the literal `<!`, every following byte that is not `>`, and then
 /// the closing `>`.
 ///
 /// Each part is finished with `keep()`, which presumably copies the matched bytes to the output
 /// unchanged — confirm against `Processor`.
 ///
 /// # Errors
 ///
 /// Propagates the error produced by `require()` when `<!` or the closing `>` is not matched.
 pub fn process_bang<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
    cascade_return!(proc.match_seq(b"<!").require()?.keep());
    // Body of the construct: everything up to (but not including) the next `>`.
    cascade_return!(proc.match_while_not_char(b'>').keep());
    cascade_return!(proc.match_char(b'>').require()?.keep());
    Ok(())
 }
--- a/src/unit/comment.rs
+++ b/src/unit/comment.rs
@ -0,0 +1,13 @@
 use crate::proc::Processor;
 use crate::err::HbRes;
 /// Processes an HTML comment: the opening `<!--`, the comment body, and the closing `-->`.
 ///
 /// All three parts are finished with `discard()`, which presumably drops the matched bytes
 /// instead of writing them to the output — confirm against `Processor`.
 ///
 /// # Errors
 ///
 /// Propagates the error produced by `require_with_reason("comment end")` when the closing `-->`
 /// is not matched.
 pub fn process_comment<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
    // The opener uses `expect()` rather than `require()`; presumably the caller has already
    // verified that the input is positioned at `<!--` — confirm.
    cascade_return!(proc.match_seq(b"<!--").expect().discard());
    // TODO Cannot use this pattern
    cascade_return!(proc.match_while_not_seq(b"-->").discard());
    cascade_return!(proc.match_seq(b"-->").require_with_reason("comment end")?.discard());
    Ok(())
 }
--- a/src/unit/content.rs
+++ b/src/unit/content.rs
@ -0,0 +1,147 @@
 use crate::err::HbRes;
 use crate::proc::{Checkpoint, Processor, ProcessorRange};
 use crate::spec::codepoint::is_whitespace;
 use crate::spec::tag::content::CONTENT_TAGS;
 use crate::spec::tag::formatting::FORMATTING_TAGS;
 use crate::spec::tag::wss::WSS_TAGS;
 use crate::unit::bang::process_bang;
 use crate::unit::comment::process_comment;
 use crate::unit::entity::process_entity;
 use crate::unit::tag::process_tag;
 /// Classification of whatever comes next in the input while processing content.
 ///
 /// Values are produced by `ContentType::derive_next`, except for `Start`, which is only used as
 /// the initial "previous content type" in `process_content`.
 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
 enum ContentType {
    /// Input continues with `<!--`.
    Comment,
    /// Input continues with `<!` that is not the start of a comment.
    Bang,
    /// Input continues with `<` that is not `</`, `<!--`, or `<!`.
    OpeningTag,
    /// Nothing has been processed yet; never returned by `derive_next`.
    Start,
    /// Input is exhausted or continues with `</`.
    End,
    /// Input continues with `&`.
    Entity,
    /// Next byte satisfies `is_whitespace`.
    Whitespace,
    /// Anything not covered by the other variants.
    Text,
 }
 impl ContentType {
    /// Reports whether this content type is a comment, a bang, or an opening tag.
    fn is_comment_bang_opening_tag(&self) -> bool {
        *self == ContentType::Comment
            || *self == ContentType::Bang
            || *self == ContentType::OpeningTag
    }
    /// Classifies what comes next in the input.
    ///
    /// The checks are ordered from most to least specific prefix: `</` before whitespace,
    /// `<!--` before `<!`, and `<!` before a bare `<`. Anything that matches none of the probes
    /// is treated as text.
    fn derive_next<'d, 'p>(proc: &'p mut Processor<'d>) -> ContentType {
        // TODO Optimise to trie.
        if proc.at_end() || cascade_return!(proc.match_seq(b"</").matched()) {
            ContentType::End
        } else if cascade_return!(proc.match_pred(is_whitespace).matched()) {
            ContentType::Whitespace
        } else if cascade_return!(proc.match_seq(b"<!--").matched()) {
            ContentType::Comment
        } else if cascade_return!(proc.match_seq(b"<!").matched()) {
            // Comments were ruled out above, so this is a bang.
            ContentType::Bang
        } else if cascade_return!(proc.match_char(b'<').matched()) {
            // Comments and bangs were ruled out above, so this is an opening tag.
            ContentType::OpeningTag
        } else if cascade_return!(proc.match_char(b'&').matched()) {
            ContentType::Entity
        } else {
            ContentType::Text
        }
    }
 }
 /// Processes element content (or root content when `parent` is `None`) until a closing tag
 /// (`</`) or the end of input is reached; the closing tag itself is not consumed here.
 ///
 /// Whitespace handling depends on which tag sets contain the parent tag name:
 ///
 /// - collapse: a run of whitespace is replaced by a single space unless the parent is in
 ///   `WSS_TAGS`;
 /// - destroy whole: whitespace between two comments/bangs/opening tags is dropped unless the
 ///   parent is in `WSS_TAGS`, `CONTENT_TAGS`, or `FORMATTING_TAGS`;
 /// - trim: leading and trailing whitespace is dropped unless the parent is in `WSS_TAGS` or
 ///   `FORMATTING_TAGS`.
 ///
 /// Root content (`parent == None`) enables all three.
 ///
 /// # Errors
 ///
 /// Propagates any error returned while processing a comment, bang, tag, entity, or text byte.
 pub fn process_content<'d, 'p>(proc: &'p mut Processor<'d>, parent: Option<ProcessorRange>) -> HbRes<()> {
    // Look the parent tag name up in each tag set once; root content belongs to none of them.
    let (in_wss, in_content, in_formatting) = match parent {
        Some(tag_name) => (
            WSS_TAGS.contains(&proc[tag_name]),
            CONTENT_TAGS.contains(&proc[tag_name]),
            FORMATTING_TAGS.contains(&proc[tag_name]),
        ),
        None => (false, false, false),
    };
    let should_collapse_whitespace = !in_wss;
    let should_destroy_whole_whitespace = !in_wss && !in_content && !in_formatting;
    let should_trim_whitespace = !in_wss && !in_formatting;
    // Trim leading whitespace if configured to do so.
    if should_trim_whitespace {
        cascade_return!(proc.match_while_pred(is_whitespace).discard());
    };
    let mut last_non_whitespace_content_type = ContentType::Start;
    // `Some` while inside a run of whitespace; holds the checkpoint taken at the start of the run.
    let mut whitespace_checkpoint: Option<Checkpoint> = None;
    loop {
        let next_content_type = ContentType::derive_next(proc);
        if next_content_type == ContentType::Whitespace {
            // Whitespace is always skipped first and dealt with once the run ends, even if not
            // minifying.
            if whitespace_checkpoint.is_none() {
                // Start of a run: take the checkpoint BEFORE skipping so that the first
                // whitespace byte is included if the run is later written back verbatim.
                whitespace_checkpoint = Some(proc.checkpoint());
            }
            proc.skip();
            continue;
        }
        // Next character is not whitespace, so handle any previously skipped whitespace.
        // `take()` also resets the checkpoint for the next run.
        if let Some(whitespace_start) = whitespace_checkpoint.take() {
            if should_destroy_whole_whitespace
                && last_non_whitespace_content_type.is_comment_bang_opening_tag()
                && next_content_type.is_comment_bang_opening_tag()
            {
                // Whitespace is between two tags, comments, or bangs.
                // destroy_whole_whitespace is on, so don't write it.
            } else if should_trim_whitespace
                && (next_content_type == ContentType::End
                    || last_non_whitespace_content_type == ContentType::Start)
            {
                // Whitespace is leading or trailing.
                // should_trim_whitespace is on, so don't write it.
            } else if should_collapse_whitespace {
                // Current contiguous whitespace needs to be reduced to a single space character.
                proc.write(b' ');
            } else {
                // Whitespace cannot be minified, so write in entirety.
                proc.write_skipped(whitespace_start);
            }
        };
        // Process and consume next character(s).
        match next_content_type {
            ContentType::Comment => { process_comment(proc)?; }
            ContentType::Bang => { process_bang(proc)?; }
            ContentType::OpeningTag => { process_tag(proc)?; }
            ContentType::End => (),
            ContentType::Entity => { process_entity(proc)?; }
            ContentType::Text => { proc.accept()?; }
            // Whitespace was handled by the `continue` above; `Start` is never derived.
            ContentType::Whitespace | ContentType::Start => unreachable!(),
        };
        last_non_whitespace_content_type = next_content_type;
        if next_content_type == ContentType::End {
            break;
        };
    };
    Ok(())
 }
--- a/src/unit/entity.rs
+++ b/src/unit/entity.rs
@ -43,10 +43,10 @@ use crate::proc::Processor;
 use crate::spec::codepoint::{is_digit, is_upper_hex_digit, is_lower_hex_digit, is_hex_digit};
 use crate::spec::entity::{ENTITY_REFERENCES, is_valid_entity_reference_name_char};
 use crate::err::HbRes;
 use crate::code::Code;
 const MAX_UNICODE_CODE_POINT: u32 = 0x10FFFF;
 #[derive(Clone, Copy, Eq, PartialEq)]
 enum Type {
    Malformed,
    Name,
@ -57,39 +57,39 @@ enum Type {
 /// Accumulates the bytes of `slice` as base-10 digits into a `u32`.
 ///
 /// Returns `None` when the result is greater than `MAX_UNICODE_CODE_POINT`, otherwise
 /// `Some(value)`. The bytes are not validated here; presumably callers only pass ASCII digits
 /// — confirm against the call site.
 fn parse_decimal(slice: &[u8]) -> Option<u32> {
    let mut val = 0u32;
    for c in slice {
-        val = val * 10 + (c - b'0');
+        val = val * 10 + (c - b'0') as u32;
    }
    if val > MAX_UNICODE_CODE_POINT {
        None
    } else {
-        val
+        Some(val)
    }
 }
 /// Accumulates the bytes of `slice` as base-16 digits (`0-9`, `A-F`, `a-f`) into a `u32`.
 ///
 /// Returns `None` when the result is greater than `MAX_UNICODE_CODE_POINT`, otherwise
 /// `Some(value)`. A byte that is not a hexadecimal digit reaches `unreachable!()` and panics.
 fn parse_hexadecimal(slice: &[u8]) -> Option<u32> {
    let mut val = 0u32;
    for c in slice {
-        let digit: u32 = if is_digit(c) {
+        let digit = if is_digit(*c) {
            c - b'0'
-        } else if is_upper_hex_digit(c) {
+        } else if is_upper_hex_digit(*c) {
            c - b'A' + 10
-        } else if is_lower_hex_digit(c) {
+        } else if is_lower_hex_digit(*c) {
            c - b'a' + 10
        } else {
            unreachable!();
        };
-        val = val * 16 + digit;
+        val = val * 16 + digit as u32;
-    }
+    };
    if val > MAX_UNICODE_CODE_POINT {
        None
    } else {
-        val
+        Some(val)
    }
 }
 // This will parse and skip characters. Set a checkpoint to later write skipped, or to ignore results and reset to previous position.
-pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
+pub fn parse_entity<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<Option<u32>> {
-    proc.match_char(b'&').expect().discard();
+    cascade_return!(proc.match_char(b'&').expect().discard());
    // The input can end at any time after initial ampersand.
    // Examples of valid complete source code: "&", "&a", "&#", "&#09",
@ -113,21 +113,21 @@ pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
    // First stage: determine the type of entity.
    let predicate: fn(u8) -> bool;
-    let entity_type: Type;
+    let mut entity_type: Type;
    let min_len: usize;
    let max_len: usize;
-    if proc.match_seq(b"#x").discard().matched() {
+    if cascade_return!(proc.match_seq(b"#x").discard().matched()) {
        predicate = is_hex_digit;
        entity_type = Type::Hexadecimal;
        min_len = 1;
        max_len = 6;
-    } else if proc.match_char(b'#').discard().matched() {
+    } else if cascade_return!(proc.match_char(b'#').discard().matched()) {
        predicate = is_digit;
        entity_type = Type::Decimal;
        min_len = 1;
        max_len = 7;
-    } else if proc.match_pred(is_valid_entity_reference_name_char).matched() {
+    } else if cascade_return!(proc.match_pred(is_valid_entity_reference_name_char).matched()) {
        predicate = is_valid_entity_reference_name_char;
        entity_type = Type::Name;
        min_len = 2;
@ -136,14 +136,15 @@ pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
        return Ok(None);
    }
-    // Second stage: try to parse a well formed entity.
+    // Try consuming semicolon before getting data as slice to prevent issues with borrowing.
-    // Malformed entity could be last few characters in code, so allow EOF during entity.
+    if !cascade_return!(proc.match_char(b';').discard().matched()) {
    let data = proc.match_while_pred(predicate).discard().slice();
    if data.len() < min_len || data.len() > max_len {
        entity_type = Type::Malformed;
    };
-    // Don't try to consume semicolon if entity is not well formed already.
+
-    if entity_type != Type::Malformed && !proc.match_char(b';').discard().matched() {
+    // Second stage: try to parse a well formed entity.
    // Malformed entity could be last few characters in code, so allow EOF during entity.
    let data = cascade_return!(proc.match_while_pred(predicate).discard().slice());
    if data.len() < min_len || data.len() > max_len {
        entity_type = Type::Malformed;
    };
@ -162,7 +163,7 @@ pub fn parse_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
 * @return Unicode code point of the entity, or HB_UNIT_ENTITY_NONE if the
 * entity is malformed or invalid
 */
-pub fn process_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
+pub fn process_entity<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<Option<u32>> {
    let checkpoint = proc.checkpoint();
    let parsed = parse_entity(proc)?;
@ -170,7 +171,7 @@ pub fn process_entity<D: Code>(proc: &Processor<D>) -> HbRes<Option<u32>> {
        proc.write_utf8(cp);
    } else {
        // Write discarded characters that could not form a well formed entity.
-        checkpoint.write_skipped();
+        proc.write_skipped(checkpoint);
    };
    Ok(parsed)
--- a/src/unit/mod.rs
+++ b/src/unit/mod.rs
@ -0,0 +1,8 @@
 pub mod attr;
 pub mod bang;
 pub mod comment;
 pub mod content;
 pub mod entity;
 pub mod script;
 pub mod style;
 pub mod tag;
--- a/src/unit/script.rs
+++ b/src/unit/script.rs
@ -1,19 +1,18 @@
 use crate::err::{HbRes, HbErr};
 use crate::proc::{Processor};
 use crate::code::Code;
 fn is_string_delimiter(c: u8) -> bool {
    c == b'"' || c == b'\''
 }
-fn parse_comment_single<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+fn parse_comment_single<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    proc.match_seq(b"//").expect().keep();
+    cascade_return!(proc.match_seq(b"//").expect().keep());
    // Comment can end at closing </script>.
    // WARNING: Closing tag must not contain whitespace.
    // TODO Optimise
-    while !proc.match_line_terminator().keep().matched() {
+    while !cascade_return!(proc.match_line_terminator().keep().matched()) {
-        if proc.match_seq_i(b"</script>").matched() {
+        if cascade_return!(proc.match_seq(b"</script>").matched()) {
            break;
        }
@ -23,14 +22,14 @@ fn parse_comment_single<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }
-fn parse_comment_multi<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+fn parse_comment_multi<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    proc.match_seq(b"/*").expect().keep();
+    cascade_return!(proc.match_seq(b"/*").expect().keep());
    // Comment can end at closing </script>.
    // WARNING: Closing tag must not contain whitespace.
    // TODO Optimise
-    while !proc.match_seq(b"*/").keep().matched() {
+    while !cascade_return!(proc.match_seq(b"*/").keep().matched()) {
-        if proc.match_seq_i(b"</script>").matched() {
+        if cascade_return!(proc.match_seq(b"</script>").matched()) {
            break;
        }
@ -40,8 +39,8 @@ fn parse_comment_multi<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }
-fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    let delim = proc.match_pred(is_string_delimiter).expect().keep().char();
+    let delim = cascade_return!(proc.match_pred(is_string_delimiter).expect().keep().char());
    let mut escaping = false;
@ -57,7 +56,7 @@ fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
            break;
        }
-        if proc.match_line_terminator().keep().matched() {
+        if cascade_return!(proc.match_line_terminator().keep().matched()) {
            if !escaping {
                return Err(HbErr::ExpectedNotFound("Unterminated JavaScript string"));
            }
@ -69,8 +68,8 @@ fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }
-fn parse_template<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+fn parse_template<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    proc.match_char(b'`').expect().keep();
+    cascade_return!(proc.match_char(b'`').expect().keep());
    let mut escaping = false;
@ -92,15 +91,15 @@ fn parse_template<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }
-pub fn process_script<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+pub fn process_script<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    while !proc.match_seq(b"</").matched() {
+    while !cascade_return!(proc.match_seq(b"</").matched()) {
-        if proc.match_seq(b"//").matched() {
+        if cascade_return!(proc.match_seq(b"//").matched()) {
            parse_comment_single(proc)?;
-        } else if proc.match_seq(b"/*").matched() {
+        } else if cascade_return!(proc.match_seq(b"/*").matched()) {
            parse_comment_multi(proc)?;
-        } else if proc.match_pred(is_string_delimiter).matched() {
+        } else if cascade_return!(proc.match_pred(is_string_delimiter).matched()) {
            parse_string(proc)?;
-        } else if proc.match_char(b'`').matched() {
+        } else if cascade_return!(proc.match_char(b'`').matched()) {
            parse_template(proc)?;
        } else {
            proc.accept()?;
--- a/src/unit/style.rs
+++ b/src/unit/style.rs
@ -1,6 +1,5 @@
 use crate::proc::Processor;
 use crate::err::{HbRes, HbErr};
 use crate::code::Code;
 fn is_string_delimiter(c: u8) -> bool {
    match c {
@ -9,19 +8,19 @@ fn is_string_delimiter(c: u8) -> bool {
    }
 }
-fn parse_comment<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+fn parse_comment<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    proc.match_seq(b"/*").expect().keep();
+    cascade_return!(proc.match_seq(b"/*").expect().keep());
    // Unlike script tags, style comments do NOT end at closing tag.
-    while !proc.match_seq(b"*/").keep().matched() {
+    while !cascade_return!(proc.match_seq(b"*/").keep().matched()) {
        proc.accept();
    };
    Ok(())
 }
-fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    let delim = proc.match_pred(is_string_delimiter).expect().keep().char();
+    let delim = cascade_return!(proc.match_pred(is_string_delimiter).expect().keep().char());
    let mut escaping = false;
@ -37,7 +36,7 @@ fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
            break;
        }
-        if proc.match_line_terminator().keep().matched() {
+        if cascade_return!(proc.match_line_terminator().keep().matched()) {
            if !escaping {
                // TODO Use better error type.
                return Err(HbErr::ExpectedNotFound("Unterminated CSS string"));
@ -50,11 +49,11 @@ fn parse_string<D: Code>(proc: &Processor<D>) -> HbRes<()> {
    Ok(())
 }
-pub fn process_style<D: Code>(proc: &Processor<D>) -> HbRes<()> {
+pub fn process_style<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    while !proc.match_seq(b"</").matched() {
+    while !cascade_return!(proc.match_seq(b"</").matched()) {
-        if proc.match_seq(b"/*").matched() {
+        if cascade_return!(proc.match_seq(b"/*").matched()) {
            parse_comment(proc)?;
-        } else if proc.match_pred(is_string_delimiter).matched() {
+        } else if cascade_return!(proc.match_pred(is_string_delimiter).matched()) {
            parse_string(proc)?;
        } else {
            proc.accept()?;
--- a/src/unit/tag.rs
+++ b/src/unit/tag.rs
@ -1,12 +1,11 @@
-use crate::proc::attr::{AttrType, process_attr};
+use crate::err::{HbErr, HbRes};
 use crate::err::{HbRes, HbErr};
 use crate::proc::Processor;
 use crate::spec::codepoint::{is_alphanumeric, is_whitespace};
 use crate::proc::content::process_content;
 use crate::proc::script::process_script;
 use crate::proc::style::process_style;
 use crate::spec::tag::void::VOID_TAGS;
-use crate::code::Code;
+use crate::unit::attr::{AttrType, process_attr};
 use crate::unit::content::process_content;
 use crate::unit::script::process_script;
 use crate::unit::style::process_style;
 // Tag names may only use ASCII alphanumerics. However, some people also use `:` and `-`.
 // See https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name for spec.
@ -14,13 +13,12 @@ fn is_valid_tag_name_char(c: u8) -> bool {
    is_alphanumeric(c) || c == b':' || c == b'-'
 }
-fn process_tag_name<'d, D: Code>(proc: &Processor<'d, D>) -> HbRes<&'d [u8]> {
+pub fn process_tag<'d, 'p>(proc: &'p mut Processor<'d>) -> HbRes<()> {
-    Ok(proc.while_pred(is_valid_tag_name_char).require_reason("tag name")?.accept().slice())
+    // Expect to be currently at an opening tag.
-}
+    cascade_return!(proc.match_char(b'<').expect().keep())
-
+    ;
-pub fn process_tag<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes<()> {
+    // May not be valid tag name at current position, so require instead of expect.
-    proc.is('<').require().accept();
+    let name_token = cascade_return!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("tag name")?.keep().range());
    let name = process_tag_name(proc)?;
    let mut last_attr_type = AttrType::None;
    let mut self_closing = false;
@ -29,14 +27,15 @@ pub fn process_tag<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes
        // At the beginning of this loop, the last parsed unit was
        // either the tag name or an attribute (including its value, if
        // it had one).
-        let ws_accepted = proc.match_while_pred(is_whitespace).discard().count();
+        let ws_accepted = cascade_return!(proc.match_while_pred(is_whitespace).discard().matched());
-        if proc.match_char(b'>').keep().matched() {
+        if cascade_return!(proc.match_char(b'>').keep().matched()) {
            // End of tag.
            break;
        }
-        if self_closing = proc.match_seq(b"/>").keep().matched() {
+        self_closing = cascade_return!(proc.match_seq(b"/>").keep().matched());
        if self_closing {
            break;
        }
@ -52,28 +51,29 @@ pub fn process_tag<D: Code>(proc: &Processor<D>, parent: Option<&[u8]>) -> HbRes
        }
        last_attr_type = process_attr(proc)?;
-    }
+    };
-    if self_closing || VOID_TAGS.contains(&name) {
+    if self_closing || VOID_TAGS.contains(&proc[name_token]) {
        return Ok(());
-    }
+    };
    // TODO WARNING: Tags must be case sensitive.
-    match name {
+    match &proc[name_token] {
        b"script" => process_script(proc)?,
        b"style" => process_style(proc)?,
-        _ => process_content(proc, Some(name))?,
+        _ => process_content(proc, Some(name_token))?,
-    }
+        _ => unreachable!(),
    };
    // Require closing tag for non-void.
-    proc.match_seq(b"</").require_with_reason("closing tag")?.keep();
+    cascade_return!(proc.match_seq(b"</").require_with_reason("closing tag")?.keep());
-    let closing_name = process_tag_name(proc)?;
+    let closing_name = cascade_return!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("closing tag name")?.keep().slice());
-    if name != closing_name {
+    if &proc[name_token] != closing_name {
        // TODO Find a way to cleanly provide opening and closing tag
        // names (which are views) into error message without leaking
        // memory.
        return Err(HbErr::UnclosedTag);
-    }
+    };
-    proc.match_char(b'>').require_with_reason("closing tag")?.keep();
+    cascade_return!(proc.match_char(b'>').require_with_reason("closing tag")?.keep());
    Ok(())
 }