diff --git a/src/pattern.rs b/src/pattern.rs index 8acb847..3ca8f82 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -13,15 +13,6 @@ pub enum TrieNodeMatch { NotFound { reached: usize }, } -impl TrieNodeMatch { - pub fn found(&self) -> bool { - match self { - TrieNodeMatch::Found { .. } => true, - TrieNodeMatch::NotFound { .. } => false, - } - } -} - impl TrieNode { // Find the node that matches the shortest prefix of {@param text} that: // - has a value (except the start node if it has a value); diff --git a/src/proc/checkpoint.rs b/src/proc/checkpoint.rs index 812ca6a..47699d6 100644 --- a/src/proc/checkpoint.rs +++ b/src/proc/checkpoint.rs @@ -3,7 +3,9 @@ use crate::proc::range::ProcessorRange; #[derive(Copy, Clone)] pub struct Checkpoint { - read_next: usize, + // Avoid implementing a read position checkpoint, as source code does get modified (e.g. normalising entities), and + // there's no check to see if source has since been overwritten (e.g. writing over source and then restoring earlier + // write position). write_next: usize, } @@ -17,7 +19,6 @@ impl Checkpoint { pub fn new(proc: &Processor) -> Checkpoint { Checkpoint { - read_next: proc.read_next, write_next: proc.write_next, } } @@ -30,37 +31,16 @@ impl Checkpoint { } } - /// Write characters skipped from source since self. Must not have written anything since self. - pub fn write_skipped(&mut self, proc: &mut Processor) -> () { - // Make sure that nothing has been written since checkpoint (which would be lost). - debug_assert_eq!(proc.write_next, self.write_next); - // Get src code from checkpoint until last consumed character (inclusive). - let src_start = self.read_next; - let src_end = proc.read_next; - proc.code.copy_within(src_start..src_end, self.write_next); - proc.write_next += src_end - src_start; - } - /// Discard characters written since checkpoint but keep source position. pub fn erase_written(&self, proc: &mut Processor) -> () { proc.write_next = self.write_next; } - /// Get consumed characters since checkpoint as range. - pub fn consumed_range(&self, proc: &mut Processor) -> ProcessorRange { - ProcessorRange { start: self.read_next, end: proc.read_next } - } - /// Get written characters since checkpoint as range. pub fn written_range(&self, proc: &mut Processor) -> ProcessorRange { ProcessorRange { start: self.write_next, end: proc.write_next } } - /// Get amount of source characters consumed since self. - pub fn consumed_count(&self, proc: &mut Processor) -> usize { - proc.read_next - self.read_next - } - /// Get amount of output characters written since self. pub fn written_count(&self, proc: &mut Processor) -> usize { proc.write_next - self.write_next diff --git a/src/proc/entity.rs b/src/proc/entity.rs index 3097b24..96be1ab 100644 --- a/src/proc/entity.rs +++ b/src/proc/entity.rs @@ -26,9 +26,7 @@ enum Parsed { write_len: usize, }, // Some entities are shorter than their decoded UTF-8 sequence. As such, we leave them encoded. - LeftEncoded { - len: usize, - }, + LeftEncoded, // This is for any entity-like sequence that couldn't match the `ENTITY` trie. Invalid { len: usize, @@ -103,9 +101,7 @@ fn parse_entity(code: &mut [u8], read_pos: usize, write_pos: usize) -> Parsed { ), EntityType::Named(decoded) => { if decoded[0] == b'&' && decoded.len() > 1 { - Parsed::LeftEncoded { - len: decoded.len(), - } + Parsed::LeftEncoded } else { code[write_pos..write_pos + decoded.len()].copy_from_slice(decoded); Parsed::Decoded { @@ -143,7 +139,7 @@ pub fn maybe_normalise_entity(proc: &mut Processor) -> bool { Some(b'&') => { // Decode before checking to see if it continues current entity. let (read_len, write_len) = match parse_entity(proc.code, read_next, write_next) { - Parsed::LeftEncoded { len } => { + Parsed::LeftEncoded => { // Don't mistake an intentionally undecoded entity for an unintentional entity. break; } diff --git a/src/proc/mod.rs b/src/proc/mod.rs index 2d81faf..96bdd85 100644 --- a/src/proc/mod.rs +++ b/src/proc/mod.rs @@ -3,7 +3,6 @@ use std::fmt::{Debug, Formatter}; use std::ops::{Index, IndexMut}; use crate::err::{ErrorType, ProcessingResult}; -use crate::pattern::{TrieNode, TrieNodeMatch}; use crate::proc::MatchAction::*; use crate::proc::MatchMode::*; use crate::proc::range::ProcessorRange; @@ -180,21 +179,6 @@ impl<'d> Processor<'d> { ProcessorRange { start, end: start + count } } - #[inline(always)] - pub fn m_trie(&mut self, trie: &TrieNode, action: MatchAction) -> Option { - match trie.longest_matching_prefix(&self.code[self.read_next..]) { - TrieNodeMatch::Found { len, value } => { - match action { - Discard => self.read_next += len, - Keep => self._shift(len), - MatchOnly => {} - }; - Some(value) - } - TrieNodeMatch::NotFound { .. } => None, - } - } - // PUBLIC APIs. // Bounds checking pub fn at_end(&self) -> bool { diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 3bbd775..a37cab5 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,13 +1,11 @@ -use super::*; -use std::str::from_utf8; - +#[cfg(test)] fn eval(src: &'static [u8], expected: &'static [u8]) -> () { let mut code = src.to_vec(); - match hyperbuild_friendly_error(&mut code) { + match super::hyperbuild_friendly_error(&mut code) { Ok(len) => { - assert_eq!(from_utf8(&code[..len]).unwrap(), from_utf8(expected).unwrap()); + assert_eq!(std::str::from_utf8(&code[..len]).unwrap(), std::str::from_utf8(expected).unwrap()); } - Err(FriendlyError { code_context, message, .. }) => { + Err(super::FriendlyError { code_context, message, .. }) => { println!("{}", message); println!("{}", code_context); assert!(false);