Use faster manual matching for content

This commit is contained in:
Wilson Lin 2020-01-05 13:28:34 +11:00
parent bfd5f65ba1
commit 74e6352900
5 changed files with 32 additions and 20 deletions

View File

@ -1,14 +1 @@
{
"CONTENT_TYPE": {
"</": "ContentType::End",
"\u0009": "ContentType::Whitespace",
"\u000a": "ContentType::Whitespace",
"\u000c": "ContentType::Whitespace",
"\u000d": "ContentType::Whitespace",
"\u0020": "ContentType::Whitespace",
"<!--": "ContentType::Comment",
"<!": "ContentType::Bang",
"<": "ContentType::OpeningTag",
"&": "ContentType::Entity"
}
}
{}

View File

@ -371,6 +371,13 @@ impl<'d> Processor<'d> {
/// Peeks at read offset 0 without consuming anything (takes `&self`).
///
/// Returns whatever `_maybe_read_offset(0)` yields. Callers in this crate treat `None` as
/// "no more input"; NOTE(review): confirm against `_maybe_read_offset`.
pub fn peek_eof(&self) -> Option<u8> {
self._maybe_read_offset(0)
}
/// Peeks at `count` bytes starting `offset` bytes past the current read position, without
/// consuming them.
///
/// Returns `None` when the requested range does not fit inside `self.code`.
///
/// A `count` of zero yields an empty slice (provided the start position is not past the end)
/// instead of underflowing `offset + count - 1` as a manual bounds check would.
///
/// NOTE(review): this assumes `_in_bounds(n)` means `self.read_next + n < self.code.len()`,
/// which makes the checked slice below equivalent for every `count >= 1` — confirm.
pub fn peek_slice_offset_eof(&self, offset: usize, count: usize) -> Option<&[u8]> {
    let start = self.read_next + offset;
    // Checked slicing performs the bounds test and the slice in one step, with no panic path.
    self.code.get(start..start + count)
}
/// Fallible peek at read offset 0; does not consume anything (takes `&self`).
///
/// Converts a `None` from `_maybe_read_offset(0)` into `ErrorType::UnexpectedEnd`, so callers
/// can propagate a missing byte with `?`.
pub fn peek(&self) -> ProcessingResult<u8> {
self._maybe_read_offset(0).ok_or(ErrorType::UnexpectedEnd)
}
@ -444,6 +451,10 @@ impl<'d> Processor<'d> {
self._shift(1);
c
}
/// Advances by `count` via `_shift(count)`, trusting the caller that `count` bytes are
/// available.
///
/// The bounds requirement is only verified by a `debug_assert!`, so release builds perform no
/// check at all. Use `accept_amount` when the input may be too short.
pub fn accept_amount_expect(&mut self, count: usize) {
    // Guard zero before subtracting: `count - 1` on a `usize` would underflow and panic with
    // an unrelated overflow message in debug builds. Mirrors the check in `accept_amount`.
    debug_assert!(count == 0 || self._in_bounds(count - 1));
    self._shift(count);
}
pub fn accept_amount(&mut self, count: usize) -> ProcessingResult<()> {
// Check for zero to prevent underflow as type is usize.
if count == 0 || self._in_bounds(count - 1) {

View File

@ -2,7 +2,11 @@ use crate::err::ProcessingResult;
use crate::proc::Processor;
pub fn process_bang(proc: &mut Processor) -> ProcessingResult<()> {
chain!(proc.match_seq(b"<!").require()?.keep());
if cfg!(debug_assertions) {
chain!(proc.match_seq(b"<!").expect().keep());
} else {
proc.skip_amount_expect(2);
};
chain!(proc.match_while_not_char(b'>').keep());

View File

@ -35,10 +35,20 @@ impl ContentType {
}
fn peek(proc: &mut Processor) -> ContentType {
if proc.at_end() {
return ContentType::End;
};
proc.match_trie(CONTENT_TYPE).unwrap_or(ContentType::Text)
// Manually write out the matching for performance, as this is a hot spot.
match proc.peek_eof() {
None => ContentType::End,
Some(b'<') => match proc.peek_offset_eof(1) {
Some(b'/') => ContentType::End,
Some(b'!') => match proc.peek_slice_offset_eof(2, 2) {
Some(b"--") => ContentType::Comment,
_ => ContentType::Bang,
},
_ => ContentType::OpeningTag
},
Some(b'&') => ContentType::Entity,
Some(c) => if is_whitespace(c) { ContentType::Whitespace } else { ContentType::Text },
}
}
}

View File

@ -12,7 +12,7 @@ fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
if cfg!(debug_assertions) {
chain!(proc.match_seq(b"/*").expect().keep());
} else {
proc.skip_amount_expect(2);
proc.accept_amount_expect(2);
};
// Unlike script tags, style comments do NOT end at closing tag.