// minify-html/rust/main/src/parse/mod.rs

use crate::common::gen::codepoints::Lookup;
2021-08-05 22:07:27 -04:00
2021-08-06 02:17:45 -04:00
pub mod bang;
pub mod comment;
pub mod content;
pub mod doctype;
2021-08-06 02:17:45 -04:00
pub mod element;
pub mod instruction;
pub mod script;
pub mod style;
2021-08-06 06:16:30 -04:00
#[cfg(test)]
mod tests;
2021-08-06 02:17:45 -04:00
pub mod textarea;
pub mod title;
2021-08-05 22:07:27 -04:00
/// A forward-moving cursor over a borrowed byte slice, plus four public `seen_*` flags.
///
/// The cursor never copies the input: `code` borrows it for `'c` and `next` is the index of the
/// first byte that has not been consumed yet.
pub struct Code<'c> {
    /// The complete input being read.
    code: &'c [u8],
    /// Index into `code` of the next unconsumed byte.
    next: usize,
    // NOTE(review): the flags below are presumably set by the parser when it meets the
    // corresponding `<html>`, `<head>`, `</head>` and `<body>` tags; this file only
    // initialises them to `false` and never reads them. Confirm against the callers.
    pub seen_html_open: bool,
    pub seen_head_open: bool,
    pub seen_head_close: bool,
    pub seen_body_open: bool,
}
/// A saved cursor position.
///
/// The wrapped index is private to this module, so a value can only be obtained from
/// `Code::take_checkpoint` and handed back to `Code::restore_checkpoint`.
#[derive(Clone, Copy)]
pub struct Checkpoint(usize);
impl<'c> Code<'c> {
pub fn new(code: &[u8]) -> Code {
Code {
code,
next: 0,
seen_html_open: false,
seen_head_open: false,
seen_head_close: false,
seen_body_open: false,
}
2021-08-05 22:07:27 -04:00
}
pub fn as_slice(&self) -> &[u8] {
2021-08-05 22:07:27 -04:00
&self.code[self.next..]
}
pub fn take_checkpoint(&self) -> Checkpoint {
Checkpoint(self.next)
}
2021-08-06 09:18:45 -04:00
pub fn restore_checkpoint(&mut self, cp: Checkpoint) {
2021-08-05 22:07:27 -04:00
self.next = cp.0;
}
pub fn at_end(&self) -> bool {
2021-08-06 06:16:30 -04:00
debug_assert!(self.next <= self.code.len());
2021-08-05 22:07:27 -04:00
self.next == self.code.len()
}
pub fn shift_if_next(&mut self, c: u8) -> bool {
if self.code.get(self.next).filter(|&&n| n == c).is_some() {
self.next += 1;
true
} else {
false
}
}
pub fn shift_if_next_seq_case_insensitive(&mut self, seq: &[u8]) -> bool {
if self
.code
.get(self.next..self.next + seq.len())
.filter(|n| n.eq_ignore_ascii_case(seq))
.is_some()
{
self.next += seq.len();
true
} else {
2021-08-05 22:07:27 -04:00
false
}
}
pub fn shift_if_next_in_lookup(&mut self, lookup: &'static Lookup) -> Option<u8> {
2021-08-06 09:18:45 -04:00
let c = self.code.get(self.next).filter(|&&n| lookup[n]).copied();
2021-08-05 22:07:27 -04:00
if c.is_some() {
self.next += 1;
};
c
}
2021-08-06 06:16:30 -04:00
pub fn shift_if_next_not_in_lookup(&mut self, lookup: &'static Lookup) -> Option<u8> {
2021-08-06 09:23:05 -04:00
let c = self.code.get(self.next).filter(|&&n| !lookup[n]).copied();
2021-08-06 06:16:30 -04:00
if c.is_some() {
self.next += 1;
};
c
2021-08-05 22:07:27 -04:00
}
2021-08-06 09:18:45 -04:00
pub fn shift(&mut self, n: usize) {
2021-08-05 22:07:27 -04:00
self.next += n;
}
2021-08-05 23:36:07 -04:00
pub fn slice_and_shift(&mut self, n: usize) -> &[u8] {
let str = &self.code[self.next..self.next + n];
2021-08-05 22:07:27 -04:00
self.next += n;
str
}
2021-08-05 23:36:07 -04:00
pub fn copy_and_shift(&mut self, n: usize) -> Vec<u8> {
self.slice_and_shift(n).to_vec()
}
2021-08-05 22:07:27 -04:00
pub fn copy_and_shift_while_in_lookup(&mut self, lookup: &'static Lookup) -> Vec<u8> {
let mut len = 0;
loop {
match self.code.get(self.next + len) {
Some(&c) if lookup[c] => len += 1,
_ => break,
};
2021-08-06 02:19:36 -04:00
}
2021-08-05 22:07:27 -04:00
self.copy_and_shift(len)
}
2021-08-05 23:36:07 -04:00
pub fn slice_and_shift_while_not_in_lookup(&mut self, lookup: &'static Lookup) -> &[u8] {
2021-08-05 22:07:27 -04:00
let mut len = 0;
loop {
match self.code.get(self.next + len) {
Some(&c) if !lookup[c] => len += 1,
_ => break,
};
2021-08-06 02:19:36 -04:00
}
2021-08-05 23:36:07 -04:00
self.slice_and_shift(len)
}
2021-08-05 22:07:27 -04:00
// Returns the last character matched.
pub fn shift_while_in_lookup(&mut self, lookup: &'static Lookup) -> Option<u8> {
let mut last: Option<u8> = None;
loop {
match self.code.get(self.next) {
Some(&c) if lookup[c] => {
self.next += 1;
last = Some(c);
}
_ => break,
};
2021-08-06 02:19:36 -04:00
}
2021-08-05 22:07:27 -04:00
last
}
pub fn rem(&self) -> usize {
self.code.len() - self.next
}
}