Refactoring; fix whitespace minification in content
This commit is contained in:
parent
85a388d7c8
commit
da796a5839
|
@ -1,11 +1,13 @@
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum ErrorType {
|
pub enum ErrorType {
|
||||||
|
NoSpaceBeforeAttr,
|
||||||
|
UnterminatedCssString,
|
||||||
|
UnterminatedJsString,
|
||||||
CharNotFound { need: u8, got: u8 },
|
CharNotFound { need: u8, got: u8 },
|
||||||
MatchNotFound(&'static [u8]),
|
MatchNotFound(&'static [u8]),
|
||||||
NotFound(&'static str),
|
NotFound(&'static str),
|
||||||
NoSpaceBeforeAttr,
|
|
||||||
UnexpectedChar(u8),
|
UnexpectedChar(u8),
|
||||||
UnexpectedEnd,
|
UnexpectedEnd,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type InternalResult<T> = Result<T, ErrorType>;
|
pub type ProcessingResult<T> = Result<T, ErrorType>;
|
||||||
|
|
20
src/proc.rs
20
src/proc.rs
|
@ -2,7 +2,7 @@ use std::ops::Index;
|
||||||
|
|
||||||
use phf::Set;
|
use phf::Set;
|
||||||
|
|
||||||
use crate::err::{ErrorType, InternalResult};
|
use crate::err::{ErrorType, ProcessingResult};
|
||||||
|
|
||||||
macro_rules! chain {
|
macro_rules! chain {
|
||||||
($proc:ident $($tail:tt)+) => ({
|
($proc:ident $($tail:tt)+) => ({
|
||||||
|
@ -158,7 +158,7 @@ impl<'d> Processor<'d> {
|
||||||
self._new_match(count, None, RequireReason::Custom)
|
self._new_match(count, None, RequireReason::Custom)
|
||||||
}
|
}
|
||||||
// Ensure that match is nonempty or return error.
|
// Ensure that match is nonempty or return error.
|
||||||
fn _match_require(&self, custom_reason: Option<&'static str>) -> InternalResult<()> {
|
fn _match_require(&self, custom_reason: Option<&'static str>) -> ProcessingResult<()> {
|
||||||
if self.match_len > 0 {
|
if self.match_len > 0 {
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
|
@ -207,10 +207,10 @@ impl<'d> Processor<'d> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assert match.
|
// Assert match.
|
||||||
pub fn require(&self) -> InternalResult<()> {
|
pub fn require(&self) -> ProcessingResult<()> {
|
||||||
self._match_require(None)
|
self._match_require(None)
|
||||||
}
|
}
|
||||||
pub fn require_with_reason(&self, reason: &'static str) -> InternalResult<()> {
|
pub fn require_with_reason(&self, reason: &'static str) -> ProcessingResult<()> {
|
||||||
self._match_require(Some(reason))
|
self._match_require(Some(reason))
|
||||||
}
|
}
|
||||||
// TODO Document
|
// TODO Document
|
||||||
|
@ -361,20 +361,20 @@ impl<'d> Processor<'d> {
|
||||||
pub fn peek_offset_eof(&self, offset: usize) -> Option<u8> {
|
pub fn peek_offset_eof(&self, offset: usize) -> Option<u8> {
|
||||||
self._maybe_read_offset(offset)
|
self._maybe_read_offset(offset)
|
||||||
}
|
}
|
||||||
pub fn peek_offset(&self, offset: usize) -> InternalResult<u8> {
|
pub fn peek_offset(&self, offset: usize) -> ProcessingResult<u8> {
|
||||||
self._maybe_read_offset(offset).ok_or(ErrorType::UnexpectedEnd)
|
self._maybe_read_offset(offset).ok_or(ErrorType::UnexpectedEnd)
|
||||||
}
|
}
|
||||||
pub fn peek_eof(&self) -> Option<u8> {
|
pub fn peek_eof(&self) -> Option<u8> {
|
||||||
self._maybe_read_offset(0)
|
self._maybe_read_offset(0)
|
||||||
}
|
}
|
||||||
pub fn peek(&self) -> InternalResult<u8> {
|
pub fn peek(&self) -> ProcessingResult<u8> {
|
||||||
self._maybe_read_offset(0).ok_or(ErrorType::UnexpectedEnd)
|
self._maybe_read_offset(0).ok_or(ErrorType::UnexpectedEnd)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consuming source characters.
|
// Consuming source characters.
|
||||||
/// Skip the next `count` characters (can be zero).
|
/// Skip the next `count` characters (can be zero).
|
||||||
/// Will result in an error if exceeds bounds.
|
/// Will result in an error if exceeds bounds.
|
||||||
pub fn skip_amount(&mut self, count: usize) -> InternalResult<()> {
|
pub fn skip_amount(&mut self, count: usize) -> ProcessingResult<()> {
|
||||||
// Check for zero to prevent underflow as type is usize.
|
// Check for zero to prevent underflow as type is usize.
|
||||||
if count == 0 || self._in_bounds(count - 1) {
|
if count == 0 || self._in_bounds(count - 1) {
|
||||||
self.read_next += count;
|
self.read_next += count;
|
||||||
|
@ -385,7 +385,7 @@ impl<'d> Processor<'d> {
|
||||||
}
|
}
|
||||||
/// Skip and return the next character.
|
/// Skip and return the next character.
|
||||||
/// Will result in an error if exceeds bounds.
|
/// Will result in an error if exceeds bounds.
|
||||||
pub fn skip(&mut self) -> InternalResult<u8> {
|
pub fn skip(&mut self) -> ProcessingResult<u8> {
|
||||||
if !self.at_end() {
|
if !self.at_end() {
|
||||||
let c = self._read_offset(0);
|
let c = self._read_offset(0);
|
||||||
self.read_next += 1;
|
self.read_next += 1;
|
||||||
|
@ -435,7 +435,7 @@ impl<'d> Processor<'d> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shifting characters.
|
// Shifting characters.
|
||||||
pub fn accept(&mut self) -> InternalResult<u8> {
|
pub fn accept(&mut self) -> ProcessingResult<u8> {
|
||||||
if !self.at_end() {
|
if !self.at_end() {
|
||||||
let c = self._read_offset(0);
|
let c = self._read_offset(0);
|
||||||
self._shift(1);
|
self._shift(1);
|
||||||
|
@ -444,7 +444,7 @@ impl<'d> Processor<'d> {
|
||||||
Err(ErrorType::UnexpectedEnd)
|
Err(ErrorType::UnexpectedEnd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn accept_amount(&mut self, count: usize) -> InternalResult<()> {
|
pub fn accept_amount(&mut self, count: usize) -> ProcessingResult<()> {
|
||||||
// Check for zero to prevent underflow as type is usize.
|
// Check for zero to prevent underflow as type is usize.
|
||||||
if count == 0 || self._in_bounds(count - 1) {
|
if count == 0 || self._in_bounds(count - 1) {
|
||||||
self._shift(count);
|
self._shift(count);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::proc::Processor;
|
use crate::proc::Processor;
|
||||||
use crate::err::InternalResult;
|
use crate::err::ProcessingResult;
|
||||||
use crate::spec::codepoint::is_control;
|
use crate::spec::codepoint::is_control;
|
||||||
use phf::{Set, phf_set};
|
use phf::{Set, phf_set};
|
||||||
use crate::unit::attr::value::process_attr_value;
|
use crate::unit::attr::value::process_attr_value;
|
||||||
|
@ -30,7 +30,7 @@ fn is_name_char(c: u8) -> bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn process_attr<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<AttrType> {
|
pub fn process_attr(proc: &mut Processor) -> ProcessingResult<AttrType> {
|
||||||
// Expect `process_attr` to be called at an attribute.
|
// Expect `process_attr` to be called at an attribute.
|
||||||
let name = chain!(proc.match_while_pred(is_name_char).expect().keep().slice());
|
let name = chain!(proc.match_while_pred(is_name_char).expect().keep().slice());
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use phf::{Map, phf_map};
|
use phf::{Map, phf_map};
|
||||||
|
|
||||||
use crate::err::InternalResult;
|
use crate::err::ProcessingResult;
|
||||||
use crate::proc::Processor;
|
use crate::proc::Processor;
|
||||||
use crate::spec::codepoint::is_whitespace;
|
use crate::spec::codepoint::is_whitespace;
|
||||||
use crate::unit::attr::AttrType;
|
use crate::unit::attr::AttrType;
|
||||||
|
@ -207,7 +207,7 @@ macro_rules! consume_attr_value_chars {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn process_attr_value<'d, 'p>(proc: &'p mut Processor<'d>, should_collapse_and_trim_ws: bool) -> InternalResult<AttrType> {
|
pub fn process_attr_value(proc: &mut Processor, should_collapse_and_trim_ws: bool) -> ProcessingResult<AttrType> {
|
||||||
// Processing a quoted attribute value is tricky, due to the fact that
|
// Processing a quoted attribute value is tricky, due to the fact that
|
||||||
// it's not possible to know whether or not to unquote the value until
|
// it's not possible to know whether or not to unquote the value until
|
||||||
// the value has been processed. For example, decoding an entity could
|
// the value has been processed. For example, decoding an entity could
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use crate::proc::Processor;
|
use crate::proc::Processor;
|
||||||
use crate::err::InternalResult;
|
use crate::err::ProcessingResult;
|
||||||
|
|
||||||
pub fn process_bang<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
pub fn process_bang(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
chain!(proc.match_seq(b"<!").require()?.keep());
|
chain!(proc.match_seq(b"<!").require()?.keep());
|
||||||
|
|
||||||
chain!(proc.match_while_not_char(b'>').keep());
|
chain!(proc.match_while_not_char(b'>').keep());
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use crate::proc::Processor;
|
use crate::proc::Processor;
|
||||||
use crate::err::InternalResult;
|
use crate::err::ProcessingResult;
|
||||||
|
|
||||||
pub fn process_comment<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
pub fn process_comment(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
chain!(proc.match_seq(b"<!--").expect().discard());
|
chain!(proc.match_seq(b"<!--").expect().discard());
|
||||||
|
|
||||||
// TODO Cannot use this pattern
|
// TODO Cannot use this pattern
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use crate::err::InternalResult;
|
use crate::err::ProcessingResult;
|
||||||
use crate::proc::{Checkpoint, Processor, ProcessorRange};
|
use crate::proc::{Checkpoint, Processor, ProcessorRange};
|
||||||
use crate::spec::codepoint::is_whitespace;
|
use crate::spec::codepoint::is_whitespace;
|
||||||
use crate::spec::tag::content::CONTENT_TAGS;
|
use crate::spec::tag::content::CONTENT_TAGS;
|
||||||
|
@ -6,7 +6,7 @@ use crate::spec::tag::formatting::FORMATTING_TAGS;
|
||||||
use crate::spec::tag::wss::WSS_TAGS;
|
use crate::spec::tag::wss::WSS_TAGS;
|
||||||
use crate::unit::bang::process_bang;
|
use crate::unit::bang::process_bang;
|
||||||
use crate::unit::comment::process_comment;
|
use crate::unit::comment::process_comment;
|
||||||
use crate::unit::entity::process_entity;
|
use crate::unit::entity::{process_entity, maybe_process_entity};
|
||||||
use crate::unit::tag::process_tag;
|
use crate::unit::tag::process_tag;
|
||||||
|
|
||||||
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
||||||
|
@ -63,7 +63,7 @@ impl ContentType {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) -> InternalResult<()> {
|
pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) -> ProcessingResult<()> {
|
||||||
let should_collapse_whitespace = match parent {
|
let should_collapse_whitespace = match parent {
|
||||||
Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]),
|
Some(tag_name) => !WSS_TAGS.contains(&proc[tag_name]),
|
||||||
// Should collapse whitespace for root content.
|
// Should collapse whitespace for root content.
|
||||||
|
@ -87,19 +87,39 @@ pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
||||||
|
|
||||||
let mut last_non_whitespace_content_type = ContentType::Start;
|
let mut last_non_whitespace_content_type = ContentType::Start;
|
||||||
// Whether or not currently in whitespace.
|
// Whether or not currently in whitespace.
|
||||||
let mut whitespace_checkpoint: Option<Checkpoint> = None;
|
let mut whitespace_checkpoint_opt: Option<Checkpoint> = None;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let next_content_type = ContentType::peek(proc);
|
let next_content_type = match ContentType::peek(proc) {
|
||||||
|
ContentType::Entity => {
|
||||||
|
let e = maybe_process_entity(proc)?;
|
||||||
|
// Entity could decode to whitespace.
|
||||||
|
if e.code_point()
|
||||||
|
.filter(|c| *c < 0x7f)
|
||||||
|
.filter(|c| is_whitespace(*c as u8))
|
||||||
|
.is_some() {
|
||||||
|
// Skip whitespace char, and mark as whitespace.
|
||||||
|
ContentType::Whitespace
|
||||||
|
} else {
|
||||||
|
// Not whitespace, so decode and write.
|
||||||
|
e.keep(proc);
|
||||||
|
ContentType::Entity
|
||||||
|
}
|
||||||
|
},
|
||||||
|
ContentType::Whitespace => {
|
||||||
|
// This is here to prevent skipping twice from decoded whitespace entity.
|
||||||
|
// Whitespace is always ignored and then processed afterwards, even if not minifying.
|
||||||
|
proc.skip().expect("skipping known character");
|
||||||
|
ContentType::Whitespace
|
||||||
|
},
|
||||||
|
other_type => other_type,
|
||||||
|
};
|
||||||
|
|
||||||
if next_content_type == ContentType::Whitespace {
|
if next_content_type == ContentType::Whitespace {
|
||||||
// Whitespace is always ignored and then processed afterwards, even if not minifying.
|
if let None = whitespace_checkpoint_opt {
|
||||||
proc.skip()?;
|
|
||||||
|
|
||||||
if let None = whitespace_checkpoint {
|
|
||||||
// This is the start of one or more whitespace characters, so start a view of this contiguous whitespace
|
// This is the start of one or more whitespace characters, so start a view of this contiguous whitespace
|
||||||
// and don't write any characters that are part of it yet.
|
// and don't write any characters that are part of it yet.
|
||||||
whitespace_checkpoint = Some(proc.checkpoint());
|
whitespace_checkpoint_opt = Some(proc.checkpoint());
|
||||||
} else {
|
} else {
|
||||||
// This is part of a contiguous whitespace, but not the start of, so simply ignore.
|
// This is part of a contiguous whitespace, but not the start of, so simply ignore.
|
||||||
}
|
}
|
||||||
|
@ -107,7 +127,7 @@ pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
||||||
}
|
}
|
||||||
|
|
||||||
// Next character is not whitespace, so handle any previously ignored whitespace.
|
// Next character is not whitespace, so handle any previously ignored whitespace.
|
||||||
if let Some(chkpt) = whitespace_checkpoint {
|
if let Some(ws) = whitespace_checkpoint_opt {
|
||||||
if should_destroy_whole_whitespace && last_non_whitespace_content_type.is_comment_bang_opening_tag() && next_content_type.is_comment_bang_opening_tag() {
|
if should_destroy_whole_whitespace && last_non_whitespace_content_type.is_comment_bang_opening_tag() && next_content_type.is_comment_bang_opening_tag() {
|
||||||
// Whitespace is between two tags, comments, or bangs.
|
// Whitespace is between two tags, comments, or bangs.
|
||||||
// destroy_whole_whitespace is on, so don't write it.
|
// destroy_whole_whitespace is on, so don't write it.
|
||||||
|
@ -119,11 +139,11 @@ pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
||||||
proc.write(b' ');
|
proc.write(b' ');
|
||||||
} else {
|
} else {
|
||||||
// Whitespace cannot be minified, so write in entirety.
|
// Whitespace cannot be minified, so write in entirety.
|
||||||
proc.write_skipped(chkpt);
|
proc.write_skipped(ws);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset whitespace buffer.
|
// Reset whitespace buffer.
|
||||||
whitespace_checkpoint = None;
|
whitespace_checkpoint_opt = None;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Process and consume next character(s).
|
// Process and consume next character(s).
|
||||||
|
@ -131,17 +151,14 @@ pub fn process_content(proc: &mut Processor, parent: Option<ProcessorRange>) ->
|
||||||
ContentType::Comment => { process_comment(proc)?; }
|
ContentType::Comment => { process_comment(proc)?; }
|
||||||
ContentType::Bang => { process_bang(proc)?; }
|
ContentType::Bang => { process_bang(proc)?; }
|
||||||
ContentType::OpeningTag => { process_tag(proc)?; }
|
ContentType::OpeningTag => { process_tag(proc)?; }
|
||||||
ContentType::End => (),
|
ContentType::End => { break; }
|
||||||
ContentType::Entity => { process_entity(proc)?; }
|
// Entity has already been processed.
|
||||||
|
ContentType::Entity => {}
|
||||||
ContentType::Text => { proc.accept()?; }
|
ContentType::Text => { proc.accept()?; }
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
if next_content_type == ContentType::End {
|
last_non_whitespace_content_type = next_content_type;
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
last_non_whitespace_content_type = next_content_type;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -39,10 +39,10 @@
|
||||||
// - An entity is considered invalid if it is well formed but represents a
|
// - An entity is considered invalid if it is well formed but represents a
|
||||||
// non-existent Unicode code point or reference name.
|
// non-existent Unicode code point or reference name.
|
||||||
|
|
||||||
use crate::proc::Processor;
|
use crate::err::ProcessingResult;
|
||||||
use crate::spec::codepoint::{is_digit, is_upper_hex_digit, is_lower_hex_digit, is_hex_digit};
|
use crate::proc::{Checkpoint, Processor};
|
||||||
|
use crate::spec::codepoint::{is_digit, is_hex_digit, is_lower_hex_digit, is_upper_hex_digit};
|
||||||
use crate::spec::entity::{ENTITY_REFERENCES, is_valid_entity_reference_name_char};
|
use crate::spec::entity::{ENTITY_REFERENCES, is_valid_entity_reference_name_char};
|
||||||
use crate::err::InternalResult;
|
|
||||||
|
|
||||||
const MAX_UNICODE_CODE_POINT: u32 = 0x10FFFF;
|
const MAX_UNICODE_CODE_POINT: u32 = 0x10FFFF;
|
||||||
|
|
||||||
|
@ -88,7 +88,7 @@ fn parse_hexadecimal(slice: &[u8]) -> Option<u32> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// This will parse and skip characters. Set a checkpoint to later write skipped, or to ignore results and reset to previous position.
|
// This will parse and skip characters. Set a checkpoint to later write skipped, or to ignore results and reset to previous position.
|
||||||
pub fn parse_entity<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<Option<u32>> {
|
pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<Option<u32>> {
|
||||||
chain!(proc.match_char(b'&').expect().discard());
|
chain!(proc.match_char(b'&').expect().discard());
|
||||||
|
|
||||||
// The input can end at any time after initial ampersand.
|
// The input can end at any time after initial ampersand.
|
||||||
|
@ -152,7 +152,7 @@ pub fn parse_entity<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<Optio
|
||||||
Type::Malformed => None,
|
Type::Malformed => None,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Try consuming semicolon before getting data as slice to prevent issues with borrowing.
|
// Consume semicolon after using borrowed data slice.
|
||||||
if entity_type != Type::Malformed && !chain!(proc.match_char(b';').discard().matched()) {
|
if entity_type != Type::Malformed && !chain!(proc.match_char(b';').discard().matched()) {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
} else {
|
} else {
|
||||||
|
@ -160,22 +160,40 @@ pub fn parse_entity<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<Optio
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct ParsedEntity {
|
||||||
|
code_point: Option<u32>,
|
||||||
|
checkpoint: Checkpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ParsedEntity {
|
||||||
|
pub fn code_point(&self) -> Option<u32> {
|
||||||
|
self.code_point
|
||||||
|
}
|
||||||
|
pub fn keep(&self, proc: &mut Processor) -> () {
|
||||||
|
if let Some(cp) = self.code_point {
|
||||||
|
proc.write_utf8(cp);
|
||||||
|
} else {
|
||||||
|
// Write discarded characters that could not form a well formed entity.
|
||||||
|
proc.write_skipped(self.checkpoint);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn maybe_process_entity(proc: &mut Processor) -> ProcessingResult<ParsedEntity> {
|
||||||
|
let checkpoint = proc.checkpoint();
|
||||||
|
let code_point = parse_entity(proc)?;
|
||||||
|
|
||||||
|
Ok(ParsedEntity { code_point, checkpoint })
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process an HTML entity.
|
* Process an HTML entity.
|
||||||
*
|
*
|
||||||
* @return Unicode code point of the entity, or HB_UNIT_ENTITY_NONE if the
|
* @return Unicode code point of the entity, or HB_UNIT_ENTITY_NONE if the
|
||||||
* entity is malformed or invalid
|
* entity is malformed or invalid
|
||||||
*/
|
*/
|
||||||
pub fn process_entity<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<Option<u32>> {
|
pub fn process_entity(proc: &mut Processor) -> ProcessingResult<Option<u32>> {
|
||||||
let checkpoint = proc.checkpoint();
|
let e = maybe_process_entity(proc)?;
|
||||||
let parsed = parse_entity(proc)?;
|
e.keep(proc);
|
||||||
|
Ok(e.code_point())
|
||||||
if let Some(cp) = parsed {
|
|
||||||
proc.write_utf8(cp);
|
|
||||||
} else {
|
|
||||||
// Write discarded characters that could not form a well formed entity.
|
|
||||||
proc.write_skipped(checkpoint);
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(parsed)
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
use crate::err::{InternalResult, ErrorType};
|
use crate::err::{ProcessingResult, ErrorType};
|
||||||
use crate::proc::{Processor};
|
use crate::proc::{Processor};
|
||||||
|
|
||||||
fn is_string_delimiter(c: u8) -> bool {
|
fn is_string_delimiter(c: u8) -> bool {
|
||||||
c == b'"' || c == b'\''
|
c == b'"' || c == b'\''
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_comment_single<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
fn parse_comment_single(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
chain!(proc.match_seq(b"//").expect().keep());
|
chain!(proc.match_seq(b"//").expect().keep());
|
||||||
|
|
||||||
// Comment can end at closing </script>.
|
// Comment can end at closing </script>.
|
||||||
|
@ -22,7 +22,7 @@ fn parse_comment_single<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_comment_multi<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
fn parse_comment_multi(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
chain!(proc.match_seq(b"/*").expect().keep());
|
chain!(proc.match_seq(b"/*").expect().keep());
|
||||||
|
|
||||||
// Comment can end at closing </script>.
|
// Comment can end at closing </script>.
|
||||||
|
@ -39,7 +39,7 @@ fn parse_comment_multi<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
|
let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
|
||||||
|
|
||||||
let mut escaping = false;
|
let mut escaping = false;
|
||||||
|
@ -58,7 +58,7 @@ fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
||||||
|
|
||||||
if chain!(proc.match_line_terminator().keep().matched()) {
|
if chain!(proc.match_line_terminator().keep().matched()) {
|
||||||
if !escaping {
|
if !escaping {
|
||||||
return Err(ErrorType::NotFound("Unterminated JavaScript string"));
|
return Err(ErrorType::UnterminatedJsString);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_template<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
fn parse_template(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
chain!(proc.match_char(b'`').expect().keep());
|
chain!(proc.match_char(b'`').expect().keep());
|
||||||
|
|
||||||
let mut escaping = false;
|
let mut escaping = false;
|
||||||
|
@ -91,7 +91,7 @@ fn parse_template<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn process_script<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
pub fn process_script(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
while !chain!(proc.match_seq(b"</").matched()) {
|
while !chain!(proc.match_seq(b"</").matched()) {
|
||||||
if chain!(proc.match_seq(b"//").matched()) {
|
if chain!(proc.match_seq(b"//").matched()) {
|
||||||
parse_comment_single(proc)?;
|
parse_comment_single(proc)?;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::proc::Processor;
|
use crate::proc::Processor;
|
||||||
use crate::err::{InternalResult, ErrorType};
|
use crate::err::{ProcessingResult, ErrorType};
|
||||||
|
|
||||||
fn is_string_delimiter(c: u8) -> bool {
|
fn is_string_delimiter(c: u8) -> bool {
|
||||||
match c {
|
match c {
|
||||||
|
@ -8,7 +8,7 @@ fn is_string_delimiter(c: u8) -> bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_comment<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
chain!(proc.match_seq(b"/*").expect().keep());
|
chain!(proc.match_seq(b"/*").expect().keep());
|
||||||
|
|
||||||
// Unlike script tags, style comments do NOT end at closing tag.
|
// Unlike script tags, style comments do NOT end at closing tag.
|
||||||
|
@ -19,7 +19,7 @@ fn parse_comment<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
|
let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
|
||||||
|
|
||||||
let mut escaping = false;
|
let mut escaping = false;
|
||||||
|
@ -38,8 +38,7 @@ fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
||||||
|
|
||||||
if chain!(proc.match_line_terminator().keep().matched()) {
|
if chain!(proc.match_line_terminator().keep().matched()) {
|
||||||
if !escaping {
|
if !escaping {
|
||||||
// TODO Use better error type.
|
return Err(ErrorType::UnterminatedCssString);
|
||||||
return Err(ErrorType::NotFound("Unterminated CSS string"));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,7 +48,7 @@ fn parse_string<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn process_style<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
pub fn process_style(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
while !chain!(proc.match_seq(b"</").matched()) {
|
while !chain!(proc.match_seq(b"</").matched()) {
|
||||||
if chain!(proc.match_seq(b"/*").matched()) {
|
if chain!(proc.match_seq(b"/*").matched()) {
|
||||||
parse_comment(proc)?;
|
parse_comment(proc)?;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use crate::err::{ErrorType, InternalResult};
|
use crate::err::{ErrorType, ProcessingResult};
|
||||||
use crate::proc::Processor;
|
use crate::proc::Processor;
|
||||||
use crate::spec::codepoint::{is_alphanumeric, is_whitespace};
|
use crate::spec::codepoint::{is_alphanumeric, is_whitespace};
|
||||||
use crate::spec::tag::void::VOID_TAGS;
|
use crate::spec::tag::void::VOID_TAGS;
|
||||||
|
@ -14,7 +14,7 @@ fn is_valid_tag_name_char(c: u8) -> bool {
|
||||||
is_alphanumeric(c) || c == b':' || c == b'-'
|
is_alphanumeric(c) || c == b':' || c == b'-'
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn process_tag<'d, 'p>(proc: &'p mut Processor<'d>) -> InternalResult<()> {
|
pub fn process_tag(proc: &mut Processor) -> ProcessingResult<()> {
|
||||||
// TODO Minify opening and closing tag whitespace before name and after name/last attr.
|
// TODO Minify opening and closing tag whitespace before name and after name/last attr.
|
||||||
// TODO DOC No checking if opening and closing names match.
|
// TODO DOC No checking if opening and closing names match.
|
||||||
// Expect to be currently at an opening tag.
|
// Expect to be currently at an opening tag.
|
||||||
|
|
Loading…
Reference in New Issue