Update README; expect on debug only

This commit is contained in:
Wilson Lin 2019-12-30 11:33:49 +11:00
parent 7349b15ab8
commit 90fca89160
10 changed files with 66 additions and 21 deletions

View File

@ -241,7 +241,7 @@ However, there are some syntax requirements for speed and sanity reasons.
### Tags
Tag names are case sensitive. For example, this means that `P` won't be recognised as a content element
Tag names are case sensitive. For example, this means that `P` won't be recognised as a content element, `bR` won't be considered as a void tag, and `Script` won't be parsed as JavaScript.
### Entities
@ -266,7 +266,7 @@ It is an error to place whitespace between `=` and attribute names/values. It is
<div class="a"name="1" id = "a"></div>
```
Special handling of some attributes require case sensitive names and values. For example, `class` and `type="text/javascript"`.
Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block).
### Script and style

View File

@ -379,8 +379,12 @@ impl<'d> Processor<'d> {
Err(ErrorType::UnexpectedEnd)
}
}
/// Advances the read position by `amount` characters that the caller already
/// knows are present in the source.
///
/// No bounds check is performed in release builds. In debug builds the whole
/// skipped span is asserted to be available, using the same `_in_bounds`
/// pattern as `accept_amount`, so an over-long skip is caught during
/// development instead of only a skip that starts exactly at the end.
pub fn skip_amount_expect(&mut self, amount: usize) {
    // Check for zero first to prevent underflow, as `amount` is a usize.
    debug_assert!(
        amount == 0 || self._in_bounds(amount - 1),
        "skip known characters"
    );
    self.read_next += amount;
}
/// Advances the read position past a single character that the caller
/// already knows is present in the source.
///
/// The "not at end" precondition is verified in debug builds only, matching
/// `skip_amount_expect`; release builds perform no check.
pub fn skip_expect(&mut self) {
    debug_assert!(!self.at_end(), "skip known character");
    self.read_next += 1;
}
@ -415,6 +419,12 @@ impl<'d> Processor<'d> {
Err(ErrorType::UnexpectedEnd)
}
}
/// Reads the character at the current read position, shifts past it, and
/// returns it.
///
/// The caller must already know that the input is not at its end; this is
/// asserted in debug builds only.
pub fn accept_expect(&mut self) -> u8 {
    debug_assert!(!self.at_end());
    // Capture the character before shifting, as the shift moves the position.
    // NOTE(review): presumably `_shift` also carries the character over to the
    // output — confirm against its definition.
    let current = self._read_offset(0);
    self._shift(1);
    current
}
pub fn accept_amount(&mut self, count: usize) -> ProcessingResult<()> {
// Check for zero to prevent underflow as type is usize.
if count == 0 || self._in_bounds(count - 1) {

View File

@ -40,7 +40,6 @@ pub fn process_attr(proc: &mut Processor) -> ProcessingResult<ProcessedAttr> {
let name = chain!(proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().range());
let after_name = proc.checkpoint();
// TODO DOC Attr must be case sensitive
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
let has_value = chain!(proc.match_char(b'=').keep().matched());

View File

@ -307,7 +307,11 @@ pub fn process_attr_value(proc: &mut Processor, should_collapse_and_trim_ws: boo
let processed_value_range = proc.written_range(processed_value_checkpoint);
// Ensure closing delimiter in src has been matched and discarded, if any.
if let Some(c) = src_delimiter {
chain!(proc.match_char(c).expect().discard());
if cfg!(debug_assertions) {
chain!(proc.match_char(c).expect().discard());
} else {
proc.skip_expect();
};
}
// Write closing delimiter, if any.
if let Some(c) = optimal_delimiter_char {

View File

@ -3,7 +3,11 @@ use crate::pattern;
use crate::proc::Processor;
pub fn process_comment(proc: &mut Processor) -> ProcessingResult<()> {
chain!(proc.match_seq(b"<!--").expect().discard());
if cfg!(debug_assertions) {
chain!(proc.match_seq(b"<!--").expect().discard());
} else {
proc.skip_amount_expect(4);
}
chain!(proc.match_while_not_seq(pattern::COMMENT_END).discard());

View File

@ -114,7 +114,11 @@ fn parse_name(proc: &mut Processor) -> Option<EntityType> {
// This will parse and skip characters. Set a checkpoint beforehand to later write the skipped characters, or to ignore the results and reset to the previous position.
pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
let checkpoint = proc.checkpoint();
chain!(proc.match_char(b'&').expect().discard());
if cfg!(debug_assertions) {
chain!(proc.match_char(b'&').expect().discard());
} else {
proc.skip_expect();
};
// The input can end at any time after initial ampersand.
// Examples of valid complete source code: "&", "&a", "&#", "&#09",
@ -128,7 +132,7 @@ pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
// characters after the initial ampersand, e.g. "&#", "&#x", "&a".
// 2. Parse the entity data, i.e. the characters between the ampersand
// and semicolon.
// - TODO To avoid parsing forever on malformed entities without
// - To avoid parsing forever on malformed entities without
// semicolons, there is an upper bound on the amount of possible
// characters, based on the type of entity detected from the first
// stage.
@ -136,7 +140,6 @@ pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
// - This simply checks if it refers to a valid Unicode code point or
// entity reference name.
// TODO Could optimise.
// These functions do not return EntityType::Malformed as it requires a checkpoint.
// Instead, they return None if entity is malformed.
let entity_type = if chain!(proc.match_seq(b"#x").discard().matched()) {

View File

@ -6,10 +6,13 @@ fn is_string_delimiter(c: u8) -> bool {
}
fn parse_comment_single(proc: &mut Processor) -> ProcessingResult<()> {
chain!(proc.match_seq(b"//").expect().keep());
if cfg!(debug_assertions) {
chain!(proc.match_seq(b"//").expect().keep());
} else {
proc.skip_amount_expect(2);
};
// Comment can end at closing </script>.
// TODO WARNING: Closing tag must not contain whitespace.
// TODO Optimise
while !chain!(proc.match_line_terminator().keep().matched()) {
if chain!(proc.match_seq(b"</script>").matched()) {
@ -23,10 +26,13 @@ fn parse_comment_single(proc: &mut Processor) -> ProcessingResult<()> {
}
fn parse_comment_multi(proc: &mut Processor) -> ProcessingResult<()> {
chain!(proc.match_seq(b"/*").expect().keep());
if cfg!(debug_assertions) {
chain!(proc.match_seq(b"/*").expect().keep());
} else {
proc.skip_amount_expect(2);
};
// Comment can end at closing </script>.
// TODO WARNING: Closing tag must not contain whitespace.
// TODO Optimise
while !chain!(proc.match_seq(b"*/").keep().matched()) {
if chain!(proc.match_seq(b"</script>").matched()) {
@ -40,7 +46,11 @@ fn parse_comment_multi(proc: &mut Processor) -> ProcessingResult<()> {
}
fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
let delim = if cfg!(debug_assertions) {
chain!(proc.match_pred(is_string_delimiter).expect().keep().char())
} else {
proc.accept_expect()
};
let mut escaping = false;
@ -69,7 +79,11 @@ fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
}
fn parse_template(proc: &mut Processor) -> ProcessingResult<()> {
chain!(proc.match_char(b'`').expect().keep());
if cfg!(debug_assertions) {
chain!(proc.match_char(b'`').expect().keep());
} else {
proc.skip_expect();
};
let mut escaping = false;

View File

@ -14,12 +14,11 @@ pub fn process_text_script(proc: &mut Processor) -> ProcessingResult<()> {
comment_has_unclosed_script = false;
in_comment = false;
} else if in_comment && chain!(proc.match_seq(b"<script").keep().matched()) {
// TODO DOC Case sensitive, no space before tag name, nothing else in tag.
// TODO DOC Case sensitive, nothing else in tag.
// TODO Opening tag can have attributes, whitespace, etc.
chain!(proc.match_char(b'>').require()?.keep());
comment_has_unclosed_script = true;
} else if chain!(proc.match_seq(b"</script").matched()) {
// TODO DOC Case sensitive, no space before tag name, nothing else in tag.
if !comment_has_unclosed_script {
break;
}

View File

@ -9,7 +9,11 @@ fn is_string_delimiter(c: u8) -> bool {
}
fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
chain!(proc.match_seq(b"/*").expect().keep());
if cfg!(debug_assertions) {
chain!(proc.match_seq(b"/*").expect().keep());
} else {
proc.skip_amount_expect(2);
};
// Unlike script tags, style comments do NOT end at closing tag.
while !chain!(proc.match_seq(b"*/").keep().matched()) {
@ -20,7 +24,11 @@ fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
}
fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
let delim = if cfg!(debug_assertions) {
chain!(proc.match_pred(is_string_delimiter).expect().keep().char())
} else {
proc.accept_expect()
};
let mut escaping = false;

View File

@ -42,10 +42,14 @@ enum TagType {
}
pub fn process_tag(proc: &mut Processor) -> ProcessingResult<()> {
// TODO Minify opening and closing tag whitespace before name and after name/last attr.
// TODO Minify opening and closing tag whitespace after name and last attr.
// TODO DOC No checking if opening and closing names match.
// Expect to be currently at an opening tag.
chain!(proc.match_char(b'<').expect().keep());
if cfg!(debug_assertions) {
chain!(proc.match_char(b'<').expect().keep());
} else {
proc.skip_expect();
};
// May not be valid tag name at current position, so require instead of expect.
let opening_name_range = chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("tag name")?.keep().out_range());