Update README; expect on debug only
This commit is contained in:
parent
7349b15ab8
commit
90fca89160
|
@ -241,7 +241,7 @@ However, there are some syntax requirements for speed and sanity reasons.
|
|||
|
||||
### Tags
|
||||
|
||||
Tag names are case sensitive. For example, this means that `P` won't be recognised as a content element.
|
||||
Tag names are case sensitive. For example, this means that `P` won't be recognised as a content element, `bR` won't be considered as a void tag, and `Script` won't be parsed as JavaScript.
|
||||
|
||||
### Entities
|
||||
|
||||
|
@ -266,7 +266,7 @@ It is an error to place whitespace between `=` and attribute names/values. It is
|
|||
<div class="a"name="1" id = "a"></div>
|
||||
```
|
||||
|
||||
Special handling of some attributes requires case-sensitive names and values. For example, `class` and `type="text/javascript"`.
|
||||
Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block).
|
||||
|
||||
### Script and style
|
||||
|
||||
|
|
12
src/proc.rs
12
src/proc.rs
|
@ -379,8 +379,12 @@ impl<'d> Processor<'d> {
|
|||
Err(ErrorType::UnexpectedEnd)
|
||||
}
|
||||
}
|
||||
pub fn skip_amount_expect(&mut self, amount: usize) -> () {
|
||||
debug_assert!(!self.at_end(), "skip known characters");
|
||||
self.read_next += amount;
|
||||
}
|
||||
pub fn skip_expect(&mut self) -> () {
|
||||
assert!(!self.at_end(), "skip known character");
|
||||
debug_assert!(!self.at_end(), "skip known character");
|
||||
self.read_next += 1;
|
||||
}
|
||||
|
||||
|
@ -415,6 +419,12 @@ impl<'d> Processor<'d> {
|
|||
Err(ErrorType::UnexpectedEnd)
|
||||
}
|
||||
}
|
||||
pub fn accept_expect(&mut self) -> u8 {
|
||||
debug_assert!(!self.at_end());
|
||||
let c = self._read_offset(0);
|
||||
self._shift(1);
|
||||
c
|
||||
}
|
||||
pub fn accept_amount(&mut self, count: usize) -> ProcessingResult<()> {
|
||||
// Check for zero to prevent underflow as type is usize.
|
||||
if count == 0 || self._in_bounds(count - 1) {
|
||||
|
|
|
@ -40,7 +40,6 @@ pub fn process_attr(proc: &mut Processor) -> ProcessingResult<ProcessedAttr> {
|
|||
let name = chain!(proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().range());
|
||||
let after_name = proc.checkpoint();
|
||||
|
||||
// TODO DOC Attr must be case sensitive
|
||||
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
|
||||
let has_value = chain!(proc.match_char(b'=').keep().matched());
|
||||
|
||||
|
|
|
@ -307,7 +307,11 @@ pub fn process_attr_value(proc: &mut Processor, should_collapse_and_trim_ws: boo
|
|||
let processed_value_range = proc.written_range(processed_value_checkpoint);
|
||||
// Ensure closing delimiter in src has been matched and discarded, if any.
|
||||
if let Some(c) = src_delimiter {
|
||||
chain!(proc.match_char(c).expect().discard());
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_char(c).expect().discard());
|
||||
} else {
|
||||
proc.skip_expect();
|
||||
};
|
||||
}
|
||||
// Write closing delimiter, if any.
|
||||
if let Some(c) = optimal_delimiter_char {
|
||||
|
|
|
@ -3,7 +3,11 @@ use crate::pattern;
|
|||
use crate::proc::Processor;
|
||||
|
||||
pub fn process_comment(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
chain!(proc.match_seq(b"<!--").expect().discard());
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_seq(b"<!--").expect().discard());
|
||||
} else {
|
||||
proc.skip_amount_expect(4);
|
||||
}
|
||||
|
||||
chain!(proc.match_while_not_seq(pattern::COMMENT_END).discard());
|
||||
|
||||
|
|
|
@ -114,7 +114,11 @@ fn parse_name(proc: &mut Processor) -> Option<EntityType> {
|
|||
// This will parse and skip characters. Set a checkpoint beforehand to later write the skipped characters, or to ignore the results and reset to the previous position.
|
||||
pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
|
||||
let checkpoint = proc.checkpoint();
|
||||
chain!(proc.match_char(b'&').expect().discard());
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_char(b'&').expect().discard());
|
||||
} else {
|
||||
proc.skip_expect();
|
||||
};
|
||||
|
||||
// The input can end at any time after initial ampersand.
|
||||
// Examples of valid complete source code: "&", "&a", "&#", "	",
|
||||
|
@ -128,7 +132,7 @@ pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
|
|||
// characters after the initial ampersand, e.g. "&#", "&#x", "&a".
|
||||
// 2. Parse the entity data, i.e. the characters between the ampersand
|
||||
// and semicolon.
|
||||
// - TODO To avoid parsing forever on malformed entities without
|
||||
// - To avoid parsing forever on malformed entities without
|
||||
// semicolons, there is an upper bound on the amount of possible
|
||||
// characters, based on the type of entity detected from the first
|
||||
// stage.
|
||||
|
@ -136,7 +140,6 @@ pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
|
|||
// - This simply checks if it refers to a valid Unicode code point or
|
||||
// entity reference name.
|
||||
|
||||
// TODO Could optimise.
|
||||
// These functions do not return EntityType::Malformed as it requires a checkpoint.
|
||||
// Instead, they return None if entity is malformed.
|
||||
let entity_type = if chain!(proc.match_seq(b"#x").discard().matched()) {
|
||||
|
|
|
@ -6,10 +6,13 @@ fn is_string_delimiter(c: u8) -> bool {
|
|||
}
|
||||
|
||||
fn parse_comment_single(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
chain!(proc.match_seq(b"//").expect().keep());
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_seq(b"//").expect().keep());
|
||||
} else {
|
||||
proc.skip_amount_expect(2);
|
||||
};
|
||||
|
||||
// Comment can end at closing </script>.
|
||||
// TODO WARNING: Closing tag must not contain whitespace.
|
||||
// TODO Optimise
|
||||
while !chain!(proc.match_line_terminator().keep().matched()) {
|
||||
if chain!(proc.match_seq(b"</script>").matched()) {
|
||||
|
@ -23,10 +26,13 @@ fn parse_comment_single(proc: &mut Processor) -> ProcessingResult<()> {
|
|||
}
|
||||
|
||||
fn parse_comment_multi(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
chain!(proc.match_seq(b"/*").expect().keep());
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_seq(b"/*").expect().keep());
|
||||
} else {
|
||||
proc.skip_amount_expect(2);
|
||||
};
|
||||
|
||||
// Comment can end at closing </script>.
|
||||
// TODO WARNING: Closing tag must not contain whitespace.
|
||||
// TODO Optimise
|
||||
while !chain!(proc.match_seq(b"*/").keep().matched()) {
|
||||
if chain!(proc.match_seq(b"</script>").matched()) {
|
||||
|
@ -40,7 +46,11 @@ fn parse_comment_multi(proc: &mut Processor) -> ProcessingResult<()> {
|
|||
}
|
||||
|
||||
fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
|
||||
let delim = if cfg!(debug_assertions) {
|
||||
chain!(proc.match_pred(is_string_delimiter).expect().keep().char())
|
||||
} else {
|
||||
proc.accept_expect()
|
||||
};
|
||||
|
||||
let mut escaping = false;
|
||||
|
||||
|
@ -69,7 +79,11 @@ fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
|
|||
}
|
||||
|
||||
fn parse_template(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
chain!(proc.match_char(b'`').expect().keep());
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_char(b'`').expect().keep());
|
||||
} else {
|
||||
proc.skip_expect();
|
||||
};
|
||||
|
||||
let mut escaping = false;
|
||||
|
||||
|
|
|
@ -14,12 +14,11 @@ pub fn process_text_script(proc: &mut Processor) -> ProcessingResult<()> {
|
|||
comment_has_unclosed_script = false;
|
||||
in_comment = false;
|
||||
} else if in_comment && chain!(proc.match_seq(b"<script").keep().matched()) {
|
||||
// TODO DOC Case sensitive, no space before tag name, nothing else in tag.
|
||||
// TODO DOC Case sensitive, nothing else in tag.
|
||||
// TODO Opening tag can have attributes, whitespace, etc.
|
||||
chain!(proc.match_char(b'>').require()?.keep());
|
||||
comment_has_unclosed_script = true;
|
||||
} else if chain!(proc.match_seq(b"</script").matched()) {
|
||||
// TODO DOC Case sensitive, no space before tag name, nothing else in tag.
|
||||
if !comment_has_unclosed_script {
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -9,7 +9,11 @@ fn is_string_delimiter(c: u8) -> bool {
|
|||
}
|
||||
|
||||
fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
chain!(proc.match_seq(b"/*").expect().keep());
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_seq(b"/*").expect().keep());
|
||||
} else {
|
||||
proc.skip_amount_expect(2);
|
||||
};
|
||||
|
||||
// Unlike script tags, style comments do NOT end at closing tag.
|
||||
while !chain!(proc.match_seq(b"*/").keep().matched()) {
|
||||
|
@ -20,7 +24,11 @@ fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
|
|||
}
|
||||
|
||||
fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
|
||||
let delim = if cfg!(debug_assertions) {
|
||||
chain!(proc.match_pred(is_string_delimiter).expect().keep().char())
|
||||
} else {
|
||||
proc.accept_expect()
|
||||
};
|
||||
|
||||
let mut escaping = false;
|
||||
|
||||
|
|
|
@ -42,10 +42,14 @@ enum TagType {
|
|||
}
|
||||
|
||||
pub fn process_tag(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
// TODO Minify opening and closing tag whitespace before name and after name/last attr.
|
||||
// TODO Minify opening and closing tag whitespace after name and last attr.
|
||||
// TODO DOC No checking if opening and closing names match.
|
||||
// Expect to be currently at an opening tag.
|
||||
chain!(proc.match_char(b'<').expect().keep());
|
||||
if cfg!(debug_assertions) {
|
||||
chain!(proc.match_char(b'<').expect().keep());
|
||||
} else {
|
||||
proc.skip_expect();
|
||||
};
|
||||
// May not be valid tag name at current position, so require instead of expect.
|
||||
let opening_name_range = chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("tag name")?.keep().out_range());
|
||||
|
||||
|
|
Loading…
Reference in New Issue