Update README; expect on debug only

2019-12-30 11:33:49 +11:00 · 2019-12-30 11:33:49 +11:00 · 90fca89160
parent 7349b15ab8
commit 90fca89160
10 changed files with 66 additions and 21 deletions
--- a/README.md
+++ b/README.md
@ -241,7 +241,7 @@ However, there are some syntax requirements for speed and sanity reasons.

 ### Tags

-Tag names are case sensitive. For example, this means that `P` won't be recognised as a content element
+Tag names are case sensitive. For example, this means that `P` won't be recognised as a content element, `bR` won't be considered as a void tag, and `Script` won't be parsed as JavaScript.

 ### Entities

@ -266,7 +266,7 @@ It is an error to place whitespace between `=` and attribute names/values. It is
 <div class="a"name="1" id = "a"></div>
 ```

-Special handling of some attributes require case sensitive names and values. For example, `class` and `type="text/javascript"`.
+Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify and `type="Text/JavaScript"` on a `<script>` will cause the element to be parsed as a [data block](https://html.spec.whatwg.org/dev/scripting.html#data-block).

 ### Script and style

--- a/src/proc.rs
+++ b/src/proc.rs
@ -379,8 +379,12 @@ impl<'d> Processor<'d> {
            Err(ErrorType::UnexpectedEnd)
        }
    }
+    pub fn skip_amount_expect(&mut self, amount: usize) -> () { // Skips `amount` bytes that the caller already knows are present at the read position.
+        debug_assert!(amount == 0 || self._in_bounds(amount - 1), "skip known characters"); // Debug-only check that the whole skipped span is in bounds, not just the next byte; zero is tested first to avoid usize underflow.
+        self.read_next += amount;
+    }
    pub fn skip_expect(&mut self) -> () {
-        assert!(!self.at_end(), "skip known character");
+        debug_assert!(!self.at_end(), "skip known character");
        self.read_next += 1;
    }

@ -415,6 +419,12 @@ impl<'d> Processor<'d> {
            Err(ErrorType::UnexpectedEnd)
        }
    }
+    pub fn accept_expect(&mut self) -> u8 { // Takes one byte the caller already knows is present and returns it.
+        debug_assert!(!self.at_end()); // This precondition is only checked when debug assertions are enabled.
+        let c = self._read_offset(0); // Value read at offset 0 (presumably the byte at the current read position; confirm in `_read_offset`).
+        self._shift(1); // NOTE(review): presumably keeps one byte in the output and advances past it; confirm in `_shift`.
+        c
+    }
    pub fn accept_amount(&mut self, count: usize) -> ProcessingResult<()> {
        // Check for zero to prevent underflow as type is usize.
        if count == 0 || self._in_bounds(count - 1) {
--- a/src/unit/attr/mod.rs
+++ b/src/unit/attr/mod.rs
@ -40,7 +40,6 @@ pub fn process_attr(proc: &mut Processor) -> ProcessingResult<ProcessedAttr> {
    let name = chain!(proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().range());
    let after_name = proc.checkpoint();

-    // TODO DOC Attr must be case sensitive
    let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
    let has_value = chain!(proc.match_char(b'=').keep().matched());

--- a/src/unit/attr/value.rs
+++ b/src/unit/attr/value.rs
@ -307,7 +307,11 @@ pub fn process_attr_value(proc: &mut Processor, should_collapse_and_trim_ws: boo
    let processed_value_range = proc.written_range(processed_value_checkpoint);
    // Ensure closing delimiter in src has been matched and discarded, if any.
    if let Some(c) = src_delimiter {
-        chain!(proc.match_char(c).expect().discard());
+        if cfg!(debug_assertions) {
+            chain!(proc.match_char(c).expect().discard());
+        } else {
+            proc.skip_expect();
+        };
    }
    // Write closing delimiter, if any.
    if let Some(c) = optimal_delimiter_char {
--- a/src/unit/comment.rs
+++ b/src/unit/comment.rs
@ -3,7 +3,11 @@ use crate::pattern;
 use crate::proc::Processor;

 pub fn process_comment(proc: &mut Processor) -> ProcessingResult<()> {
-    chain!(proc.match_seq(b"<!--").expect().discard());
+    if cfg!(debug_assertions) {
+        chain!(proc.match_seq(b"<!--").expect().discard());
+    } else {
+        proc.skip_amount_expect(4);
+    }

    chain!(proc.match_while_not_seq(pattern::COMMENT_END).discard());

--- a/src/unit/entity.rs
+++ b/src/unit/entity.rs
@ -114,7 +114,11 @@ fn parse_name(proc: &mut Processor) -> Option<EntityType> {
 // This will parse and skip characters. Set a checkpoint to later write skipped, or to ignore results and reset to previous position.
 pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
    let checkpoint = proc.checkpoint();
-    chain!(proc.match_char(b'&').expect().discard());
+    if cfg!(debug_assertions) {
+        chain!(proc.match_char(b'&').expect().discard());
+    } else {
+        proc.skip_expect();
+    };

    // The input can end at any time after initial ampersand.
    // Examples of valid complete source code: "&", "&a", "&#", "&#09",
@ -128,7 +132,7 @@ pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
    //    characters after the initial ampersand, e.g. "&#", "&#x", "&a".
    // 2. Parse the entity data, i.e. the characters between the ampersand
    // and semicolon.
-    //    - TODO To avoid parsing forever on malformed entities without
+    //    - To avoid parsing forever on malformed entities without
    //    semicolons, there is an upper bound on the amount of possible
    //    characters, based on the type of entity detected from the first
    //    stage.
@ -136,7 +140,6 @@ pub fn parse_entity(proc: &mut Processor) -> ProcessingResult<EntityType> {
    //    - This simply checks if it refers to a valid Unicode code point or
    //    entity reference name.

-    // TODO Could optimise.
    // These functions do not return EntityType::Malformed as it requires a checkpoint.
    // Instead, they return None if entity is malformed.
    let entity_type = if chain!(proc.match_seq(b"#x").discard().matched()) {
--- a/src/unit/script/js.rs
+++ b/src/unit/script/js.rs
@ -6,10 +6,13 @@ fn is_string_delimiter(c: u8) -> bool {
 }

 fn parse_comment_single(proc: &mut Processor) -> ProcessingResult<()> {
-    chain!(proc.match_seq(b"//").expect().keep());
+    if cfg!(debug_assertions) {
+        chain!(proc.match_seq(b"//").expect().keep());
+    } else {
+        proc.skip_amount_expect(2);
+    };

    // Comment can end at closing </script>.
-    // TODO WARNING: Closing tag must not contain whitespace.
    // TODO Optimise
    while !chain!(proc.match_line_terminator().keep().matched()) {
        if chain!(proc.match_seq(b"</script>").matched()) {
@ -23,10 +26,13 @@ fn parse_comment_single(proc: &mut Processor) -> ProcessingResult<()> {
 }

 fn parse_comment_multi(proc: &mut Processor) -> ProcessingResult<()> {
-    chain!(proc.match_seq(b"/*").expect().keep());
+    if cfg!(debug_assertions) {
+        chain!(proc.match_seq(b"/*").expect().keep());
+    } else {
+        proc.skip_amount_expect(2);
+    };

    // Comment can end at closing </script>.
-    // TODO WARNING: Closing tag must not contain whitespace.
    // TODO Optimise
    while !chain!(proc.match_seq(b"*/").keep().matched()) {
        if chain!(proc.match_seq(b"</script>").matched()) {
@ -40,7 +46,11 @@ fn parse_comment_multi(proc: &mut Processor) -> ProcessingResult<()> {
 }

 fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
-    let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
+    let delim = if cfg!(debug_assertions) {
+        chain!(proc.match_pred(is_string_delimiter).expect().keep().char())
+    } else {
+        proc.accept_expect()
+    };

    let mut escaping = false;

@ -69,7 +79,11 @@ fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
 }

 fn parse_template(proc: &mut Processor) -> ProcessingResult<()> {
-    chain!(proc.match_char(b'`').expect().keep());
+    if cfg!(debug_assertions) {
+        chain!(proc.match_char(b'`').expect().keep());
+    } else {
+        proc.skip_expect();
+    };

    let mut escaping = false;

--- a/src/unit/script/text.rs
+++ b/src/unit/script/text.rs
@ -14,12 +14,11 @@ pub fn process_text_script(proc: &mut Processor) -> ProcessingResult<()> {
            comment_has_unclosed_script = false;
            in_comment = false;
        } else if in_comment && chain!(proc.match_seq(b"<script").keep().matched()) {
-            // TODO DOC Case sensitive, no space before tag name, nothing else in tag.
+            // TODO DOC Case sensitive, nothing else in tag.
            // TODO Opening tag can have attributes, whitespace, etc.
            chain!(proc.match_char(b'>').require()?.keep());
            comment_has_unclosed_script = true;
        } else if chain!(proc.match_seq(b"</script").matched()) {
-            // TODO DOC Case sensitive, no space before tag name, nothing else in tag.
            if !comment_has_unclosed_script {
                break;
            }
--- a/src/unit/style.rs
+++ b/src/unit/style.rs
@ -9,7 +9,11 @@ fn is_string_delimiter(c: u8) -> bool {
 }

 fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
-    chain!(proc.match_seq(b"/*").expect().keep());
+    if cfg!(debug_assertions) {
+        chain!(proc.match_seq(b"/*").expect().keep());
+    } else {
+        proc.skip_amount_expect(2);
+    };

    // Unlike script tags, style comments do NOT end at closing tag.
    while !chain!(proc.match_seq(b"*/").keep().matched()) {
@ -20,7 +24,11 @@ fn parse_comment(proc: &mut Processor) -> ProcessingResult<()> {
 }

 fn parse_string(proc: &mut Processor) -> ProcessingResult<()> {
-    let delim = chain!(proc.match_pred(is_string_delimiter).expect().keep().char());
+    let delim = if cfg!(debug_assertions) {
+        chain!(proc.match_pred(is_string_delimiter).expect().keep().char())
+    } else {
+        proc.accept_expect()
+    };

    let mut escaping = false;

--- a/src/unit/tag.rs
+++ b/src/unit/tag.rs
@ -42,10 +42,14 @@ enum TagType {
 }

 pub fn process_tag(proc: &mut Processor) -> ProcessingResult<()> {
-    // TODO Minify opening and closing tag whitespace before name and after name/last attr.
+    // TODO Minify opening and closing tag whitespace after name and last attr.
    // TODO DOC No checking if opening and closing names match.
    // Expect to be currently at an opening tag.
-    chain!(proc.match_char(b'<').expect().keep());
+    if cfg!(debug_assertions) {
+        chain!(proc.match_char(b'<').expect().keep());
+    } else {
+        proc.skip_expect();
+    };
    // May not be valid tag name at current position, so require instead of expect.
    let opening_name_range = chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("tag name")?.keep().out_range());