From 407acf01a6ab5561bc5edae4198fa94353f61736 Mon Sep 17 00:00:00 2001 From: Wilson Lin Date: Thu, 9 Jan 2020 00:34:59 +1100 Subject: [PATCH] Improve parsing attributes reliability --- src/unit/attr/mod.rs | 2 +- src/unit/attr/value.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/unit/attr/mod.rs b/src/unit/attr/mod.rs index ef330be..36b5896 100644 --- a/src/unit/attr/mod.rs +++ b/src/unit/attr/mod.rs @@ -39,7 +39,7 @@ fn is_name_char(c: u8) -> bool { pub fn process_attr(proc: &mut Processor, element: ProcessorRange) -> ProcessingResult { // It's possible to expect attribute name but not be called at an attribute, e.g. due to whitespace between name and // value, which causes name to be considered boolean attribute and `=` to be start of new (invalid) attribute name. - let name = chain!(proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().range()); + let name = chain!(proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().out_range()); let is_boolean = BOOLEAN_ATTRS.get(&proc[name]).filter(|elems| elems.contains(&proc[element])).is_some(); let after_name = proc.checkpoint(); diff --git a/src/unit/attr/value.rs b/src/unit/attr/value.rs index 2569425..9647a3e 100644 --- a/src/unit/attr/value.rs +++ b/src/unit/attr/value.rs @@ -134,9 +134,9 @@ impl Metrics { } pub fn skip_attr_value(proc: &mut Processor) -> ProcessingResult<()> { - let src_delimiter = chain!(proc.match_pred(is_attr_quote).require_with_reason("attribute value delimiter quote")?.discard().char()); + let src_delimiter = chain!(proc.match_pred(is_attr_quote).require_with_reason("attribute value opening delimiter quote")?.discard().char()); chain!(proc.match_while_not_char(src_delimiter).discard()); - chain!(proc.match_char(src_delimiter).require_with_reason("attribute value delimiter quote")?.discard()); + chain!(proc.match_char(src_delimiter).require_with_reason("attribute value closing delimiter quote")?.discard()); Ok(()) } @@ -163,7 +163,7 @@ pub struct ProcessedAttrValue { // Since the actual processed value would have a length equal or greater to it (e.g. it might be quoted, or some characters might get encoded), we can then read minimum value right to left and start writing from actual processed value length (which is calculated), quoting/encoding as necessary. pub fn process_attr_value(proc: &mut Processor, should_collapse_and_trim_ws: bool) -> ProcessingResult { let src_start = proc.checkpoint(); - let src_delimiter = chain!(proc.match_pred(is_attr_quote).require_with_reason("attribute value delimiter quote")?.discard().char()); + let src_delimiter = chain!(proc.match_pred(is_attr_quote).require_with_reason("attribute value opening delimiter quote")?.discard().char()); // Stage 1: read and collect metrics on attribute value characters. let mut metrics = Metrics { @@ -182,7 +182,7 @@ pub fn process_attr_value(proc: &mut Processor, should_collapse_and_trim_ws: boo let mut currently_first_char = true; loop { - let metrics_char_type = if chain!(proc.match_char(src_delimiter).discard().matched()) { + let metrics_char_type = if chain!(proc.match_char(src_delimiter).matched()) { // DO NOT BREAK HERE. More processing is done afterwards upon reaching end. CharType::End } else if chain!(proc.match_char(b'&').matched()) { @@ -241,7 +241,7 @@ pub fn process_attr_value(proc: &mut Processor, should_collapse_and_trim_ws: boo }; metrics.last_char_type = Some(metrics_char_type); }; - // Ending delimiter quote (if any) has already been discarded at this point. + chain!(proc.match_char(src_delimiter).require_with_reason("attribute value closing delimiter quote")?.discard()); let minimum_value = proc.written_range(src_start); // If minimum value is empty, return now before trying to read out of range later. // (Reading starts at one character before end of minimum value.)