minify-html/src/unit/attr/mod.rs

72 lines
2.7 KiB
Rust
Raw Normal View History

use phf::{phf_set, Set};
use crate::err::ProcessingResult;
2019-12-27 05:52:49 -05:00
use crate::proc::{Processor, ProcessorRange};
2019-12-25 04:44:51 -05:00
use crate::spec::codepoint::is_control;
use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue};
2019-12-25 04:44:51 -05:00
mod value;
2020-01-07 08:38:42 -05:00
include!(concat!(env!("OUT_DIR"), "/gen_boolean_attrs.rs"));
2019-12-25 04:44:51 -05:00
/// Names of attributes for which `process_attr` asks `process_attr_value` to collapse and trim
/// whitespace in the value.
static COLLAPSIBLE_AND_TRIMMABLE_ATTRS: Set<&'static [u8]> = phf_set! {
b"class",
};
/// How an attribute's value ended up being written after processing.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AttrType {
    /// The value is delimited by double or single quotes.
    Quoted,
    /// The value is written without surrounding quotes.
    Unquoted,
    /// The attribute has no value: there was no `=`, the value was empty, or the value was erased
    /// because the attribute is boolean for its element.
    NoValue,
}
2019-12-27 05:52:49 -05:00
/// Result of processing a single attribute with `process_attr`.
pub struct ProcessedAttr {
// Range covering the attribute name that was matched.
pub name: ProcessorRange,
// How the value was written (quoted, unquoted, or absent).
pub typ: AttrType,
// Range of the processed value; `process_attr` sets this to `None` whenever `typ` is `AttrType::NoValue`.
pub value: Option<ProcessorRange>,
}
2019-12-25 04:44:51 -05:00
// Decides whether a byte may be part of an attribute name.
// Space, `"`, `'`, `>`, `/` and `=` are rejected outright; any other byte is accepted unless
// `is_control` reports it as a control character.
// NOTE: Unicode noncharacters are not tested.
// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name for spec.
fn is_name_char(c: u8) -> bool {
    // Bytes that terminate or can never appear in an attribute name.
    const FORBIDDEN: &[u8] = b" \"'>/=";
    // The control check only runs for bytes that are not already forbidden.
    !(FORBIDDEN.contains(&c) || is_control(c))
}
2020-01-07 08:38:42 -05:00
pub fn process_attr(proc: &mut Processor, element: ProcessorRange) -> ProcessingResult<ProcessedAttr> {
// It's possible to expect attribute name but not be called at an attribute, e.g. due to whitespace between name and
// value, which causes name to be considered boolean attribute and `=` to be start of new (invalid) attribute name.
let name = chain!(proc.match_while_pred(is_name_char).require_with_reason("attribute name")?.keep().range());
2020-01-07 08:38:42 -05:00
let is_boolean = BOOLEAN_ATTRS.get(&proc[name]).filter(|elems| elems.contains(&proc[element])).is_some();
let after_name = proc.checkpoint();
2019-12-25 04:44:51 -05:00
2019-12-27 05:52:49 -05:00
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
2019-12-25 07:29:18 -05:00
let has_value = chain!(proc.match_char(b'=').keep().matched());
2019-12-25 04:44:51 -05:00
2019-12-27 05:52:49 -05:00
let (typ, value) = if !has_value {
(AttrType::NoValue, None)
2019-12-25 04:44:51 -05:00
} else {
2020-01-07 08:38:42 -05:00
// TODO Don't process if going to erase anyway.
let val = process_attr_value(proc, should_collapse_and_trim_value_ws)?;
if is_boolean {
proc.erase_written(after_name);
(AttrType::NoValue, None)
} else {
match val {
ProcessedAttrValue { value: None, .. } => {
// Value is empty, which is equivalent to no value, so discard `=` and any quotes.
proc.erase_written(after_name);
(AttrType::NoValue, None)
}
ProcessedAttrValue { delimiter: DelimiterType::Unquoted, value } => (AttrType::Unquoted, value),
ProcessedAttrValue { delimiter: DelimiterType::Double, value } | ProcessedAttrValue { delimiter: DelimiterType::Single, value } => (AttrType::Quoted, value),
}
}
2019-12-27 05:52:49 -05:00
};
Ok(ProcessedAttr { name, typ, value })
2019-12-25 04:44:51 -05:00
}