minify-html/src/unit/attr/mod.rs

62 lines
2.1 KiB
Rust
Raw Normal View History

use phf::{phf_set, Set};
use crate::err::ProcessingResult;
2019-12-27 05:52:49 -05:00
use crate::proc::{Processor, ProcessorRange};
2019-12-25 04:44:51 -05:00
use crate::spec::codepoint::is_control;
use crate::unit::attr::value::{DelimiterType, process_attr_value, ProcessedAttrValue};
2019-12-25 04:44:51 -05:00
mod value;
/// Names of attributes for which `process_attr` asks `process_attr_value` to collapse and
/// trim whitespace in the value.
///
/// Entries are byte strings and are looked up with the raw bytes of the attribute name, so
/// only names spelled exactly as listed here match.
static COLLAPSIBLE_AND_TRIMMABLE_ATTRS: Set<&'static [u8]> = phf_set! {
b"class",
};
/// How an attribute's value appears after processing.
///
/// `Debug` is derived so values can be printed in diagnostics and compared with
/// `assert_eq!`/`assert_ne!`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AttrType {
    /// The value is delimited by single or double quotes.
    Quoted,
    /// The value is written without any surrounding quotes.
    Unquoted,
    /// The attribute has no value: either no `=` followed the name, or the value was empty.
    NoValue,
}
2019-12-27 05:52:49 -05:00
/// Result of processing a single attribute with `process_attr`.
pub struct ProcessedAttr {
// Range covering the attribute's name, as matched by `is_name_char`.
pub name: ProcessorRange,
// Whether the value ended up quoted, unquoted, or absent.
pub typ: AttrType,
// Range of the processed value; `None` when there is no `=` or the value is empty.
pub value: Option<ProcessorRange>,
}
2019-12-25 04:44:51 -05:00
// Returns whether `c` may appear in an attribute name.
// A byte is rejected when it is a space, a quote, `>`, `/` or `=`, or when `is_control`
// reports it as a control character.
// NOTE: Unicode noncharacters are not checked.
// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name for spec.
fn is_name_char(c: u8) -> bool {
    // Bytes that can never be part of a name, regardless of `is_control`.
    const FORBIDDEN: &[u8] = b" \"'>/=";
    !FORBIDDEN.contains(&c) && !is_control(c)
}
2019-12-27 05:52:49 -05:00
pub fn process_attr(proc: &mut Processor) -> ProcessingResult<ProcessedAttr> {
2019-12-25 04:44:51 -05:00
// Expect `process_attr` to be called at an attribute.
2019-12-27 05:52:49 -05:00
let name = chain!(proc.match_while_pred(is_name_char).expect().keep().range());
let after_name = proc.checkpoint();
2019-12-25 04:44:51 -05:00
// TODO DOC Attr must be case sensitive
2019-12-27 05:52:49 -05:00
let should_collapse_and_trim_value_ws = COLLAPSIBLE_AND_TRIMMABLE_ATTRS.contains(&proc[name]);
2019-12-25 07:29:18 -05:00
let has_value = chain!(proc.match_char(b'=').keep().matched());
2019-12-25 04:44:51 -05:00
2019-12-27 05:52:49 -05:00
let (typ, value) = if !has_value {
(AttrType::NoValue, None)
2019-12-25 04:44:51 -05:00
} else {
match process_attr_value(proc, should_collapse_and_trim_value_ws)? {
2019-12-27 05:52:49 -05:00
ProcessedAttrValue { value: None, .. } => {
// Value is empty, which is equivalent to no value, so discard `=` and any quotes.
proc.erase_written(after_name);
2019-12-27 05:52:49 -05:00
(AttrType::NoValue, None)
}
2019-12-27 05:52:49 -05:00
ProcessedAttrValue { delimiter: DelimiterType::Unquoted, value } => (AttrType::Unquoted, value),
ProcessedAttrValue { delimiter: DelimiterType::Double, value } | ProcessedAttrValue { delimiter: DelimiterType::Single, value } => (AttrType::Quoted, value),
}
2019-12-27 05:52:49 -05:00
};
Ok(ProcessedAttr { name, typ, value })
2019-12-25 04:44:51 -05:00
}