2019-12-29 05:53:49 -05:00
|
|
|
use phf::{phf_set, Set};
|
|
|
|
|
2019-12-25 21:47:18 -05:00
|
|
|
use crate::err::{ErrorType, ProcessingResult};
|
2019-12-27 05:52:49 -05:00
|
|
|
use crate::proc::{Processor, ProcessorRange};
|
2019-12-23 06:48:41 -05:00
|
|
|
use crate::spec::codepoint::{is_alphanumeric, is_whitespace};
|
2020-01-07 04:56:37 -05:00
|
|
|
use crate::spec::tag::omission::CLOSING_TAG_OMISSION_RULES;
|
2019-12-23 06:48:41 -05:00
|
|
|
use crate::spec::tag::void::VOID_TAGS;
|
2019-12-27 05:52:49 -05:00
|
|
|
use crate::unit::attr::{AttrType, process_attr, ProcessedAttr};
|
2019-12-25 04:44:51 -05:00
|
|
|
use crate::unit::content::process_content;
|
2019-12-27 05:52:49 -05:00
|
|
|
use crate::unit::script::js::process_js_script;
|
2019-12-29 05:53:49 -05:00
|
|
|
use crate::unit::script::text::process_text_script;
|
|
|
|
use crate::unit::style::process_style;
|
2019-12-27 05:52:49 -05:00
|
|
|
|
|
|
|
/// MIME type strings that identify a script as JavaScript.
///
/// `process_tag` looks up the value of a `<script>` element's `type` attribute in this set to
/// decide whether the element's content is handled as JavaScript or as plain text. Entries are
/// byte strings and membership is an exact byte comparison.
// NOTE(review): the entries look like the WHATWG "JavaScript MIME type" list — confirm against
// the spec before adding or removing values.
pub static JAVASCRIPT_MIME_TYPES: Set<&'static [u8]> = phf_set! {
    b"application/ecmascript",
    b"application/javascript",
    b"application/x-ecmascript",
    b"application/x-javascript",
    b"text/ecmascript",
    b"text/javascript",
    b"text/javascript1.0",
    b"text/javascript1.1",
    b"text/javascript1.2",
    b"text/javascript1.3",
    b"text/javascript1.4",
    b"text/javascript1.5",
    b"text/jscript",
    b"text/livescript",
    b"text/x-ecmascript",
    b"text/x-javascript",
};
|
2019-12-23 06:48:41 -05:00
|
|
|
|
|
|
|
// Tag names may only use ASCII alphanumerics. However, some people also use `:` and `-`.
|
|
|
|
// See https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name for spec.
|
|
|
|
fn is_valid_tag_name_char(c: u8) -> bool {
|
|
|
|
is_alphanumeric(c) || c == b':' || c == b'-'
|
|
|
|
}
|
|
|
|
|
2020-01-06 08:28:35 -05:00
|
|
|
/// Coarse classification of an element by tag name.
///
/// `process_tag` uses it to choose how the element's content is processed and which `type`
/// attributes may be dropped.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TagType {
    /// The tag name is exactly `script`.
    Script,
    /// The tag name is exactly `style`.
    Style,
    /// Any other tag name.
    Other,
}
|
|
|
|
|
2020-01-06 07:36:05 -05:00
|
|
|
pub struct ProcessedTag {
|
|
|
|
pub name: ProcessorRange,
|
|
|
|
pub closing_tag: Option<ProcessorRange>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl ProcessedTag {
|
|
|
|
pub fn write_closing_tag(&self, proc: &mut Processor) -> () {
|
|
|
|
if let Some(tag) = self.closing_tag {
|
|
|
|
proc.write_range(tag);
|
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO Comment param `prev_sibling_closing_tag`.
|
|
|
|
pub fn process_tag(proc: &mut Processor, prev_sibling_closing_tag: Option<ProcessedTag>) -> ProcessingResult<ProcessedTag> {
|
2019-12-29 19:33:49 -05:00
|
|
|
// TODO Minify opening and closing tag whitespace after name and last attr.
|
2019-12-25 07:29:18 -05:00
|
|
|
// TODO DOC No checking if opening and closing names match.
|
2019-12-25 04:44:51 -05:00
|
|
|
// Expect to be currently at an opening tag.
|
2019-12-29 19:33:49 -05:00
|
|
|
if cfg!(debug_assertions) {
|
2020-01-06 07:36:05 -05:00
|
|
|
chain!(proc.match_char(b'<').expect().discard());
|
2019-12-29 19:33:49 -05:00
|
|
|
} else {
|
2020-01-06 07:36:05 -05:00
|
|
|
proc.skip_expect();
|
2019-12-29 19:33:49 -05:00
|
|
|
};
|
2019-12-25 04:44:51 -05:00
|
|
|
// May not be valid tag name at current position, so require instead of expect.
|
2020-01-07 04:56:37 -05:00
|
|
|
let source_tag_name = chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("tag name")?.discard().range());
|
2020-01-06 07:36:05 -05:00
|
|
|
if let Some(prev_tag) = prev_sibling_closing_tag {
|
|
|
|
let can_omit = match CLOSING_TAG_OMISSION_RULES.get(&proc[prev_tag.name]) {
|
2020-01-07 04:56:37 -05:00
|
|
|
Some(rule) => rule.can_omit_as_prev(&proc[source_tag_name]),
|
2020-01-06 07:36:05 -05:00
|
|
|
_ => false,
|
|
|
|
};
|
|
|
|
if !can_omit {
|
|
|
|
prev_tag.write_closing_tag(proc);
|
|
|
|
};
|
|
|
|
};
|
|
|
|
// Write initially skipped left chevron.
|
|
|
|
proc.write(b'<');
|
|
|
|
// Write previously skipped name and use written code as range (otherwise source code will eventually be overwritten).
|
2020-01-07 04:56:37 -05:00
|
|
|
let tag_name = proc.write_range(source_tag_name);
|
2019-12-26 01:16:13 -05:00
|
|
|
|
2020-01-07 04:56:37 -05:00
|
|
|
let tag_type = match &proc[tag_name] {
|
2019-12-26 01:16:13 -05:00
|
|
|
b"script" => TagType::Script,
|
|
|
|
b"style" => TagType::Style,
|
|
|
|
_ => TagType::Other,
|
|
|
|
};
|
2019-12-23 06:48:41 -05:00
|
|
|
|
2019-12-26 00:17:57 -05:00
|
|
|
let mut last_attr_type: Option<AttrType> = None;
|
2019-12-23 06:48:41 -05:00
|
|
|
let mut self_closing = false;
|
2020-01-07 04:56:37 -05:00
|
|
|
let is_void_tag = VOID_TAGS.contains(&proc[tag_name]);
|
2020-01-06 08:28:35 -05:00
|
|
|
// Set to false if `tag_type` is Script and "type" attribute exists and has value that is not empty and not one of `JAVASCRIPT_MIME_TYPES`.
|
|
|
|
let mut script_tag_type_is_js: bool = true;
|
2019-12-23 06:48:41 -05:00
|
|
|
|
|
|
|
loop {
|
2019-12-26 08:23:33 -05:00
|
|
|
// At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one).
|
2019-12-25 07:29:18 -05:00
|
|
|
let ws_accepted = chain!(proc.match_while_pred(is_whitespace).discard().matched());
|
2019-12-23 06:48:41 -05:00
|
|
|
|
2019-12-25 07:29:18 -05:00
|
|
|
if chain!(proc.match_char(b'>').keep().matched()) {
|
2019-12-23 06:48:41 -05:00
|
|
|
// End of tag.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2020-01-07 04:56:37 -05:00
|
|
|
// Don't write self closing "/>" as it could be shortened to ">" if void tag.
|
|
|
|
self_closing = chain!(proc.match_seq(b"/>").discard().matched());
|
2019-12-25 04:44:51 -05:00
|
|
|
if self_closing {
|
2019-12-23 06:48:41 -05:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-12-26 00:17:57 -05:00
|
|
|
// This needs to be enforced as otherwise there would be difficulty in determining what is the end of a tag/attribute name/attribute value.
|
2019-12-23 06:48:41 -05:00
|
|
|
if !ws_accepted {
|
2019-12-25 07:29:18 -05:00
|
|
|
return Err(ErrorType::NoSpaceBeforeAttr);
|
2019-12-23 06:48:41 -05:00
|
|
|
}
|
|
|
|
|
2020-01-06 08:28:35 -05:00
|
|
|
// Mark attribute start in case we want to erase it completely.
|
|
|
|
let attr_checkpoint = proc.checkpoint();
|
|
|
|
let mut erase_attr = false;
|
|
|
|
|
2019-12-26 00:17:57 -05:00
|
|
|
// Write space after tag name or unquoted/valueless attribute.
|
2019-12-27 05:52:49 -05:00
|
|
|
// Don't write after unquoted.
|
2019-12-26 00:17:57 -05:00
|
|
|
match last_attr_type {
|
2019-12-26 08:23:33 -05:00
|
|
|
Some(AttrType::Unquoted) | Some(AttrType::NoValue) | None => proc.write(b' '),
|
|
|
|
_ => {}
|
2019-12-26 00:17:57 -05:00
|
|
|
};
|
2019-12-23 06:48:41 -05:00
|
|
|
|
2020-01-07 08:38:42 -05:00
|
|
|
let ProcessedAttr { name, typ, value } = process_attr(proc, tag_name)?;
|
2020-01-06 08:28:35 -05:00
|
|
|
match (tag_type, &proc[name]) {
|
|
|
|
(TagType::Script, b"type") => {
|
|
|
|
// It's JS if the value is empty or one of `JAVASCRIPT_MIME_TYPES`.
|
2020-01-07 04:56:37 -05:00
|
|
|
script_tag_type_is_js = value
|
|
|
|
.filter(|v| !JAVASCRIPT_MIME_TYPES.contains(&proc[*v]))
|
|
|
|
.is_none();
|
2020-01-06 08:28:35 -05:00
|
|
|
if script_tag_type_is_js {
|
|
|
|
erase_attr = true;
|
|
|
|
};
|
2020-01-07 04:56:37 -05:00
|
|
|
}
|
|
|
|
(TagType::Style, b"type") => {
|
|
|
|
erase_attr = true;
|
|
|
|
}
|
2019-12-29 05:53:49 -05:00
|
|
|
_ => {}
|
2019-12-27 05:52:49 -05:00
|
|
|
};
|
2020-01-06 08:28:35 -05:00
|
|
|
if erase_attr {
|
|
|
|
proc.erase_written(attr_checkpoint);
|
|
|
|
} else {
|
|
|
|
last_attr_type = Some(typ);
|
|
|
|
};
|
2019-12-25 04:44:51 -05:00
|
|
|
};
|
2019-12-23 06:48:41 -05:00
|
|
|
|
2020-01-07 04:56:37 -05:00
|
|
|
if self_closing || is_void_tag {
|
|
|
|
if self_closing {
|
|
|
|
// Write discarded tag closing characters.
|
|
|
|
if is_void_tag { proc.write_slice(b">"); } else { proc.write_slice(b"/>"); };
|
|
|
|
};
|
|
|
|
return Ok(ProcessedTag { name: tag_name, closing_tag: None });
|
2019-12-25 04:44:51 -05:00
|
|
|
};
|
2019-12-23 06:48:41 -05:00
|
|
|
|
2019-12-26 01:16:13 -05:00
|
|
|
match tag_type {
|
2020-01-07 04:56:37 -05:00
|
|
|
TagType::Script => if script_tag_type_is_js { process_js_script(proc)?; } else { process_text_script(proc)?; },
|
2019-12-26 01:16:13 -05:00
|
|
|
TagType::Style => process_style(proc)?,
|
2020-01-07 04:56:37 -05:00
|
|
|
_ => process_content(proc, Some(tag_name))?,
|
2019-12-25 04:44:51 -05:00
|
|
|
};
|
2019-12-23 06:48:41 -05:00
|
|
|
|
|
|
|
// Require closing tag for non-void.
|
2020-01-06 07:36:05 -05:00
|
|
|
let closing_tag = proc.checkpoint();
|
|
|
|
chain!(proc.match_seq(b"</").require()?.discard());
|
|
|
|
chain!(proc.match_while_pred(is_valid_tag_name_char).require_with_reason("closing tag name")?.discard());
|
2020-01-08 06:19:16 -05:00
|
|
|
chain!(proc.match_while_pred(is_whitespace).discard());
|
2020-01-06 07:36:05 -05:00
|
|
|
chain!(proc.match_char(b'>').require()?.discard());
|
2020-01-07 04:56:37 -05:00
|
|
|
Ok(ProcessedTag { name: tag_name, closing_tag: Some(proc.consumed_range(closing_tag)) })
|
2019-12-23 06:48:41 -05:00
|
|
|
}
|