diff --git a/src/onepass/cfg/mod.rs b/src/onepass/cfg/mod.rs new file mode 100644 index 0000000..92ce04e --- /dev/null +++ b/src/onepass/cfg/mod.rs @@ -0,0 +1,17 @@ +/// Configuration settings that can be adjusted and passed to a minification function to change the +/// minification approach. +pub struct Cfg { + /// If enabled, JavaScript in ` + // /* + // Considerations: + // - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code. + // - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex. + // - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem). + // - `/::new(); + // SCRIPT_END must be case insensitive. + SCRIPT_END.replace_all_with_bytes( + result.code.as_str().trim().as_bytes(), + &mut escaped, + |_, orig, dst| { + dst.extend(b"<\\/"); + // Keep original case. + dst.extend(&orig[2..]); + true + }, + ); + guard.push(EsbuildSection { + src, + escaped, + }); + // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish + // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function. + drop(guard); + drop(results); + drop(wg); + }); + }; + }; + + Ok(()) +} diff --git a/src/onepass/unit/style.rs b/src/onepass/unit/style.rs new file mode 100644 index 0000000..5f8819a --- /dev/null +++ b/src/onepass/unit/style.rs @@ -0,0 +1,77 @@ +use aho_corasick::{AhoCorasick, AhoCorasickBuilder}; +use lazy_static::lazy_static; + +#[cfg(feature = "js-esbuild")] +use { + crate::proc::checkpoint::WriteCheckpoint, + crate::proc::EsbuildSection, + esbuild_rs::{Loader, TransformOptions, TransformOptionsBuilder}, + std::sync::Arc, +}; + +use crate::Cfg; +use crate::err::ProcessingResult; +use crate::proc::MatchAction::*; +use crate::proc::MatchMode::*; +use crate::proc::Processor; + +#[cfg(feature = "js-esbuild")] +lazy_static! { + static ref TRANSFORM_OPTIONS: Arc = { + let mut builder = TransformOptionsBuilder::new(); + builder.loader = Loader::CSS; + builder.minify_identifiers = true; + builder.minify_syntax = true; + builder.minify_whitespace = true; + builder.build() + }; +} + +lazy_static! { + static ref STYLE_END: AhoCorasick = AhoCorasickBuilder::new().ascii_case_insensitive(true).build(&[" ProcessingResult<()> { + #[cfg(feature = "js-esbuild")] + let start = WriteCheckpoint::new(proc); + proc.require_not_at_end()?; + proc.m(WhileNotSeq(&STYLE_END), Keep); + // `process_tag` will require closing tag. + + // TODO This is copied from script.rs. + #[cfg(feature = "js-esbuild")] + if cfg.minify_css { + let (wg, results) = proc.new_esbuild_section(); + let src = start.written_range(proc); + unsafe { + esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| { + let mut guard = results.lock().unwrap(); + // TODO Are there other places that can have unintentional closing tags? + let mut escaped = Vec::::new(); + // STYLE_END must be case insensitive. + STYLE_END.replace_all_with_bytes( + result.code.as_str().trim().as_bytes(), + &mut escaped, + |_, orig, dst| { + dst.extend(b"<\\/"); + // Keep original case. + dst.extend(&orig[2..]); + true + }, + ); + guard.push(EsbuildSection { + src, + escaped, + }); + // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish + // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function. + drop(guard); + drop(results); + drop(wg); + }); + }; + }; + + Ok(()) +} diff --git a/src/onepass/unit/tag.rs b/src/onepass/unit/tag.rs new file mode 100644 index 0000000..45290b2 --- /dev/null +++ b/src/onepass/unit/tag.rs @@ -0,0 +1,245 @@ +use lazy_static::lazy_static; +use std::collections::HashSet; +use crate::err::{ErrorType, ProcessingResult}; +use crate::proc::checkpoint::{WriteCheckpoint, ReadCheckpoint}; +use crate::proc::MatchAction::*; +use crate::proc::MatchMode::*; +use crate::proc::Processor; +use crate::proc::range::ProcessorRange; +use crate::spec::tag::void::VOID_TAGS; +use crate::unit::attr::{AttrType, process_attr, ProcessedAttr}; +use crate::unit::content::process_content; +use crate::unit::script::process_script; +use crate::unit::style::process_style; +use crate::gen::attrs::{ATTRS, AttributeMinification}; +use crate::spec::tag::ns::Namespace; +use crate::gen::codepoints::{TAG_NAME_CHAR, WHITESPACE}; +use crate::cfg::Cfg; +use crate::spec::tag::omission::{can_omit_as_last_node, can_omit_as_before}; + +lazy_static! { + pub static ref JAVASCRIPT_MIME_TYPES: HashSet<&'static [u8]> = { + let mut s = HashSet::<&'static [u8]>::new(); + s.insert(b"application/ecmascript"); + s.insert(b"application/javascript"); + s.insert(b"application/x-ecmascript"); + s.insert(b"application/x-javascript"); + s.insert(b"text/ecmascript"); + s.insert(b"text/javascript"); + s.insert(b"text/javascript1.0"); + s.insert(b"text/javascript1.1"); + s.insert(b"text/javascript1.2"); + s.insert(b"text/javascript1.3"); + s.insert(b"text/javascript1.4"); + s.insert(b"text/javascript1.5"); + s.insert(b"text/jscript"); + s.insert(b"text/livescript"); + s.insert(b"text/x-ecmascript"); + s.insert(b"text/x-javascript"); + s + }; +} + +#[derive(Copy, Clone)] +enum TagType { + ScriptJs, + ScriptData, + Style, + Other, +} + +#[derive(Copy, Clone)] +pub struct MaybeClosingTag(Option); + +impl MaybeClosingTag { + #[inline(always)] + pub fn none() -> MaybeClosingTag { + MaybeClosingTag(None) + } + + #[inline(always)] + pub fn write(&mut self, proc: &mut Processor) -> () { + proc.write_slice(b"'); + } + + #[inline(always)] + pub fn write_if_exists(&mut self, proc: &mut Processor) -> bool { + self.0.take().filter(|tag| { + proc.write_slice(b"'); + true + }).is_some() + } + + #[inline(always)] + pub fn exists(&self) -> bool { + self.0.is_some() + } + + #[inline(always)] + pub fn exists_and bool>(&self, pred: F) -> bool { + match self.0 { + Some(range) => pred(range), + None => false, + } + } + + #[inline(always)] + pub fn replace(&mut self, tag: MaybeClosingTag) -> () { + self.0 = tag.0; + } +} + +// TODO Comment param `prev_sibling_closing_tag`. +pub fn process_tag( + proc: &mut Processor, + cfg: &Cfg, + ns: Namespace, + parent: Option, + descendant_of_pre: bool, + mut prev_sibling_closing_tag: MaybeClosingTag, + source_tag_name: ProcessorRange, +) -> ProcessingResult { + if prev_sibling_closing_tag.exists_and(|prev_tag| !can_omit_as_before(proc, Some(prev_tag), source_tag_name)) { + prev_sibling_closing_tag.write(proc); + }; + // Write initially skipped left chevron. + proc.write(b'<'); + // Write previously skipped name and use written code as range (otherwise source code will eventually be overwritten). + let tag_name = proc.write_range(source_tag_name); + + let mut tag_type = match &proc[tag_name] { + // Unless non-JS MIME `type` is provided, `script` tags contain JS. + b"script" => TagType::ScriptJs, + b"style" => TagType::Style, + _ => TagType::Other, + }; + + let mut last_attr_type: Option = None; + let mut self_closing = false; + let is_void_tag = VOID_TAGS.contains(&proc[tag_name]); + + loop { + // At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one). + proc.m(WhileInLookup(WHITESPACE), Discard); + + if proc.m(IsChar(b'>'), Keep).nonempty() { + // End of tag. + break; + } + + // Don't write self closing "/>" as it could be shortened to ">" if void tag. + self_closing = proc.m(IsSeq(b"/>"), Discard).nonempty(); + if self_closing { + break; + } + + // Mark attribute start in case we want to erase it completely. + let attr_checkpoint = WriteCheckpoint::new(proc); + let mut erase_attr = false; + + // Write space after tag name or unquoted/valueless attribute. + // Don't write after quoted. + // Handle rare case where file ends in opening tag before an attribute and no minification has been done yet, + // e.g. `<-` (yes, that's the entire file). + if proc.at_end() { + return Err(ErrorType::UnexpectedEnd); + }; + match last_attr_type { + Some(AttrType::Unquoted) | Some(AttrType::NoValue) | None => proc.write(b' '), + _ => {} + }; + + let ProcessedAttr { name, typ, value } = process_attr(proc, ns, tag_name)?; + match (tag_type, &proc[name]) { + // NOTE: We don't support multiple `type` attributes, so can't go from ScriptData => ScriptJs. + (TagType::ScriptJs, b"type") => { + // It's JS if the value is empty or one of `JAVASCRIPT_MIME_TYPES`. + let script_tag_type_is_js = value + .filter(|v| !JAVASCRIPT_MIME_TYPES.contains(&proc[*v])) + .is_none(); + if script_tag_type_is_js { + erase_attr = true; + } else { + // Tag does not contain JS, don't minify JS. + tag_type = TagType::ScriptData; + }; + } + (_, name) => { + // TODO Check if HTML tag before checking if attribute removal applies to all elements. + erase_attr = match (value, ATTRS.get(ns, &proc[tag_name], name)) { + (None, Some(AttributeMinification { redundant_if_empty: true, .. })) => true, + (Some(val), Some(AttributeMinification { default_value: Some(defval), .. })) => proc[val].eq(*defval), + _ => false, + }; + } + }; + if erase_attr { + attr_checkpoint.erase_written(proc); + } else { + last_attr_type = Some(typ); + }; + }; + + // TODO Self closing does not actually close for HTML elements, but might close for foreign elements. + // See spec for more details. + if self_closing || is_void_tag { + if self_closing { + // Write discarded tag closing characters. + if is_void_tag { + proc.write_slice(b">"); + } else { + if let Some(AttrType::Unquoted) = last_attr_type { + // Prevent `/` from being part of the value. + proc.write(b' '); + }; + proc.write_slice(b"/>"); + }; + }; + return Ok(MaybeClosingTag(None)); + }; + + let child_ns = if proc[tag_name].eq(b"svg") { + Namespace::Svg + } else { + ns + }; + + let mut closing_tag_omitted = false; + match tag_type { + TagType::ScriptData => process_script(proc, cfg, false)?, + TagType::ScriptJs => process_script(proc, cfg, true)?, + TagType::Style => process_style(proc, cfg)?, + _ => closing_tag_omitted = process_content(proc, cfg, child_ns, Some(tag_name), descendant_of_pre)?.closing_tag_omitted, + }; + + let can_omit_closing_tag = can_omit_as_last_node(proc, parent, tag_name); + if closing_tag_omitted || proc.at_end() && can_omit_closing_tag { + return Ok(MaybeClosingTag(None)); + }; + + let closing_tag_checkpoint = ReadCheckpoint::new(proc); + proc.m(IsSeq(b"'), Discard).require("closing tag end")?; + Ok(MaybeClosingTag(Some(tag_name))) + } +}