From 98cb36c9c2cad0a3815bf336ff993f8263fe7e62 Mon Sep 17 00:00:00 2001 From: Wilson Lin Date: Thu, 15 Apr 2021 01:36:06 +1000 Subject: [PATCH] Improve closing tag escaping --- src/proc/mod.rs | 18 +++--------------- src/tests/mod.rs | 2 ++ src/unit/script.rs | 24 +++++++++++++++++++++++- src/unit/style.rs | 35 +++++++++++++++++++++++++---------- 4 files changed, 53 insertions(+), 26 deletions(-) diff --git a/src/proc/mod.rs b/src/proc/mod.rs index f276ec5..88ea152 100644 --- a/src/proc/mod.rs +++ b/src/proc/mod.rs @@ -8,7 +8,6 @@ use memchr::memchr; #[cfg(feature = "js-esbuild")] use { crossbeam::sync::WaitGroup, - esbuild_rs::TransformResult, std::sync::{Arc, Mutex}, }; @@ -54,7 +53,7 @@ pub enum MatchAction { #[cfg(feature = "js-esbuild")] pub struct EsbuildSection { pub src: ProcessorRange, - pub result: TransformResult, + pub escaped: Vec, } // Processing state of a file. Single use only; create one per processing. @@ -381,21 +380,10 @@ impl<'d> Processor<'d> { // the write pointer after previous compaction. // If there are no script sections, then we get self.write_next which will be returned. let mut write_next = results.get(0).map_or(self.write_next, |r| r.src.start); - for (i, EsbuildSection { result, src }) in results.iter().enumerate() { + for (i, EsbuildSection { escaped: min_code, src }) in results.iter().enumerate() { // Resulting minified JS/CSS to write. - // TODO Handle other forms: - // 1 < /script/.exec(a).length - // ` ${` ${a - // /* - // Considerations: - // - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code. - // - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex. - // - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem). - // - `/let a = "";"#, br#""#); + eval_with_js_min(br#""#, br#""#); eval_with_js_min(br#""#, br#""#); + eval_with_js_min(br#""#, br#""#); } #[cfg(feature = "js-esbuild")] diff --git a/src/unit/script.rs b/src/unit/script.rs index e519cf2..ac7711d 100644 --- a/src/unit/script.rs +++ b/src/unit/script.rs @@ -46,9 +46,31 @@ pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingRe unsafe { esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| { let mut guard = results.lock().unwrap(); + // TODO Handle other forms: + // 1 < /script/.exec(a).length + // ` ${` ${a + // /* + // Considerations: + // - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code. + // - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex. + // - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem). + // - `/::new(); + // SCRIPT_END must be case insensitive. + SCRIPT_END.replace_all_with_bytes( + result.code.as_str().trim().as_bytes(), + &mut escaped, + |_, orig, dst| { + dst.extend(b"<\\/"); + // Keep original case. + dst.extend(&orig[2..]); + true + }, + ); guard.push(EsbuildSection { src, - result, + escaped, }); // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function. diff --git a/src/unit/style.rs b/src/unit/style.rs index 0b1d99e..5f8819a 100644 --- a/src/unit/style.rs +++ b/src/unit/style.rs @@ -1,17 +1,19 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder}; use lazy_static::lazy_static; + +#[cfg(feature = "js-esbuild")] +use { + crate::proc::checkpoint::WriteCheckpoint, + crate::proc::EsbuildSection, + esbuild_rs::{Loader, TransformOptions, TransformOptionsBuilder}, + std::sync::Arc, +}; + +use crate::Cfg; use crate::err::ProcessingResult; use crate::proc::MatchAction::*; use crate::proc::MatchMode::*; use crate::proc::Processor; -#[cfg(feature = "js-esbuild")] -use { - std::sync::Arc, - esbuild_rs::{Loader, TransformOptionsBuilder, TransformOptions}, - crate::proc::EsbuildSection, - crate::proc::checkpoint::WriteCheckpoint, -}; -use crate::Cfg; #[cfg(feature = "js-esbuild")] lazy_static! { @@ -32,7 +34,7 @@ lazy_static! { #[inline(always)] pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> { #[cfg(feature = "js-esbuild")] - let start = WriteCheckpoint::new(proc); + let start = WriteCheckpoint::new(proc); proc.require_not_at_end()?; proc.m(WhileNotSeq(&STYLE_END), Keep); // `process_tag` will require closing tag. @@ -45,9 +47,22 @@ pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> { unsafe { esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| { let mut guard = results.lock().unwrap(); + // TODO Are there other places that can have unintentional closing tags? + let mut escaped = Vec::::new(); + // STYLE_END must be case insensitive. + STYLE_END.replace_all_with_bytes( + result.code.as_str().trim().as_bytes(), + &mut escaped, + |_, orig, dst| { + dst.extend(b"<\\/"); + // Keep original case. + dst.extend(&orig[2..]); + true + }, + ); guard.push(EsbuildSection { src, - result, + escaped, }); // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.