Improve closing tag escaping

This commit is contained in:
Wilson Lin 2021-04-15 01:36:06 +10:00
parent 8cf4d27319
commit 98cb36c9c2
4 changed files with 53 additions and 26 deletions

View File

@ -8,7 +8,6 @@ use memchr::memchr;
#[cfg(feature = "js-esbuild")]
use {
crossbeam::sync::WaitGroup,
esbuild_rs::TransformResult,
std::sync::{Arc, Mutex},
};
@ -54,7 +53,7 @@ pub enum MatchAction {
#[cfg(feature = "js-esbuild")]
pub struct EsbuildSection {
pub src: ProcessorRange,
pub result: TransformResult,
pub escaped: Vec<u8>,
}
// Processing state of a file. Single use only; create one per processing.
@ -381,21 +380,10 @@ impl<'d> Processor<'d> {
// the write pointer after previous compaction.
// If there are no script sections, then we get self.write_next which will be returned.
let mut write_next = results.get(0).map_or(self.write_next, |r| r.src.start);
for (i, EsbuildSection { result, src }) in results.iter().enumerate() {
for (i, EsbuildSection { escaped: min_code, src }) in results.iter().enumerate() {
// Resulting minified JS/CSS to write.
// TODO Handle other forms:
// 1 < /script/.exec(a).length
// ` ${` ${a</script/} `} `
// // </script>
// /* </script>
// Considerations:
// - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code.
// - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex.
// - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
// - `/</script` or `/</ script` are not valid JS so don't need to be handled.
let min_code = result.code.as_str().trim().replace("</script", "<\\/script");
let min_len = if min_code.len() < src.len() {
self.code[write_next..write_next + min_code.len()].copy_from_slice(min_code.as_bytes());
self.code[write_next..write_next + min_code.len()].copy_from_slice(min_code);
min_code.len()
} else {
// If minified result is actually longer than source, then write source instead.

View File

@ -454,7 +454,9 @@ fn test_js_minification() {
#[test]
fn test_js_minification_unintentional_closing_tag() {
eval_with_js_min(br#"<script>let a = "</" + "script>";</script>"#, br#"<script>let a="<\/script>";</script>"#);
eval_with_js_min(br#"<script>let a = "</S" + "cRiPT>";</script>"#, br#"<script>let a="<\/ScRiPT>";</script>"#);
eval_with_js_min(br#"<script>let a = "\u003c/script>";</script>"#, br#"<script>let a="<\/script>";</script>"#);
eval_with_js_min(br#"<script>let a = "\u003c/scrIPt>";</script>"#, br#"<script>let a="<\/scrIPt>";</script>"#);
}
#[cfg(feature = "js-esbuild")]

View File

@ -46,9 +46,31 @@ pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingRe
unsafe {
esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| {
let mut guard = results.lock().unwrap();
// TODO Handle other forms:
// 1 < /script/.exec(a).length
// ` ${` ${a</script/} `} `
// // </script>
// /* </script>
// Considerations:
// - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code.
// - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex.
// - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
// - `/</script` or `/</ script` are not valid JS so don't need to be handled.
let mut escaped = Vec::<u8>::new();
// SCRIPT_END must be case insensitive.
SCRIPT_END.replace_all_with_bytes(
result.code.as_str().trim().as_bytes(),
&mut escaped,
|_, orig, dst| {
dst.extend(b"<\\/");
// Keep original case.
dst.extend(&orig[2..]);
true
},
);
guard.push(EsbuildSection {
src,
result,
escaped,
});
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.

View File

@ -1,17 +1,19 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use lazy_static::lazy_static;
#[cfg(feature = "js-esbuild")]
use {
crate::proc::checkpoint::WriteCheckpoint,
crate::proc::EsbuildSection,
esbuild_rs::{Loader, TransformOptions, TransformOptionsBuilder},
std::sync::Arc,
};
use crate::Cfg;
use crate::err::ProcessingResult;
use crate::proc::MatchAction::*;
use crate::proc::MatchMode::*;
use crate::proc::Processor;
#[cfg(feature = "js-esbuild")]
use {
std::sync::Arc,
esbuild_rs::{Loader, TransformOptionsBuilder, TransformOptions},
crate::proc::EsbuildSection,
crate::proc::checkpoint::WriteCheckpoint,
};
use crate::Cfg;
#[cfg(feature = "js-esbuild")]
lazy_static! {
@ -32,7 +34,7 @@ lazy_static! {
#[inline(always)]
pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
#[cfg(feature = "js-esbuild")]
let start = WriteCheckpoint::new(proc);
let start = WriteCheckpoint::new(proc);
proc.require_not_at_end()?;
proc.m(WhileNotSeq(&STYLE_END), Keep);
// `process_tag` will require closing tag.
@ -45,9 +47,22 @@ pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
unsafe {
esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| {
let mut guard = results.lock().unwrap();
// TODO Are there other places that can have unintentional closing tags?
let mut escaped = Vec::<u8>::new();
// STYLE_END must be case insensitive.
STYLE_END.replace_all_with_bytes(
result.code.as_str().trim().as_bytes(),
&mut escaped,
|_, orig, dst| {
dst.extend(b"<\\/");
// Keep original case.
dst.extend(&orig[2..]);
true
},
);
guard.push(EsbuildSection {
src,
result,
escaped,
});
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.