Improve closing tag escaping
This commit is contained in:
parent
8cf4d27319
commit
98cb36c9c2
|
@ -8,7 +8,6 @@ use memchr::memchr;
|
|||
#[cfg(feature = "js-esbuild")]
|
||||
use {
|
||||
crossbeam::sync::WaitGroup,
|
||||
esbuild_rs::TransformResult,
|
||||
std::sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
|
@ -54,7 +53,7 @@ pub enum MatchAction {
|
|||
#[cfg(feature = "js-esbuild")]
|
||||
pub struct EsbuildSection {
|
||||
pub src: ProcessorRange,
|
||||
pub result: TransformResult,
|
||||
pub escaped: Vec<u8>,
|
||||
}
|
||||
|
||||
// Processing state of a file. Single use only; create one per processing.
|
||||
|
@ -381,21 +380,10 @@ impl<'d> Processor<'d> {
|
|||
// the write pointer after previous compaction.
|
||||
// If there are no script sections, then we get self.write_next which will be returned.
|
||||
let mut write_next = results.get(0).map_or(self.write_next, |r| r.src.start);
|
||||
for (i, EsbuildSection { result, src }) in results.iter().enumerate() {
|
||||
for (i, EsbuildSection { escaped: min_code, src }) in results.iter().enumerate() {
|
||||
// Resulting minified JS/CSS to write.
|
||||
// TODO Handle other forms:
|
||||
// 1 < /script/.exec(a).length
|
||||
// ` ${` ${a</script/} `} `
|
||||
// // </script>
|
||||
// /* </script>
|
||||
// Considerations:
|
||||
// - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code.
|
||||
// - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex.
|
||||
// - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
|
||||
// - `/</script` or `/</ script` are not valid JS so don't need to be handled.
|
||||
let min_code = result.code.as_str().trim().replace("</script", "<\\/script");
|
||||
let min_len = if min_code.len() < src.len() {
|
||||
self.code[write_next..write_next + min_code.len()].copy_from_slice(min_code.as_bytes());
|
||||
self.code[write_next..write_next + min_code.len()].copy_from_slice(min_code);
|
||||
min_code.len()
|
||||
} else {
|
||||
// If minified result is actually longer than source, then write source instead.
|
||||
|
|
|
@ -454,7 +454,9 @@ fn test_js_minification() {
|
|||
#[test]
|
||||
fn test_js_minification_unintentional_closing_tag() {
|
||||
eval_with_js_min(br#"<script>let a = "</" + "script>";</script>"#, br#"<script>let a="<\/script>";</script>"#);
|
||||
eval_with_js_min(br#"<script>let a = "</S" + "cRiPT>";</script>"#, br#"<script>let a="<\/ScRiPT>";</script>"#);
|
||||
eval_with_js_min(br#"<script>let a = "\u003c/script>";</script>"#, br#"<script>let a="<\/script>";</script>"#);
|
||||
eval_with_js_min(br#"<script>let a = "\u003c/scrIPt>";</script>"#, br#"<script>let a="<\/scrIPt>";</script>"#);
|
||||
}
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
|
|
|
@ -46,9 +46,31 @@ pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingRe
|
|||
unsafe {
|
||||
esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| {
|
||||
let mut guard = results.lock().unwrap();
|
||||
// TODO Handle other forms:
|
||||
// 1 < /script/.exec(a).length
|
||||
// ` ${` ${a</script/} `} `
|
||||
// // </script>
|
||||
// /* </script>
|
||||
// Considerations:
|
||||
// - Need to parse strings (e.g. "", '', ``) so syntax within strings aren't mistakenly interpreted as code.
|
||||
// - Need to be able to parse regex literals to determine string delimiters aren't actually characters in the regex.
|
||||
// - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
|
||||
// - `/</script` or `/</ script` are not valid JS so don't need to be handled.
|
||||
let mut escaped = Vec::<u8>::new();
|
||||
// SCRIPT_END must be case insensitive.
|
||||
SCRIPT_END.replace_all_with_bytes(
|
||||
result.code.as_str().trim().as_bytes(),
|
||||
&mut escaped,
|
||||
|_, orig, dst| {
|
||||
dst.extend(b"<\\/");
|
||||
// Keep original case.
|
||||
dst.extend(&orig[2..]);
|
||||
true
|
||||
},
|
||||
);
|
||||
guard.push(EsbuildSection {
|
||||
src,
|
||||
result,
|
||||
escaped,
|
||||
});
|
||||
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
|
||||
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.
|
||||
|
|
|
@ -1,17 +1,19 @@
|
|||
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use {
|
||||
crate::proc::checkpoint::WriteCheckpoint,
|
||||
crate::proc::EsbuildSection,
|
||||
esbuild_rs::{Loader, TransformOptions, TransformOptionsBuilder},
|
||||
std::sync::Arc,
|
||||
};
|
||||
|
||||
use crate::Cfg;
|
||||
use crate::err::ProcessingResult;
|
||||
use crate::proc::MatchAction::*;
|
||||
use crate::proc::MatchMode::*;
|
||||
use crate::proc::Processor;
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use {
|
||||
std::sync::Arc,
|
||||
esbuild_rs::{Loader, TransformOptionsBuilder, TransformOptions},
|
||||
crate::proc::EsbuildSection,
|
||||
crate::proc::checkpoint::WriteCheckpoint,
|
||||
};
|
||||
use crate::Cfg;
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
lazy_static! {
|
||||
|
@ -32,7 +34,7 @@ lazy_static! {
|
|||
#[inline(always)]
|
||||
pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
let start = WriteCheckpoint::new(proc);
|
||||
let start = WriteCheckpoint::new(proc);
|
||||
proc.require_not_at_end()?;
|
||||
proc.m(WhileNotSeq(&STYLE_END), Keep);
|
||||
// `process_tag` will require closing tag.
|
||||
|
@ -45,9 +47,22 @@ pub fn process_style(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
|
|||
unsafe {
|
||||
esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| {
|
||||
let mut guard = results.lock().unwrap();
|
||||
// TODO Are there other places that can have unintentional closing tags?
|
||||
let mut escaped = Vec::<u8>::new();
|
||||
// STYLE_END must be case insensitive.
|
||||
STYLE_END.replace_all_with_bytes(
|
||||
result.code.as_str().trim().as_bytes(),
|
||||
&mut escaped,
|
||||
|_, orig, dst| {
|
||||
dst.extend(b"<\\/");
|
||||
// Keep original case.
|
||||
dst.extend(&orig[2..]);
|
||||
true
|
||||
},
|
||||
);
|
||||
guard.push(EsbuildSection {
|
||||
src,
|
||||
result,
|
||||
escaped,
|
||||
});
|
||||
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
|
||||
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.
|
||||
|
|
Loading…
Reference in New Issue