Let esbuild handle closing script and style tag escaping

This commit is contained in:
Wilson Lin 2021-08-08 17:18:50 +10:00
parent 920133fc18
commit 7612244a10
5 changed files with 10 additions and 37 deletions

View File

@ -5,6 +5,7 @@
<title><title></titl></title> <title><title></titl></title>
</head> </head>
<body> <body>
<ScrIPT></scRIpt>let x = 1;
<div =x =x=1 ===>&l<!-- -->t;</div> <div =x =x=1 ===>&l<!-- -->t;</div>
<div>x<!ac > a <!ac > b <!ac > c</div> <div>x<!ac > a <!ac > b <!ac > c</div>
<div>x<? ?> a <? > b <? > c</div> <div>x<? ?> a <? > b <? > c</div>

View File

@ -312,7 +312,6 @@ pub fn minify_attr(
&mut value_raw_wrapped_min, &mut value_raw_wrapped_min,
&value_raw_wrapped, &value_raw_wrapped,
&MINIFY_CSS_TRANSFORM_OPTIONS.clone(), &MINIFY_CSS_TRANSFORM_OPTIONS.clone(),
None,
); );
// If input was invalid, wrapper syntax may not exist anymore. // If input was invalid, wrapper syntax may not exist anymore.
if value_raw_wrapped_min.starts_with(b"x{") { if value_raw_wrapped_min.starts_with(b"x{") {

View File

@ -1,8 +1,7 @@
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
use { use {
crate::minify::esbuild::minify_using_esbuild, crate::minify::esbuild::minify_using_esbuild,
aho_corasick::{AhoCorasick, AhoCorasickBuilder}, esbuild_rs::{Charset, LegalComments, Loader, SourceMap, TransformOptions, TransformOptionsBuilder},
esbuild_rs::{Charset, Loader, SourceMap, TransformOptions, TransformOptionsBuilder},
lazy_static::lazy_static, lazy_static::lazy_static,
std::sync::Arc, std::sync::Arc,
}; };
@ -11,9 +10,6 @@ use crate::cfg::Cfg;
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
lazy_static! { lazy_static! {
static ref STYLE_END: AhoCorasick = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(&["</style"]);
pub static ref MINIFY_CSS_TRANSFORM_OPTIONS: Arc<TransformOptions> = { pub static ref MINIFY_CSS_TRANSFORM_OPTIONS: Arc<TransformOptions> = {
let mut builder = TransformOptionsBuilder::new(); let mut builder = TransformOptionsBuilder::new();
builder.charset = Charset::UTF8; builder.charset = Charset::UTF8;
@ -41,7 +37,6 @@ pub fn minify_css(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
out, out,
code, code,
&MINIFY_CSS_TRANSFORM_OPTIONS.clone(), &MINIFY_CSS_TRANSFORM_OPTIONS.clone(),
Some(&STYLE_END),
); );
} }
} }

View File

@ -1,42 +1,20 @@
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
use {aho_corasick::AhoCorasick, crossbeam::sync::WaitGroup, esbuild_rs::TransformOptions}; use {crossbeam::sync::WaitGroup, esbuild_rs::TransformOptions};
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
// TODO The use of WG is ugly and we don't want to be multi-threaded; wait for Rust port esbuild-transform-rs. // TODO The use of WG is ugly and we don't want to be multi-threaded; wait for Rust port esbuild-transform-rs.
// `tag_to_escape` must be case insensitive if provided.
pub fn minify_using_esbuild( pub fn minify_using_esbuild(
out: &mut Vec<u8>, out: &mut Vec<u8>,
code: &[u8], code: &[u8],
transform_options: &TransformOptions, transform_options: &TransformOptions,
tag_to_escape: Option<&'static AhoCorasick>,
) { ) {
let wg = WaitGroup::new(); let wg = WaitGroup::new();
unsafe { unsafe {
let wg = wg.clone(); let wg = wg.clone();
// esbuild now officially handles escaping `</script` and `</style`.
esbuild_rs::transform_direct_unmanaged(code, transform_options, move |result| { esbuild_rs::transform_direct_unmanaged(code, transform_options, move |result| {
let min_code = result.code.as_str().trim().as_bytes(); let min_code = result.code.as_str().trim().as_bytes();
match tag_to_escape { out.extend_from_slice(min_code);
None => out.extend_from_slice(min_code),
// TODO (JS) Handle other forms:
// 1 < /script/.exec(a).length
// ` ${` ${a</script/} `} `
// // </script>
// /* </script>
// Considerations:
// - Need to parse strings (e.g. "", '', ``) so that syntax within strings isn't mistakenly interpreted as code.
// - Need to be able to parse regex literals to determine that string delimiters aren't actually characters in the regex.
// - Determining whether a slash is division or regex requires a full-blown JS parser to handle all cases (this is a well-known JS parsing problem).
// - `/</script` or `/</ script` are not valid JS so don't need to be handled.
// TODO (CSS) Are there other places that can have unintentional closing tags?
Some(tag_to_escape) => {
tag_to_escape.replace_all_with_bytes(min_code, out, |_, orig, dst| {
dst.extend(b"<\\/");
// Keep original case.
dst.extend(&orig[2..]);
true
})
}
}
drop(wg); drop(wg);
}); });
}; };

View File

@ -1,7 +1,6 @@
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
use { use {
crate::minify::esbuild::minify_using_esbuild, crate::minify::esbuild::minify_using_esbuild,
aho_corasick::{AhoCorasick, AhoCorasickBuilder},
esbuild_rs::{Charset, LegalComments, SourceMap, TransformOptions, TransformOptionsBuilder}, esbuild_rs::{Charset, LegalComments, SourceMap, TransformOptions, TransformOptionsBuilder},
lazy_static::lazy_static, lazy_static::lazy_static,
std::sync::Arc, std::sync::Arc,
@ -11,9 +10,6 @@ use crate::Cfg;
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
lazy_static! { lazy_static! {
static ref SCRIPT_END: AhoCorasick = AhoCorasickBuilder::new()
.ascii_case_insensitive(true)
.build(&["</script"]);
static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = { static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
let mut builder = TransformOptionsBuilder::new(); let mut builder = TransformOptionsBuilder::new();
builder.charset = Charset::UTF8; builder.charset = Charset::UTF8;
@ -36,6 +32,10 @@ pub fn minify_js(cfg: &Cfg, out: &mut Vec<u8>, code: &[u8]) {
if !cfg.minify_js { if !cfg.minify_js {
out.extend_from_slice(&code); out.extend_from_slice(&code);
} else { } else {
minify_using_esbuild(out, code, &TRANSFORM_OPTIONS.clone(), Some(&SCRIPT_END)); minify_using_esbuild(
out,
code,
&TRANSFORM_OPTIONS.clone(),
);
} }
} }