Use aho-corasick for faster and simpler end tag matching
This commit is contained in:
parent
2542d6c24c
commit
009e91d094
|
@ -20,6 +20,7 @@ default = []
|
|||
js-esbuild = ["crossbeam", "esbuild-rs"]
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.7"
|
||||
crossbeam = { version = "0.7", optional = true }
|
||||
esbuild-rs = { version = "0.2.1", optional = true }
|
||||
lazy_static = "1.4"
|
||||
|
|
|
@ -2,18 +2,21 @@ use core::fmt;
|
|||
use std::fmt::{Debug, Formatter};
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
use aho_corasick::AhoCorasick;
|
||||
|
||||
use crate::err::{Error, ErrorType, ProcessingResult, debug_repr};
|
||||
use crate::proc::MatchAction::*;
|
||||
use crate::proc::MatchMode::*;
|
||||
use crate::proc::range::ProcessorRange;
|
||||
use memchr::memchr;
|
||||
use crate::gen::codepoints::Lookup;
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use std::sync::{Arc, Mutex};
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use esbuild_rs::TransformResult;
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use crossbeam::sync::WaitGroup;
|
||||
use {
|
||||
std::sync::{Arc, Mutex},
|
||||
crossbeam::sync::WaitGroup,
|
||||
esbuild_rs::TransformResult,
|
||||
};
|
||||
|
||||
pub mod checkpoint;
|
||||
pub mod entity;
|
||||
|
@ -37,6 +40,7 @@ pub enum MatchMode {
|
|||
WhileNotInLookup(&'static Lookup),
|
||||
|
||||
IsSeq(&'static [u8]),
|
||||
WhileNotSeq(&'static AhoCorasick),
|
||||
}
|
||||
|
||||
pub enum MatchAction {
|
||||
|
@ -183,6 +187,7 @@ impl<'d> Processor<'d> {
|
|||
WhileNotPred(p) => self._many(|n| !p(n)),
|
||||
|
||||
IsSeq(seq) => self._maybe_read_slice_offset(0, seq.len()).filter(|src| *src == seq).map_or(0, |_| seq.len()),
|
||||
WhileNotSeq(seq) => seq.find(&self.code[self.read_next..]).map_or(self._remaining(), |m| m.start()),
|
||||
};
|
||||
// If keeping, match will be available in written range (which is better as source might eventually get overwritten).
|
||||
// If discarding, then only option is source range.
|
||||
|
|
|
@ -1,18 +1,17 @@
|
|||
use lazy_static::lazy_static;
|
||||
use aho_corasick::AhoCorasick;
|
||||
use crate::cfg::Cfg;
|
||||
use crate::err::ProcessingResult;
|
||||
use crate::proc::MatchAction::*;
|
||||
use crate::proc::MatchMode::*;
|
||||
use crate::proc::Processor;
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use crate::proc::JsMinSection;
|
||||
use crate::cfg::Cfg;
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use crate::proc::checkpoint::Checkpoint;
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use esbuild_rs::{TransformOptionsBuilder, TransformOptions};
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use std::sync::Arc;
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
use lazy_static::lazy_static;
|
||||
use {
|
||||
std::sync::Arc,
|
||||
esbuild_rs::{TransformOptionsBuilder, TransformOptions},
|
||||
crate::proc::JsMinSection,
|
||||
crate::proc::checkpoint::Checkpoint,
|
||||
};
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
lazy_static! {
|
||||
|
@ -25,39 +24,36 @@ lazy_static! {
|
|||
};
|
||||
}
|
||||
|
||||
lazy_static! {
    // Matcher locating the start of the `</script` end tag that terminates script content.
    // HTML tag names are ASCII case-insensitive, so `</SCRIPT` (or any mixed-case spelling) must
    // terminate the content as well; a case-sensitive matcher would run past it and swallow
    // following markup. The builder is referenced by full path so no extra `use` is needed.
    static ref SCRIPT_END: AhoCorasick = aho_corasick::AhoCorasickBuilder::new()
        .ascii_case_insensitive(true)
        .build(&["</script"]);
}
|
||||
|
||||
pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingResult<()> {
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
let start = Checkpoint::new(proc);
|
||||
loop {
|
||||
proc.require_not_at_end()?;
|
||||
// Use fast memchr. Unfortunately all characters in "</script>" are common in JS code.
|
||||
proc.m(WhileNotChar(b'<'), Keep);
|
||||
// `process_tag` will require closing tag.
|
||||
if proc.m(IsSeq(b"</script"), MatchOnly).nonempty() {
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
if js && cfg.minify_js {
|
||||
let (wg, results) = proc.new_script_section();
|
||||
let src = start.written_range(proc);
|
||||
unsafe {
|
||||
esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| {
|
||||
let mut guard = results.lock().unwrap();
|
||||
guard.push(JsMinSection {
|
||||
src,
|
||||
result,
|
||||
});
|
||||
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
|
||||
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.
|
||||
drop(guard);
|
||||
drop(results);
|
||||
drop(wg);
|
||||
});
|
||||
};
|
||||
return Ok(());
|
||||
};
|
||||
break;
|
||||
proc.require_not_at_end()?;
|
||||
proc.m(WhileNotSeq(&SCRIPT_END), Keep);
|
||||
// `process_tag` will require closing tag.
|
||||
|
||||
#[cfg(feature = "js-esbuild")]
|
||||
if js && cfg.minify_js {
|
||||
let (wg, results) = proc.new_script_section();
|
||||
let src = start.written_range(proc);
|
||||
unsafe {
|
||||
esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| {
|
||||
let mut guard = results.lock().unwrap();
|
||||
guard.push(JsMinSection {
|
||||
src,
|
||||
result,
|
||||
});
|
||||
// Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish
|
||||
// waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function.
|
||||
drop(guard);
|
||||
drop(results);
|
||||
drop(wg);
|
||||
});
|
||||
};
|
||||
// Consume '<'.
|
||||
proc.accept_expect();
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,19 +1,18 @@
|
|||
use lazy_static::lazy_static;
|
||||
use aho_corasick::AhoCorasick;
|
||||
use crate::err::ProcessingResult;
|
||||
use crate::proc::MatchAction::*;
|
||||
use crate::proc::MatchMode::*;
|
||||
use crate::proc::Processor;
|
||||
|
||||
lazy_static! {
    // Matcher locating the start of the `</style` end tag that terminates style content.
    // HTML tag names are ASCII case-insensitive, so `</STYLE` (or any mixed-case spelling) must
    // terminate the content as well; a case-sensitive matcher would run past it and swallow
    // following markup. The builder is referenced by full path so no extra `use` is needed.
    static ref STYLE_END: AhoCorasick = aho_corasick::AhoCorasickBuilder::new()
        .ascii_case_insensitive(true)
        .build(&["</style"]);
}
|
||||
|
||||
pub fn process_style(proc: &mut Processor) -> ProcessingResult<()> {
|
||||
loop {
|
||||
proc.require_not_at_end()?;
|
||||
// Use fast memchr.
|
||||
proc.m(WhileNotChar(b'<'), Keep);
|
||||
// `process_tag` will require closing tag.
|
||||
if proc.m(IsSeq(b"</style"), MatchOnly).nonempty() {
|
||||
break;
|
||||
};
|
||||
// Consume '<'.
|
||||
proc.accept_expect();
|
||||
};
|
||||
proc.require_not_at_end()?;
|
||||
proc.m(WhileNotSeq(&STYLE_END), Keep);
|
||||
// `process_tag` will require closing tag.
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue