Use named error struct instead of tuple; use async JS minification; fix old JS package names

This commit is contained in:
Wilson Lin 2020-07-21 17:57:39 +10:00
parent f42b2648d2
commit b967e338ca
8 changed files with 108 additions and 41 deletions

View File

@ -17,10 +17,11 @@ maintenance = { status = "actively-developed" }
[features] [features]
default = [] default = []
js-esbuild = ["esbuild-rs"] js-esbuild = ["crossbeam", "esbuild-rs"]
[dependencies] [dependencies]
esbuild-rs = { version = "0.0.5", optional = true } crossbeam = { version = "0.7.3", optional = true }
esbuild-rs = { version = "0.1.2", optional = true }
lazy_static = "1.4.0" lazy_static = "1.4.0"
memchr = "2.3.3" memchr = "2.3.3"

View File

@ -99,7 +99,7 @@ fn main() {
<details> <details>
<summary><strong>Node.js</strong></summary> <summary><strong>Node.js</strong></summary>
- Package: [@minify-html/js-esbuild](https://www.npmjs.com/package/@minify-html/js-esbuild) - Package: [@minify-html/js](https://www.npmjs.com/package/@minify-html/js)
- Binding: [Neon](https://neon-bindings.com/) - Binding: [Neon](https://neon-bindings.com/)
- Platforms: macOS, Linux; Node.js 8 and higher - Platforms: macOS, Linux; Node.js 8 and higher
@ -108,19 +108,19 @@ fn main() {
Using npm: Using npm:
```bash ```bash
npm i @minify-html/js-esbuild npm i @minify-html/js
``` ```
Using Yarn: Using Yarn:
```bash ```bash
yarn add @minify-html/js-esbuild yarn add @minify-html/js
``` ```
##### Use ##### Use
```js ```js
const minifyHtml = require("@minify-html/js-esbuild"); const minifyHtml = require("@minify-html/js");
const cfg = { minifyJs: false }; const cfg = { minifyJs: false };
const minified = minifyHtml.minify("<p> Hello, world! </p>", cfg); const minified = minifyHtml.minify("<p> Hello, world! </p>", cfg);
@ -134,7 +134,7 @@ const minified = minifyHtml.minifyInPlace(source, cfg);
minify-html is also available for TypeScript: minify-html is also available for TypeScript:
```ts ```ts
import * as minifyHtml from "@minify-html/js-esbuild"; import * as minifyHtml from "@minify-html/js";
import * as fs from "fs"; import * as fs from "fs";
const cfg = { minifyJs: false }; const cfg = { minifyJs: false };

View File

@ -1,5 +1,5 @@
const htmlMinifier = require("html-minifier"); const htmlMinifier = require("html-minifier");
const minifyHtml = require("@minify-html/js-esbuild"); const minifyHtml = require("@minify-html/js");
const minimize = require("minimize"); const minimize = require("minimize");
const terser = require('terser'); const terser = require('terser');

View File

@ -1,7 +1,7 @@
{ {
"private": true, "private": true,
"dependencies": { "dependencies": {
"@minify-html/js-esbuild": "file:../nodejs", "@minify-html/js": "file:../nodejs",
"benchmark": "2.1.4", "benchmark": "2.1.4",
"chart.js": "^2.9.3", "chart.js": "^2.9.3",
"chartjs-node": "^1.7.1", "chartjs-node": "^1.7.1",

View File

@ -22,4 +22,17 @@ impl ErrorType {
} }
} }
/// Error returned by the minification entry points (`in_place`, `in_place_str`, `truncate`,
/// `copy`) in place of the former `(ErrorType, usize)` tuple.
#[derive(Debug)]
pub struct Error {
/// The kind of failure reported by content processing.
pub error_type: ErrorType,
/// The processor's read length (`Processor::read_len()`) at the moment processing failed.
pub position: usize,
}
/// Error returned by `with_friendly_error`, carrying a ready-to-display description.
/// All fields are public so callers can destructure the value.
#[derive(Debug)]
pub struct FriendlyError {
/// The processor's read length (`Processor::read_len()`) at the moment processing failed.
pub position: usize,
/// Message produced by `ErrorType::message()` for the underlying error.
pub message: String,
/// NOTE(review): presumably an excerpt of the source around `position`; how it is built is not
/// visible in this hunk — confirm against `with_friendly_error`.
pub code_context: String,
}
pub type ProcessingResult<T> = Result<T, ErrorType>; pub type ProcessingResult<T> = Result<T, ErrorType>;

View File

@ -1,4 +1,4 @@
pub use crate::err::ErrorType as ErrorType; pub use crate::err::{Error, ErrorType, FriendlyError};
use crate::proc::Processor; use crate::proc::Processor;
use crate::unit::content::process_content; use crate::unit::content::process_content;
use crate::spec::tag::ns::Namespace; use crate::spec::tag::ns::Namespace;
@ -14,15 +14,18 @@ mod spec;
mod tests; mod tests;
mod unit; mod unit;
pub fn in_place(code: &mut [u8], cfg: &Cfg) -> Result<usize, (ErrorType, usize)> { pub fn in_place(code: &mut [u8], cfg: &Cfg) -> Result<usize, Error> {
let mut proc = Processor::new(code); let mut proc = Processor::new(code);
match process_content(&mut proc, cfg, Namespace::Html, None) { match process_content(&mut proc, cfg, Namespace::Html, None) {
Ok(()) => Ok(proc.written_len()), Ok(()) => Ok(proc.finish()),
Err(e) => Err((e, proc.read_len())), Err(e) => Err(Error {
error_type: e,
position: proc.read_len(),
}),
} }
} }
pub fn in_place_str<'s>(code: &'s mut str, cfg: &Cfg) -> Result<&'s str, (ErrorType, usize)> { pub fn in_place_str<'s>(code: &'s mut str, cfg: &Cfg) -> Result<&'s str, Error> {
let bytes = unsafe { code.as_bytes_mut() }; let bytes = unsafe { code.as_bytes_mut() };
match in_place(bytes, cfg) { match in_place(bytes, cfg) {
Ok(min_len) => Ok(unsafe { std::str::from_utf8_unchecked(&bytes[..min_len]) }), Ok(min_len) => Ok(unsafe { std::str::from_utf8_unchecked(&bytes[..min_len]) }),
@ -30,7 +33,7 @@ pub fn in_place_str<'s>(code: &'s mut str, cfg: &Cfg) -> Result<&'s str, (ErrorT
} }
} }
pub fn truncate(code: &mut Vec<u8>, cfg: &Cfg) -> Result<(), (ErrorType, usize)> { pub fn truncate(code: &mut Vec<u8>, cfg: &Cfg) -> Result<(), Error> {
match in_place(code, cfg) { match in_place(code, cfg) {
Ok(written_len) => { Ok(written_len) => {
code.truncate(written_len); code.truncate(written_len);
@ -40,7 +43,7 @@ pub fn truncate(code: &mut Vec<u8>, cfg: &Cfg) -> Result<(), (ErrorType, usize)>
} }
} }
pub fn copy(code: &[u8], cfg: &Cfg) -> Result<Vec<u8>, (ErrorType, usize)> { pub fn copy(code: &[u8], cfg: &Cfg) -> Result<Vec<u8>, Error> {
let mut copy = code.to_vec(); let mut copy = code.to_vec();
match truncate(&mut copy, cfg) { match truncate(&mut copy, cfg) {
Ok(()) => Ok(copy), Ok(()) => Ok(copy),
@ -48,17 +51,10 @@ pub fn copy(code: &[u8], cfg: &Cfg) -> Result<Vec<u8>, (ErrorType, usize)> {
} }
} }
pub struct FriendlyError {
// Make public to allow destructuring.
pub position: usize,
pub message: String,
pub code_context: String,
}
pub fn with_friendly_error(code: &mut [u8], cfg: &Cfg) -> Result<usize, FriendlyError> { pub fn with_friendly_error(code: &mut [u8], cfg: &Cfg) -> Result<usize, FriendlyError> {
let mut proc = Processor::new(code); let mut proc = Processor::new(code);
match process_content(&mut proc, cfg, Namespace::Html, None) { match process_content(&mut proc, cfg, Namespace::Html, None) {
Ok(()) => Ok(proc.written_len()), Ok(()) => Ok(proc.finish()),
Err(e) => Err(FriendlyError { Err(e) => Err(FriendlyError {
position: proc.read_len(), position: proc.read_len(),
message: e.message(), message: e.message(),

View File

@ -8,6 +8,9 @@ use crate::proc::MatchMode::*;
use crate::proc::range::ProcessorRange; use crate::proc::range::ProcessorRange;
use memchr::memchr; use memchr::memchr;
use crate::gen::codepoints::{WHITESPACE, Lookup}; use crate::gen::codepoints::{WHITESPACE, Lookup};
use std::sync::{Arc, Mutex};
use esbuild_rs::TransformResult;
use crossbeam::sync::WaitGroup;
pub mod checkpoint; pub mod checkpoint;
pub mod entity; pub mod entity;
@ -39,6 +42,11 @@ pub enum MatchAction {
MatchOnly, MatchOnly,
} }
/// One asynchronously minified `<script>` body, recorded by the esbuild callback and later
/// spliced back into the output by `Processor::finish`.
pub struct JsMinSection {
// Range of the already-written output that holds the original (unminified) script source.
pub src_range: ProcessorRange,
// Result handed to the `esbuild_rs::transform` callback; `finish` reads its `js` field.
pub result: TransformResult,
}
// Processing state of a file. Single use only; create one per processing. // Processing state of a file. Single use only; create one per processing.
pub struct Processor<'d> { pub struct Processor<'d> {
code: &'d mut [u8], code: &'d mut [u8],
@ -46,6 +54,10 @@ pub struct Processor<'d> {
read_next: usize, read_next: usize,
// Index of the next unwritten space. // Index of the next unwritten space.
write_next: usize, write_next: usize,
#[cfg(feature = "js-esbuild")]
script_wg: WaitGroup,
#[cfg(feature = "js-esbuild")]
script_results: Arc<Mutex<Vec<JsMinSection>>>,
} }
impl<'d> Index<ProcessorRange> for Processor<'d> { impl<'d> Index<ProcessorRange> for Processor<'d> {
@ -66,7 +78,15 @@ impl<'d> IndexMut<ProcessorRange> for Processor<'d> {
impl<'d> Processor<'d> { impl<'d> Processor<'d> {
// Constructor. // Constructor.
pub fn new(code: &mut [u8]) -> Processor { pub fn new(code: &mut [u8]) -> Processor {
Processor { write_next: 0, read_next: 0, code } Processor {
write_next: 0,
read_next: 0,
code,
#[cfg(feature = "js-esbuild")]
script_wg: WaitGroup::new(),
#[cfg(feature = "js-esbuild")]
script_results: Arc::new(Mutex::new(Vec::new())),
}
} }
// INTERNAL APIs. // INTERNAL APIs.
@ -194,11 +214,6 @@ impl<'d> Processor<'d> {
self.read_next self.read_next
} }
/// Get how many characters have been written to output.
pub fn written_len(&self) -> usize {
self.write_next
}
pub fn reserve_output(&mut self, amount: usize) -> () { pub fn reserve_output(&mut self, amount: usize) -> () {
self.write_next += amount; self.write_next += amount;
} }
@ -288,6 +303,34 @@ impl<'d> Processor<'d> {
debug_assert!(self._in_bounds(count - 1)); debug_assert!(self._in_bounds(count - 1));
self._shift(count); self._shift(count);
} }
/// Hands out the handles an asynchronous script minification job needs: a clone of the wait
/// group (to be dropped when the job is done) and a shared reference to the results list (to
/// push the job's `JsMinSection` onto).
pub fn new_script_section(&self) -> (WaitGroup, Arc<Mutex<Vec<JsMinSection>>>) {
    let wait_group = self.script_wg.clone();
    let shared_results = Arc::clone(&self.script_results);
    (wait_group, shared_results)
}
/// Completes processing: waits for every pending asynchronous script minification, splices the
/// minified scripts into the output buffer, and returns the final output length.
///
/// Sections are applied from left to right. Each minified script that is shorter than its
/// source replaces that source, and all following output is shifted left to close the gap.
/// A script whose minified form is not shorter keeps its original source, but is still moved
/// left if earlier sections shrank.
///
/// # Panics
///
/// Panics if another reference to the shared results list is still alive after the wait group
/// has completed, or if the results mutex was poisoned by a panicking callback.
pub fn finish(mut self) -> usize {
    debug_assert!(self.at_end());
    // Blocks until every callback has dropped its `WaitGroup` clone, i.e. all results are in.
    self.script_wg.wait();
    let mut results = Arc::try_unwrap(self.script_results)
        .unwrap_or_else(|_| panic!("failed to acquire script results"))
        .into_inner()
        .unwrap();
    // Callbacks may complete in any order; restore source order before compacting.
    results.sort_unstable_by_key(|r| r.src_range.start);
    // Everything before the first script section is already in its final position.
    let mut write_next = results.first().map_or(self.write_next, |r| r.src_range.start);
    for (i, res) in results.iter().enumerate() {
        let src_start = res.src_range.start;
        let src_end = res.src_range.end;
        let min_code = res.result.js.trim().as_bytes();
        let section_len = if min_code.len() < res.src_range.len() {
            // Strictly shorter than the source, so this never writes past the unread source.
            self.code[write_next..write_next + min_code.len()].copy_from_slice(min_code);
            min_code.len()
        } else {
            // Minification did not help; keep the source. It must still be moved because
            // earlier sections may have shifted the output left.
            self.code.copy_within(src_start..src_end, write_next);
            src_end - src_start
        };
        let write_end = write_next + section_len;
        // Shift the untouched output between this script and the next one (or the end).
        let next_start = results.get(i + 1).map_or(self.write_next, |r| r.src_range.start);
        self.code.copy_within(src_end..next_start, write_end);
        write_next = write_end + (next_start - src_end);
    }
    // The compacted length, not the pre-minification `self.write_next`.
    write_next
}
} }
impl Debug for Processor<'_> { impl Debug for Processor<'_> {

View File

@ -1,10 +1,23 @@
use crate::err::ProcessingResult; use crate::err::ProcessingResult;
use crate::proc::MatchAction::*; use crate::proc::MatchAction::*;
use crate::proc::MatchMode::*; use crate::proc::MatchMode::*;
use crate::proc::Processor; use crate::proc::{Processor, JsMinSection};
use crate::cfg::Cfg; use crate::cfg::Cfg;
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
use crate::proc::checkpoint::Checkpoint; use crate::proc::checkpoint::Checkpoint;
use esbuild_rs::{TransformOptionsBuilder, TransformOptions};
use std::sync::Arc;
use lazy_static::lazy_static;
// Shared esbuild transform options, built once on first use and cloned (as an `Arc`) for every
// script section handed to `esbuild_rs::transform`.
lazy_static! {
static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
let mut builder = TransformOptionsBuilder::new();
// Enable all three esbuild minification passes.
builder.minify_identifiers = true;
builder.minify_syntax = true;
builder.minify_whitespace = true;
builder.build()
};
}
pub fn process_script(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> { pub fn process_script(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
@ -17,17 +30,18 @@ pub fn process_script(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
if proc.m(IsSeq(b"</script"), MatchOnly).nonempty() { if proc.m(IsSeq(b"</script"), MatchOnly).nonempty() {
#[cfg(feature = "js-esbuild")] #[cfg(feature = "js-esbuild")]
if cfg.minify_js { if cfg.minify_js {
let (wg, results) = proc.new_script_section();
let src_range = start.written_range(proc); let src_range = start.written_range(proc);
let src = unsafe { // TODO Optimise: Avoid copying to new Vec.
std::string::String::from_utf8_unchecked(proc[src_range].to_vec()) let src = Arc::new(proc[src_range].to_vec());
}; esbuild_rs::transform(src, TRANSFORM_OPTIONS.clone(), move |result| {
let min = esbuild_rs::esbuild(&src).trim().as_bytes(); results.lock().unwrap().push(JsMinSection {
// `src.len()` is amount of bytes, so this is guaranteed to not overwrite. src_range,
if min.len() < src.len() { result,
start.erase_written(proc); });
proc.write_slice(min); drop(wg);
return Ok(()); });
}; return Ok(());
}; };
break; break;
}; };