Use named error struct instead of tuple; use async JS minification; fix old JS package names

This commit is contained in:
Wilson Lin 2020-07-21 17:57:39 +10:00
parent f42b2648d2
commit b967e338ca
8 changed files with 108 additions and 41 deletions

View File

@ -17,10 +17,11 @@ maintenance = { status = "actively-developed" }
[features]
default = []
js-esbuild = ["esbuild-rs"]
js-esbuild = ["crossbeam", "esbuild-rs"]
[dependencies]
esbuild-rs = { version = "0.0.5", optional = true }
crossbeam = { version = "0.7.3", optional = true }
esbuild-rs = { version = "0.1.2", optional = true }
lazy_static = "1.4.0"
memchr = "2.3.3"

View File

@ -99,7 +99,7 @@ fn main() {
<details>
<summary><strong>Node.js</strong></summary>
- Package: [@minify-html/js-esbuild](https://www.npmjs.com/package/@minify-html/js-esbuild)
- Package: [@minify-html/js](https://www.npmjs.com/package/@minify-html/js)
- Binding: [Neon](https://neon-bindings.com/)
- Platforms: macOS, Linux; Node.js 8 and higher
@ -108,19 +108,19 @@ fn main() {
Using npm:
```bash
npm i @minify-html/js-esbuild
npm i @minify-html/js
```
Using Yarn:
```bash
yarn add @minify-html/js-esbuild
yarn add @minify-html/js
```
##### Use
```js
const minifyHtml = require("@minify-html/js-esbuild");
const minifyHtml = require("@minify-html/js");
const cfg = { minifyJs: false };
const minified = minifyHtml.minify("<p> Hello, world! </p>", cfg);
@ -134,7 +134,7 @@ const minified = minifyHtml.minifyInPlace(source, cfg);
minify-html is also available for TypeScript:
```ts
import * as minifyHtml from "@minify-html/js-esbuild";
import * as minifyHtml from "@minify-html/js";
import * as fs from "fs";
const cfg = { minifyJs: false };

View File

@ -1,5 +1,5 @@
const htmlMinifier = require("html-minifier");
const minifyHtml = require("@minify-html/js-esbuild");
const minifyHtml = require("@minify-html/js");
const minimize = require("minimize");
const terser = require('terser');

View File

@ -1,7 +1,7 @@
{
"private": true,
"dependencies": {
"@minify-html/js-esbuild": "file:../nodejs",
"@minify-html/js": "file:../nodejs",
"benchmark": "2.1.4",
"chart.js": "^2.9.3",
"chartjs-node": "^1.7.1",

View File

@ -22,4 +22,17 @@ impl ErrorType {
}
}
/// Error returned by the minification entry points in place of the former
/// `(ErrorType, usize)` tuple.
#[derive(Debug)]
pub struct Error {
/// The kind of failure reported by the processing routines.
pub error_type: ErrorType,
/// How far the processor had read into the input when the failure occurred
/// (callers fill this from `Processor::read_len()`).
pub position: usize,
}
/// Error carrying a displayable message in addition to the failure position.
/// All fields are public so that callers can destructure the value.
#[derive(Debug)]
pub struct FriendlyError {
/// How far the processor had read into the input when the failure occurred.
pub position: usize,
/// Message describing the failure (produced by `ErrorType::message()`).
pub message: String,
/// NOTE(review): presumably an excerpt of the input around `position`; the code
/// that builds it is not visible here -- confirm against `with_friendly_error`.
pub code_context: String,
}
/// Result type used by the internal processing routines; the error is a bare
/// `ErrorType` with no position attached.
pub type ProcessingResult<T> = Result<T, ErrorType>;

View File

@ -1,4 +1,4 @@
pub use crate::err::ErrorType as ErrorType;
pub use crate::err::{Error, ErrorType, FriendlyError};
use crate::proc::Processor;
use crate::unit::content::process_content;
use crate::spec::tag::ns::Namespace;
@ -14,15 +14,18 @@ mod spec;
mod tests;
mod unit;
pub fn in_place(code: &mut [u8], cfg: &Cfg) -> Result<usize, (ErrorType, usize)> {
pub fn in_place(code: &mut [u8], cfg: &Cfg) -> Result<usize, Error> {
let mut proc = Processor::new(code);
match process_content(&mut proc, cfg, Namespace::Html, None) {
Ok(()) => Ok(proc.written_len()),
Err(e) => Err((e, proc.read_len())),
Ok(()) => Ok(proc.finish()),
Err(e) => Err(Error {
error_type: e,
position: proc.read_len(),
}),
}
}
pub fn in_place_str<'s>(code: &'s mut str, cfg: &Cfg) -> Result<&'s str, (ErrorType, usize)> {
pub fn in_place_str<'s>(code: &'s mut str, cfg: &Cfg) -> Result<&'s str, Error> {
let bytes = unsafe { code.as_bytes_mut() };
match in_place(bytes, cfg) {
Ok(min_len) => Ok(unsafe { std::str::from_utf8_unchecked(&bytes[..min_len]) }),
@ -30,7 +33,7 @@ pub fn in_place_str<'s>(code: &'s mut str, cfg: &Cfg) -> Result<&'s str, (ErrorT
}
}
pub fn truncate(code: &mut Vec<u8>, cfg: &Cfg) -> Result<(), (ErrorType, usize)> {
pub fn truncate(code: &mut Vec<u8>, cfg: &Cfg) -> Result<(), Error> {
match in_place(code, cfg) {
Ok(written_len) => {
code.truncate(written_len);
@ -40,7 +43,7 @@ pub fn truncate(code: &mut Vec<u8>, cfg: &Cfg) -> Result<(), (ErrorType, usize)>
}
}
pub fn copy(code: &[u8], cfg: &Cfg) -> Result<Vec<u8>, (ErrorType, usize)> {
pub fn copy(code: &[u8], cfg: &Cfg) -> Result<Vec<u8>, Error> {
let mut copy = code.to_vec();
match truncate(&mut copy, cfg) {
Ok(()) => Ok(copy),
@ -48,17 +51,10 @@ pub fn copy(code: &[u8], cfg: &Cfg) -> Result<Vec<u8>, (ErrorType, usize)> {
}
}
pub struct FriendlyError {
// Make public to allow destructuring.
pub position: usize,
pub message: String,
pub code_context: String,
}
pub fn with_friendly_error(code: &mut [u8], cfg: &Cfg) -> Result<usize, FriendlyError> {
let mut proc = Processor::new(code);
match process_content(&mut proc, cfg, Namespace::Html, None) {
Ok(()) => Ok(proc.written_len()),
Ok(()) => Ok(proc.finish()),
Err(e) => Err(FriendlyError {
position: proc.read_len(),
message: e.message(),

View File

@ -8,6 +8,9 @@ use crate::proc::MatchMode::*;
use crate::proc::range::ProcessorRange;
use memchr::memchr;
use crate::gen::codepoints::{WHITESPACE, Lookup};
use std::sync::{Arc, Mutex};
use esbuild_rs::TransformResult;
use crossbeam::sync::WaitGroup;
pub mod checkpoint;
pub mod entity;
@ -39,6 +42,11 @@ pub enum MatchAction {
MatchOnly,
}
/// Outcome of one asynchronous JS minification job, recorded so that
/// `Processor::finish` can splice it into the output afterwards.
pub struct JsMinSection {
/// Range of the processor's buffer holding the original script source.
pub src_range: ProcessorRange,
/// Result handed to the esbuild transform callback; `finish` reads its `js` field.
pub result: TransformResult,
}
// Processing state of a file. Single use only; create one per processing.
pub struct Processor<'d> {
code: &'d mut [u8],
@ -46,6 +54,10 @@ pub struct Processor<'d> {
read_next: usize,
// Index of the next unwritten space.
write_next: usize,
#[cfg(feature = "js-esbuild")]
script_wg: WaitGroup,
#[cfg(feature = "js-esbuild")]
script_results: Arc<Mutex<Vec<JsMinSection>>>,
}
impl<'d> Index<ProcessorRange> for Processor<'d> {
@ -66,7 +78,15 @@ impl<'d> IndexMut<ProcessorRange> for Processor<'d> {
impl<'d> Processor<'d> {
// Constructor.
pub fn new(code: &mut [u8]) -> Processor {
Processor { write_next: 0, read_next: 0, code }
Processor {
write_next: 0,
read_next: 0,
code,
#[cfg(feature = "js-esbuild")]
script_wg: WaitGroup::new(),
#[cfg(feature = "js-esbuild")]
script_results: Arc::new(Mutex::new(Vec::new())),
}
}
// INTERNAL APIs.
@ -194,11 +214,6 @@ impl<'d> Processor<'d> {
self.read_next
}
/// Get how many characters have been written to output.
pub fn written_len(&self) -> usize {
self.write_next
}
/// Advance the write position by `amount` without writing anything into the
/// skipped region.
pub fn reserve_output(&mut self, amount: usize) {
    self.write_next += amount;
}
@ -288,6 +303,34 @@ impl<'d> Processor<'d> {
debug_assert!(self._in_bounds(count - 1));
self._shift(count);
}
pub fn new_script_section(&self) -> (WaitGroup, Arc<Mutex<Vec<JsMinSection>>>) {
(self.script_wg.clone(), self.script_results.clone())
}
/// Wait for every pending asynchronous JS minification job, splice the results
/// into the output buffer, and return the final output length in bytes.
///
/// Sections are processed from left to right. Each one is replaced by its
/// minified code when that is strictly shorter than the source; otherwise the
/// original source is kept. Everything after a shrunken section is shifted left
/// so that the output stays contiguous.
///
/// # Panics
///
/// Panics if the results mutex was poisoned by a panicking minification callback.
///
/// NOTE(review): this method uses the `js-esbuild`-only fields unconditionally,
/// exactly like the code it replaces; feature gating is left to a separate change.
pub fn finish(self) -> usize {
    debug_assert!(self.at_end());
    // Returns once every wait group clone handed out by `new_script_section`
    // has been dropped, i.e. once every job has stored its result.
    self.script_wg.wait();
    // Take the results through the lock instead of `Arc::try_unwrap`: a callback
    // drops its wait group clone before its `Arc` clone is released, so the
    // reference count may still be above one right after `wait()` returns.
    let mut results = std::mem::take(
        &mut *self
            .script_results
            .lock()
            .expect("script results mutex poisoned"),
    );
    results.sort_unstable_by_key(|r| r.src_range.start);

    // Position in `code` where the next byte of compacted output goes. With no
    // script sections this is `self.write_next`, which is returned unchanged.
    let mut write_next = results
        .first()
        .map_or(self.write_next, |r| r.src_range.start);
    for (i, res) in results.iter().enumerate() {
        let min_code = res.result.js.trim();
        let js_len = if min_code.len() < res.src_range.len() {
            self.code[write_next..write_next + min_code.len()]
                .copy_from_slice(min_code.as_bytes());
            min_code.len()
        } else {
            // Minification did not help, so keep the source. It still has to be
            // moved because earlier sections may have shifted the output left.
            self.code
                .copy_within(res.src_range.start..res.src_range.end, write_next);
            res.src_range.len()
        };
        let write_end = write_next + js_len;
        // Shift the untouched output between this section and the next one (or
        // the end of the written output) so that it follows directly.
        let next_start = results
            .get(i + 1)
            .map_or(self.write_next, |r| r.src_range.start);
        self.code.copy_within(res.src_range.end..next_start, write_end);
        write_next = write_end + (next_start - res.src_range.end);
    }
    // The compacted length, not the pre-compaction `self.write_next`.
    write_next
}
}
impl Debug for Processor<'_> {

View File

@ -1,10 +1,23 @@
use crate::err::ProcessingResult;
use crate::proc::MatchAction::*;
use crate::proc::MatchMode::*;
use crate::proc::Processor;
use crate::proc::{Processor, JsMinSection};
use crate::cfg::Cfg;
#[cfg(feature = "js-esbuild")]
use crate::proc::checkpoint::Checkpoint;
use esbuild_rs::{TransformOptionsBuilder, TransformOptions};
use std::sync::Arc;
use lazy_static::lazy_static;
lazy_static! {
    // esbuild transform options shared by all script minification jobs:
    // whitespace, syntax and identifier minification are all switched on.
    static ref TRANSFORM_OPTIONS: Arc<TransformOptions> = {
        let mut opts = TransformOptionsBuilder::new();
        opts.minify_whitespace = true;
        opts.minify_syntax = true;
        opts.minify_identifiers = true;
        opts.build()
    };
}
pub fn process_script(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
#[cfg(feature = "js-esbuild")]
@ -17,17 +30,18 @@ pub fn process_script(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
if proc.m(IsSeq(b"</script"), MatchOnly).nonempty() {
#[cfg(feature = "js-esbuild")]
if cfg.minify_js {
let (wg, results) = proc.new_script_section();
let src_range = start.written_range(proc);
let src = unsafe {
std::string::String::from_utf8_unchecked(proc[src_range].to_vec())
};
let min = esbuild_rs::esbuild(&src).trim().as_bytes();
// `src.len()` is amount of bytes, so this is guaranteed to not overwrite.
if min.len() < src.len() {
start.erase_written(proc);
proc.write_slice(min);
return Ok(());
};
// TODO Optimise: Avoid copying to new Vec.
let src = Arc::new(proc[src_range].to_vec());
esbuild_rs::transform(src, TRANSFORM_OPTIONS.clone(), move |result| {
results.lock().unwrap().push(JsMinSection {
src_range,
result,
});
drop(wg);
});
return Ok(());
};
break;
};