Add support for configuration and JS minification using esbuild

This commit is contained in:
Wilson Lin 2020-07-11 01:15:56 +10:00
commit ced0e82515
19 changed files with 174 additions and 62 deletions

View file

@ -16,9 +16,10 @@ include = ["/src/**/*", "/Cargo.toml", "/LICENSE", "/README.md"]
maintenance = { status = "actively-developed" }
[dependencies]
esbuild-rs = "0.0.4"
lazy_static = "1.4.0"
regex = "1.3.9"
memchr = "2.3.3"
regex = "1.3.9"
[profile.release]
panic = 'abort'

View file

@ -2,6 +2,8 @@
A fast one-pass in-place HTML minifier written in Rust with context-aware whitespace handling.
Also supports JS minification by plugging into [esbuild](https://github.com/evanw/esbuild).
Available as:
- CLI for Windows, macOS, and Linux.
- Rust library.
@ -26,6 +28,8 @@ Speed and effectiveness of Node.js version compared to [html-minfier](https://gi
Precompiled binaries are available for x86-64 Windows, macOS, and Linux.
Building from source currently requires the Go compiler to be installed as well, to build the [JS minifier](https://github.com/evanw/esbuild).
##### Get
[Windows](https://wilsonl.in/hyperbuild/bin/0.1.12-windows-x86_64.exe) |
@ -34,6 +38,8 @@ Precompiled binaries are available for x86-64 Windows, macOS, and Linux.
##### Use
Use the `--help` argument for more details.
```bash
hyperbuild --src /path/to/src.html --out /path/to/output.min.html
```
@ -53,28 +59,31 @@ hyperbuild = "0.1.12"
##### Use
```rust
use hyperbuild::{FriendlyError, hyperbuild};
use hyperbuild::{Cfg, FriendlyError, hyperbuild, hyperbuild_copy, hyperbuild_friendly_error, hyperbuild_truncate};
fn main() {
let mut code = b"<p> Hello, world! </p>".to_vec();
let cfg = &Cfg {
minify_js: false,
};
// Minifies a slice in-place and returns the new minified length,
// but leaves any original code after the minified code intact.
match hyperbuild(&mut code) {
match hyperbuild(&mut code, cfg) {
Ok(minified_len) => {}
Err((error_type, error_position)) => {}
};
// Creates a vector copy containing only minified code
// instead of minifying in-place.
match hyperbuild_copy(&code) {
match hyperbuild_copy(&code, cfg) {
Ok(minified) => {}
Err((error_type, error_position)) => {}
};
// Minifies a vector in-place, and then truncates the
// vector to the new minified length.
match hyperbuild_truncate(&mut code) {
match hyperbuild_truncate(&mut code, cfg) {
Ok(()) => {}
Err((error_type, error_position)) => {}
};
@ -82,7 +91,7 @@ fn main() {
// Identical to `hyperbuild` except with FriendlyError instead.
// `code_context` is a string of a visual representation of the source,
// with line numbers and position markers to aid in debugging syntax.
match hyperbuild_friendly_error(&mut code) {
match hyperbuild_friendly_error(&mut code, cfg) {
Ok(minified_len) => {}
Err(FriendlyError { position, message, code_context }) => {
eprintln!("Failed at character {}:", position);
@ -119,10 +128,11 @@ yarn add hyperbuild
```js
const hyperbuild = require("hyperbuild");
const minified = hyperbuild.minify("<p> Hello, world! </p>");
const cfg = { minifyJs: false };
const minified = hyperbuild.minify("<p> Hello, world! </p>", cfg);
// Alternatively, minify in place to avoid copying.
const source = Buffer.from("<p> Hello, world! </p>");
const source = Buffer.from("<p> Hello, world! </p>");
hyperbuild.minifyInPlace(source, cfg);
```
@ -132,8 +142,9 @@ hyperbuild is also available for TypeScript:
import * as hyperbuild from "hyperbuild";
import * as fs from "fs";
const minified = hyperbuild.minify("<p> Hello, world! </p>");
hyperbuild.minifyInPlace(fs.readFileSync("source.html"));
const cfg = { minifyJs: false };
const minified = hyperbuild.minify("<p> Hello, world! </p>", cfg);
hyperbuild.minifyInPlace(fs.readFileSync("source.html"), cfg);
```
</details>
@ -160,15 +171,18 @@ Add as a Maven dependency:
```java
import in.wilsonl.hyperbuild.Hyperbuild;
Hyperbuild.Configuration cfg = new Hyperbuild.Configuration.Builder()
.setMinifyJs(false)
.build();
try {
String minified = Hyperbuild.minify("<p> Hello, world! </p>");
String minified = Hyperbuild.minify("<p> Hello, world! </p>", cfg);
} catch (Hyperbuild.SyntaxException e) {
System.err.println(e.getMessage());
}
// Alternatively, minify in place:
assert source instanceof ByteBuffer && source.isDirect();
Hyperbuild.minifyInPlace(source);
Hyperbuild.minifyInPlace(source, cfg);
```
</details>
@ -188,7 +202,7 @@ Add the PyPI project as a dependency and install it using `pip` or `pipenv`.
import hyperbuild
try:
minified = hyperbuild.minify("<p> Hello, world! </p>")
minified = hyperbuild.minify("<p> Hello, world! </p>", minify_js=False)
except SyntaxError as e:
print(e)
```
@ -209,17 +223,13 @@ Add the library as a dependency to `Gemfile` or `*.gemspec`.
```ruby
require 'hyperbuild'
print Hyperbuild.minify "<p> Hello, world! </p>"
print Hyperbuild.minify("<p> Hello, world! </p>", { :minify_js => false })
```
</details>
## Minification
### Configurability
Configuration of minification is currently WIP across all languages. The behaviour mentioned below is the default.
### Whitespace
hyperbuild has advanced context-aware whitespace minification that does things such as:
@ -457,7 +467,7 @@ Bangs, [processing instructions](https://en.wikipedia.org/wiki/Processing_Instru
Only UTF-8/ASCII-encoded HTML code is supported.
hyperbuild simply does HTML minification, and almost does no syntax checking or standards enforcement for performance and code complexity reasons.
hyperbuild does no syntax checking or standards enforcement for performance and code complexity reasons.
For example, this means that it's not an error to have self-closing tags, declare multiple `<body>` elements, use incorrect attribute names and values, or write something like `<br>alert('');</br>`.

View file

@ -6,5 +6,5 @@ authors = ["Wilson Lin <code@wilsonl.in>"]
edition = "2018"
[dependencies]
hyperbuild = "0.1.12"
hyperbuild = { path = ".." }
structopt = "0.3.5"

View file

@ -3,7 +3,7 @@ use std::io::{Read, stdin, stdout, Write};
use structopt::StructOpt;
use hyperbuild::{FriendlyError, hyperbuild_friendly_error};
use hyperbuild::{Cfg, FriendlyError, hyperbuild_friendly_error};
#[derive(StructOpt)]
struct Cli {
@ -11,6 +11,8 @@ struct Cli {
src: Option<std::path::PathBuf>,
#[structopt(short, long, parse(from_os_str))]
out: Option<std::path::PathBuf>,
#[structopt(long)]
js: bool,
}
macro_rules! io_expect {
@ -34,7 +36,9 @@ fn main() {
None => Box::new(stdin()),
};
io_expect!(src_file.read_to_end(&mut code), "could not load source code");
match hyperbuild_friendly_error(&mut code) {
match hyperbuild_friendly_error(&mut code, &Cfg {
minify_js: args.js,
}) {
Ok(out_len) => {
let mut out_file: Box<dyn Write> = match args.out {
Some(p) => Box::new(io_expect!(File::create(p), "could not open output file")),

View file

@ -44,6 +44,7 @@ else
fi
cargo build $rust_build_arg
mv Cargo.toml.orig Cargo.toml
mkdir -p src/main/resources/
cp target/rust/$rust_build_dir/libhyperbuild_java.$ext src/main/resources/$os_name-x86_64.nativelib
mvn clean package

View file

@ -53,9 +53,10 @@ public class Hyperbuild {
* If the code fails to be minified, a {@link SyntaxException} will be thrown with a descriptive English message and position in code where the error occurred.
*
* @param code {@link ByteBuffer} containing HTML code to minify
* @param cfg {@link Configuration} minification settings to use
* @return length of the written minified code in the {@link ByteBuffer}
*/
public static native int minifyInPlace(ByteBuffer code);
public static native int minifyInPlace(ByteBuffer code, Configuration cfg);
/**
* Minify HTML code represented as a {@link String}.
@ -63,9 +64,10 @@ public class Hyperbuild {
* If the code fails to be minified, a {@link SyntaxException} will be thrown with a descriptive English message and position in code where the error occurred.
*
* @param code HTML code to minify
* @param cfg {@link Configuration} minification settings to use
* @return minified HTML code
*/
public static native String minify(String code);
public static native String minify(String code, Configuration cfg);
/**
* Basic exception class representing minification errors.
@ -75,4 +77,31 @@ public class Hyperbuild {
super(message);
}
}
/**
 * Immutable set of minification options passed to {@code minify} and {@code minifyInPlace}.
 *
 * <p>The native code looks up the {@code minifyJs} field by name, so the field must keep
 * that exact name and its {@code boolean} type.
 */
public static class Configuration {
    private final boolean minifyJs;

    /**
     * Create a configuration directly.
     *
     * @param minifyJs whether JavaScript minification is requested
     */
    public Configuration(boolean minifyJs) {
        this.minifyJs = minifyJs;
    }

    /**
     * Fluent helper for assembling a {@link Configuration}; every option starts out disabled.
     */
    public static class Builder {
        // A boolean field is false until a setter says otherwise.
        private boolean minifyJsFlag;

        /**
         * @param minifyJs whether JavaScript minification is requested
         * @return this builder, to allow call chaining
         */
        public Builder setMinifyJs(boolean minifyJs) {
            minifyJsFlag = minifyJs;
            return this;
        }

        /**
         * @return a new {@link Configuration} holding the options chosen so far
         */
        public Configuration build() {
            final boolean chosen = minifyJsFlag;
            return new Configuration(chosen);
        }
    }
}
}

View file

@ -1,14 +1,24 @@
use hyperbuild::hyperbuild;
use hyperbuild::{hyperbuild, Cfg};
use jni::JNIEnv;
use jni::objects::{JByteBuffer, JClass, JObject, JString};
use jni::sys::{jint, jstring};
use std::str::from_utf8_unchecked;
/// Builds a native `Cfg` from a Java configuration object.
///
/// Reads the boolean field named `minifyJs` (JNI type signature `Z`) from `obj`.
///
/// # Panics
///
/// Panics if the field lookup fails or the value is not a boolean.
fn build_cfg(
    env: &JNIEnv,
    obj: &JObject,
) -> Cfg {
    let minify_js_value = env.get_field(*obj, "minifyJs", "Z").unwrap();
    let minify_js = minify_js_value.z().unwrap();
    Cfg { minify_js }
}
#[no_mangle]
pub extern "system" fn Java_in_wilsonl_hyperbuild_Hyperbuild_minifyInPlace(
env: JNIEnv,
_class: JClass,
input: JByteBuffer,
cfg: JObject,
)
-> jint {
let source = match env.get_direct_buffer_address(input) {
@ -19,7 +29,7 @@ pub extern "system" fn Java_in_wilsonl_hyperbuild_Hyperbuild_minifyInPlace(
}
};
(match hyperbuild(source) {
(match hyperbuild(source, &build_cfg(&env, &cfg)) {
Ok(out_len) => out_len,
Err((err, pos)) => {
env.throw_new(
@ -36,12 +46,13 @@ pub extern "system" fn Java_in_wilsonl_hyperbuild_Hyperbuild_minify(
env: JNIEnv,
_class: JClass,
input: JString,
cfg: JObject,
)
-> jstring {
let source: String = env.get_string(input).unwrap().into();
let mut code = source.into_bytes();
match hyperbuild(&mut code) {
match hyperbuild(&mut code, &build_cfg(&env, &cfg)) {
Ok(out_len) => env.new_string(unsafe { from_utf8_unchecked(&code[0..out_len]) }).unwrap().into_inner(),
Err((err, pos)) => {
env.throw_new(

View file

@ -13,8 +13,8 @@ name = "hyperbuild_nodejs_lib"
crate-type = ["cdylib"]
[build-dependencies]
neon-build = "0.3.3"
neon-build = "0.4.0"
[dependencies]
hyperbuild = "0.1.12"
neon = "0.3.3"
neon = "0.4.0"

View file

@ -1,9 +1,13 @@
use neon::prelude::*;
use hyperbuild::hyperbuild;
use hyperbuild::{Cfg, hyperbuild};
fn minify(mut cx: FunctionContext) -> JsResult<JsNumber> {
let mut buffer = cx.argument::<JsBuffer>(0)?;
match cx.borrow_mut(&mut buffer, |code| hyperbuild(code.as_mut_slice::<u8>())) {
let cfg_obj = cx.argument::<JsObject>(1)?;
let cfg = Cfg {
minify_js: cfg_obj.get(&mut cx, "minifyJs")?.downcast::<JsBoolean>().or_throw(&mut cx)?.value(),
};
match cx.borrow_mut(&mut buffer, |code| hyperbuild(code.as_mut_slice::<u8>(), &cfg)) {
Ok(out_len) => Ok(cx.number(out_len as f64)),
Err((err, pos)) => cx.throw_error(format!("{} [Character {}]", err.message(), pos)),
}

View file

@ -2,14 +2,18 @@ const binaryName = [process.platform, process.arch, process.versions.modules].jo
const hyperbuild = require(`./${binaryName}.node`);
export const minify = (code: string): string => {
// Minification settings accepted by `minify` and `minifyInPlace`.
export type Configuration = {
// Passed through unchanged to the native module, which reads it as a boolean
// property named `minifyJs`.
minifyJs: boolean;
};
export const minify = (code: string, cfg: Configuration): string => {
const buf = Buffer.from(code);
const len = hyperbuild.minify(buf);
const len = hyperbuild.minify(buf, cfg);
return buf.slice(0, len).toString();
};
export const minifyInPlace = (buf: Buffer): Buffer => {
const len = hyperbuild.minify(buf);
export const minifyInPlace = (buf: Buffer, cfg: Configuration): Buffer => {
const len = hyperbuild.minify(buf, cfg);
// This does not do a copy.
return buf.slice(0, len);
};

View file

@ -1,13 +1,16 @@
use hyperbuild::hyperbuild as hyperbuild_native;
use hyperbuild::{Cfg, hyperbuild as hyperbuild_native};
use pyo3::prelude::*;
use pyo3::exceptions::SyntaxError;
use pyo3::wrap_pyfunction;
use std::str::from_utf8_unchecked;
use pyo3::types::PyTuple;
#[pyfunction]
fn minify(code: String) -> PyResult<String> {
#[pyfunction(py_args="*", minify_js="false")]
fn minify(code: String, minify_js: bool) -> PyResult<String> {
let mut code = code.into_bytes();
match hyperbuild_native(&mut code) {
match hyperbuild_native(&mut code, &Cfg {
minify_js,
}) {
Ok(out_len) => Ok(unsafe { from_utf8_unchecked(&code[0..out_len]).to_string() }),
Err((err, pos)) => Err(SyntaxError::py_err(format!("{} [Character {}]", err.message(), pos))),
}

View file

@ -1,5 +1,5 @@
use hyperbuild::hyperbuild as hyperbuild_native;
use rutie::{Class, class, methods, Object, RString, VM};
use hyperbuild::{Cfg, hyperbuild as hyperbuild_native};
use rutie::{Boolean, Class, class, Hash, methods, Object, RString, Symbol, VM};
use std::str::from_utf8_unchecked;
class!(Hyperbuild);
@ -8,14 +8,21 @@ methods! {
Hyperbuild,
_itself,
fn minify(source: RString) -> RString {
fn minify(source: RString, cfg_hash: Hash) -> RString {
let mut code = source
.map_err(|e| VM::raise_ex(e) )
.unwrap()
.to_string()
.into_bytes();
hyperbuild_native(&mut code)
let cfg = &Cfg {
minify_js: cfg_hash
.map(|h| h.at(&Symbol::new("minify_js")))
.and_then(|e| e.try_convert_to::<Boolean>())
.map_or(false, |v| v.to_bool()),
};
hyperbuild_native(&mut code, cfg)
.map_err(|(err, pos)| VM::raise(Class::from_existing("SyntaxError"), format!("{} [Character {}]", err.message(), pos).as_str()))
.map(|out_len| RString::new_utf8(unsafe { from_utf8_unchecked(&code[0..out_len]) }))
.unwrap()

3
src/cfg/mod.rs Normal file
View file

@ -0,0 +1,3 @@
/// Minification settings passed to the `hyperbuild*` entry points.
///
/// `Cfg::default()` leaves every option disabled.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Cfg {
    /// When `true`, the contents of `<script>` elements are run through the
    /// JS minifier.
    pub minify_js: bool,
}

View file

@ -2,7 +2,9 @@ pub use crate::err::ErrorType as ErrorType;
use crate::proc::Processor;
use crate::unit::content::process_content;
use crate::spec::tag::ns::Namespace;
pub use crate::cfg::Cfg;
mod cfg;
mod err;
mod gen;
mod pattern;
@ -12,27 +14,27 @@ mod spec;
mod tests;
mod unit;
pub fn hyperbuild(code: &mut [u8]) -> Result<usize, (ErrorType, usize)> {
pub fn hyperbuild(code: &mut [u8], cfg: &Cfg) -> Result<usize, (ErrorType, usize)> {
let mut proc = Processor::new(code);
match process_content(&mut proc, Namespace::Html, None) {
match process_content(&mut proc, cfg, Namespace::Html, None) {
Ok(()) => Ok(proc.written_len()),
Err(e) => Err((e, proc.read_len())),
}
}
pub fn hyperbuild_truncate(code: &mut Vec<u8>) -> Result<(), (ErrorType, usize)> {
match hyperbuild(code) {
pub fn hyperbuild_truncate(code: &mut Vec<u8>, cfg: &Cfg) -> Result<(), (ErrorType, usize)> {
match hyperbuild(code, cfg) {
Ok(written_len) => {
code.truncate(written_len);
Ok(())
},
}
Err(e) => Err(e),
}
}
pub fn hyperbuild_copy(code: &[u8]) -> Result<Vec<u8>, (ErrorType, usize)> {
pub fn hyperbuild_copy(code: &[u8], cfg: &Cfg) -> Result<Vec<u8>, (ErrorType, usize)> {
let mut copy = code.to_vec();
match hyperbuild_truncate(&mut copy) {
match hyperbuild_truncate(&mut copy, cfg) {
Ok(()) => Ok(copy),
Err(e) => Err(e),
}
@ -45,9 +47,9 @@ pub struct FriendlyError {
pub code_context: String,
}
pub fn hyperbuild_friendly_error(code: &mut [u8]) -> Result<usize, FriendlyError> {
pub fn hyperbuild_friendly_error(code: &mut [u8], cfg: &Cfg) -> Result<usize, FriendlyError> {
let mut proc = Processor::new(code);
match process_content(&mut proc, Namespace::Html, None) {
match process_content(&mut proc, cfg, Namespace::Html, None) {
Ok(()) => Ok(proc.written_len()),
Err(e) => Err(FriendlyError {
position: proc.read_len(),

View file

@ -1,7 +1,7 @@
#[cfg(test)]
fn eval(src: &'static [u8], expected: &'static [u8]) -> () {
fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) -> () {
let mut code = src.to_vec();
match super::hyperbuild_friendly_error(&mut code) {
match super::hyperbuild_friendly_error(&mut code, cfg) {
Ok(len) => {
assert_eq!(std::str::from_utf8(&code[..len]).unwrap(), std::str::from_utf8(expected).unwrap());
}
@ -13,6 +13,20 @@ fn eval(src: &'static [u8], expected: &'static [u8]) -> () {
};
}
/// Runs `_eval` on `src` and `expected` with JS minification turned off.
#[cfg(test)]
fn eval(src: &'static [u8], expected: &'static [u8]) -> () {
    let cfg = super::Cfg { minify_js: false };
    _eval(src, expected, &cfg)
}
/// Runs `_eval` on `src` and `expected` with JS minification turned on.
#[cfg(test)]
fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
    let cfg = super::Cfg { minify_js: true };
    _eval(src, expected, &cfg)
}
#[test]
fn test_collapse_whitespace() {
eval(b"<a> \n&#32; </a>", b"<a> </a>");
@ -258,3 +272,8 @@ fn test_left_chevron_entities_in_content() {
eval(b"&lt;&#59", b"&LT;;");
eval(b"&lt;&#59;", b"&LT;;");
}
/// With JS minification enabled, whitespace around `=` inside an inline
/// script is expected to be removed.
#[test]
fn test_js_minification() {
    let src: &'static [u8] = b"<script>let a = 1;</script>";
    let expected: &'static [u8] = b"<script>let a=1;</script>";
    eval_with_js_min(src, expected);
}

View file

@ -12,6 +12,7 @@ use crate::unit::tag::{MaybeClosingTag, process_tag};
use crate::spec::tag::ns::Namespace;
use crate::proc::entity::maybe_normalise_entity;
use crate::gen::codepoints::WHITESPACE;
use crate::cfg::Cfg;
#[derive(Copy, Clone, PartialEq, Eq)]
enum ContentType {
@ -44,7 +45,7 @@ impl ContentType {
}
}
pub fn process_content(proc: &mut Processor, ns: Namespace, parent: Option<ProcessorRange>) -> ProcessingResult<()> {
pub fn process_content(proc: &mut Processor, cfg: &Cfg, ns: Namespace, parent: Option<ProcessorRange>) -> ProcessingResult<()> {
let &WhitespaceMinification { collapse, destroy_whole, trim } = get_whitespace_minification_for_tag(parent.map(|r| &proc[r]));
let handle_ws = collapse || destroy_whole || trim;
@ -113,7 +114,7 @@ pub fn process_content(proc: &mut Processor, ns: Namespace, parent: Option<Proce
// Process and consume next character(s).
match next_content_type {
ContentType::Tag => {
let new_closing_tag = process_tag(proc, ns, prev_sibling_closing_tag)?;
let new_closing_tag = process_tag(proc, cfg, ns, prev_sibling_closing_tag)?;
prev_sibling_closing_tag.replace(new_closing_tag);
}
ContentType::End => {

View file

@ -4,6 +4,9 @@ use crate::err::ProcessingResult;
use crate::proc::MatchAction::*;
use crate::proc::MatchMode::*;
use crate::proc::Processor;
use esbuild_rs::esbuild;
use crate::cfg::Cfg;
use std::string::String;
static SCRIPT_END_STR: &'static str = "</script";
@ -11,8 +14,17 @@ lazy_static! {
static ref SCRIPT_END: Regex = Regex::new(SCRIPT_END_STR).unwrap();
}
pub fn process_script(proc: &mut Processor) -> ProcessingResult<()> {
/// Processes the contents of a `<script>` element, up to but not including
/// the closing `</script`.
///
/// The script source is matched and discarded from the output first. When
/// `cfg.minify_js` is set, the source is passed to esbuild and the trimmed
/// result is written instead, but only if it is strictly shorter than the
/// original. In every other case the original source is written back
/// unchanged.
pub fn process_script(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> {
    // `process_tag` will require closing tag.
    let code = proc.m(WhileNotPat(&SCRIPT_END, SCRIPT_END_STR.len()), Discard);
    if cfg.minify_js {
        // The input buffer is raw bytes that are never validated, so convert
        // with a checked call. A script that is not valid UTF-8 is left
        // unminified rather than handed to the minifier as a malformed string.
        // The bytes are copied so `proc` can be written to while the source
        // text is still alive.
        if let Ok(code_str) = String::from_utf8(proc[code].to_vec()) {
            let min = esbuild(&code_str).trim().as_bytes();
            if min.len() < code.len() {
                proc.write_slice(min);
                return Ok(());
            }
        }
    }
    proc.write_range(code);
    Ok(())
}

View file

@ -15,6 +15,7 @@ use crate::unit::style::process_style;
use crate::gen::attrs::{ATTRS, AttributeMinification};
use crate::spec::tag::ns::Namespace;
use crate::gen::codepoints::{TAG_NAME_CHAR, WHITESPACE};
use crate::cfg::Cfg;
lazy_static! {
pub static ref JAVASCRIPT_MIME_TYPES: HashSet<&'static [u8]> = {
@ -91,7 +92,7 @@ impl MaybeClosingTag {
}
// TODO Comment param `prev_sibling_closing_tag`.
pub fn process_tag(proc: &mut Processor, ns: Namespace, mut prev_sibling_closing_tag: MaybeClosingTag) -> ProcessingResult<MaybeClosingTag> {
pub fn process_tag(proc: &mut Processor, cfg: &Cfg, ns: Namespace, mut prev_sibling_closing_tag: MaybeClosingTag) -> ProcessingResult<MaybeClosingTag> {
// Expect to be currently at an opening tag.
proc.m(IsChar(b'<'), Discard).expect();
// May not be valid tag name at current position, so require instead of expect.
@ -202,9 +203,9 @@ pub fn process_tag(proc: &mut Processor, ns: Namespace, mut prev_sibling_closing
};
match tag_type {
TagType::Script => process_script(proc)?,
TagType::Script => process_script(proc, cfg)?,
TagType::Style => process_style(proc)?,
_ => process_content(proc, child_ns, Some(tag_name))?,
_ => process_content(proc, cfg, child_ns, Some(tag_name))?,
};
// Require closing tag for non-void.

View file

@ -78,7 +78,7 @@ for (const f of ['python/setup.py']) {
replaceInFile(f, /^(\s*version=)"\d+\.\d+\.\d+",\s*$/m, `$1"${NEW_VERSION}",`);
}
for (const f of ['README.md', 'cli/Cargo.toml', 'nodejs/native/Cargo.toml', 'java/Cargo.toml', 'python/Cargo.toml', 'ruby/Cargo.toml']) {
for (const f of ['README.md', 'nodejs/native/Cargo.toml', 'java/Cargo.toml', 'python/Cargo.toml', 'ruby/Cargo.toml']) {
replaceInFile(f, /^hyperbuild = "\d+\.\d+\.\d+"\s*$/m, `hyperbuild = "${NEW_VERSION}"`);
}