Refactor out Emscripten version and restructure

This commit is contained in:
Wilson Lin 2018-10-26 00:54:54 +13:00
parent 7bdefd916a
commit 277ea3303c
66 changed files with 11 additions and 607 deletions

1
.gitignore vendored
View File

@ -1,3 +1,2 @@
/out/
/docs/
node_modules/

View File

@ -1,8 +1,15 @@
# hyperbuild
A fast HTML minifier written in C.
Designed to be used in C projects, as an executable, and in Node.js thanks to Emscripten.
Minifier heavily influenced by [kangax's html-minifier](https://github.com/kangax/html-minifier).
A fast HTML minifier written in C, heavily influenced by [kangax's html-minifier](https://github.com/kangax/html-minifier).
Available in different flavours:
- Standalone 64-bit Linux executable (this)
- [Node.js](https://github.com/wilsonzlin/hyperbuild-nodejs)
- [Express](https://github.com/wilsonzlin/hyperbuild-express)
- [Webpack](https://github.com/wilsonzlin/hyperbuild-webpack)
- [Apache](https://github.com/wilsonzlin/hyperbuild-apache)
- [Nginx](https://github.com/wilsonzlin/hyperbuild-nginx)
## Features
@ -12,7 +19,7 @@ hyperbuild minifies as it parses, directly streaming processed HTML to the outpu
### Super fast
hyperbuild is written in C, and can be run on Node.js using Emscripten, which generates fast Wasm code.
hyperbuild is written in C, and uses technologies like Emscripten and Cython to preserve performance in higher-level languages.
### Smart whitespace handling

View File

@ -1,60 +0,0 @@
{
"name": "hyperbuild",
"version": "0.0.3",
"description": "Streaming HTML minifier written in C and Emscripten for Node.js",
"types": "out/main.d.ts",
"main": "out/hyperbuild.em.js",
"files": [
"out/hyperbuild.em.js",
"out/hyperbuild.em.wasm",
"out/main.d.ts"
],
"scripts": {
"clean": "rm -rf tmp out",
"preprocess": "./preprocess.sh",
"build": "npm run clean && ./compile.sh",
"buildDebug": "npm run clean && ./compile.sh --debug",
"buildInternalDocs": "npm run preprocess && doxygen Doxyfile",
"test": "mocha out/test/**/*.spec.js",
"prepublishOnly": "npm run test && npm run build"
},
"repository": {
"type": "git",
"url": "git+https://github.com/wilsonzlin/hyperbuild.git"
},
"keywords": [
"html",
"c",
"fast",
"build",
"builder",
"preprocess",
"preprocessor",
"minify",
"minifier"
],
"author": {
"email": "contact@wilsonl.in",
"name": "Wilson Lin",
"url": "https://wilsonl.in/"
},
"license": "MIT",
"bugs": {
"url": "https://github.com/wilsonzlin/hyperbuild/issues"
},
"homepage": "https://github.com/wilsonzlin/hyperbuild#readme",
"devDependencies": {
"@types/chai": "^4.1.3",
"@types/emscripten": "0.0.31",
"@types/fs-extra": "^5.0.4",
"@types/mocha": "^5.2.1",
"@types/node": "^10.5.6",
"@types/strip-ansi": "^3.0.0",
"chai": "^4.1.2",
"fs-extra": "7.0.0",
"mocha": "^5.2.0",
"strip-ansi": "^4.0.0",
"ts-node": "^6.1.0",
"typescript": "3.0.1"
}
}

View File

@ -1,191 +0,0 @@
#include "./stream/content/html.c"
#include "./__main__.c"
// One-time setup hook for the Emscripten build: simply forwards to hb_init().
// It is invoked at the start of every em_entry() call as well.
void em_init(void) {
hb_init();
}
hbe_err_t em_entry(
// Can be NULL
char *input_code,
size_t input_code_length,
// Can be NULL
char *input_path,
// Can be NULL
char **output_code,
// Can be NULL
size_t *output_code_length,
// Can be NULL
char *output_path,
int config_keep,
int config_buffer,
// Can be NULL
char *suppress,
// Need nondefault_ex_* because NULL is a valid value for ex_*
int nondefault_ex_collapse_whitespace,
char *ex_collapse_whitespace,
int nondefault_ex_destroy_whole_whitespace,
char *ex_destroy_whole_whitespace,
int nondefault_ex_trim_whitespace,
char *ex_trim_whitespace,
int trim_class_attr,
int decode_entities,
int min_conditional_comments,
int remove_attr_quotes,
int remove_comments,
int remove_optional_tags,
int remove_tag_whitespace
) {
em_init();
hbe_err_t err = NULL;
hbe_err_t *hbe_err = &err;
hbu_fstreamin_t input_file = NULL;
hbu_list_char_t input_buffer = NULL;
hbu_fstreamout_t output_file = NULL;
hbu_list_char_t output_buffer = NULL;
hbu_streamoptions_t config_stream = hbu_streamoptions_create();
hbu_pipe_t pipe = NULL;
nh_set_str_t ex_collapse_whitespace_set = NULL;
nh_set_str_t ex_destroy_whole_whitespace_set = NULL;
nh_set_str_t ex_trim_whitespace_set = NULL;
if (suppress != NULL) {
HBE_CATCH_F(hbu_streamoptions_parse_and_add_errors_to_suppress, config_stream->suppressed_errors, suppress);
}
if (nondefault_ex_collapse_whitespace) {
ex_collapse_whitespace_set = HBE_CATCH_F(hbu_streamoptions_parse_list_of_tags, ex_collapse_whitespace);
config_stream->ex_collapse_whitespace = ex_collapse_whitespace_set;
} else {
config_stream->ex_collapse_whitespace = hbu_streamoptions_default_ex_collapse_whitespace();
}
if (nondefault_ex_destroy_whole_whitespace) {
ex_destroy_whole_whitespace_set = HBE_CATCH_F(hbu_streamoptions_parse_list_of_tags, ex_destroy_whole_whitespace);
config_stream->ex_destroy_whole_whitespace = ex_destroy_whole_whitespace_set;
} else {
config_stream->ex_destroy_whole_whitespace = hbu_streamoptions_default_ex_destroy_whole_whitespace();
}
if (nondefault_ex_trim_whitespace) {
ex_trim_whitespace_set = HBE_CATCH_F(hbu_streamoptions_parse_list_of_tags, ex_trim_whitespace);
config_stream->ex_trim_whitespace = ex_trim_whitespace_set;
} else {
config_stream->ex_trim_whitespace = hbu_streamoptions_default_ex_trim_whitespace();
}
config_stream->trim_class_attr = trim_class_attr;
config_stream->decode_entities = decode_entities;
config_stream->min_conditional_comments = min_conditional_comments;
config_stream->remove_attr_quotes = remove_attr_quotes;
config_stream->remove_comments = remove_comments;
config_stream->remove_optional_tags = remove_optional_tags;
config_stream->remove_tag_whitespace = remove_tag_whitespace;
pipe = hbu_pipe_create_blank(input_path);
if (input_code != NULL) {
// Use provided code as input
input_buffer = hbu_list_char_create();
hbu_list_char_extend_arr(input_buffer, (hb_char_t *) input_code, input_code_length);
hbu_pipe_blank_set_input_buffer(pipe, input_buffer);
} else {
// Read from a file as input
input_file = HBE_CATCH_F(hbu_fstreamin_create, input_path);
hbu_pipe_blank_set_input_fstreamin(pipe, input_file);
}
if (config_buffer || input_code != NULL) {
// Direct output to a buffer if --buffer or input is code (not file)
output_buffer = hbu_list_char_create();
hbu_pipe_blank_set_output_buffer(pipe, output_buffer);
} else {
// Direct output to a file
output_file = HBE_CATCH_F(hbu_fstreamout_create, output_path);
hbu_pipe_blank_set_output_fstreamout(pipe, output_file);
}
// Magic
HBE_CATCH_F(hbs_content, config_stream, pipe, NULL);
if (output_code != NULL) {
// Send back pointer to underlying data in output buffer
*output_code = (char *) hbu_list_char_underlying(output_buffer);
*output_code_length = output_buffer->length;
hbu_list_char_destroy_shallow(output_buffer);
output_buffer = NULL;
} else if (config_buffer) {
// Write buffered output data to file
output_file = HBE_CATCH_F(hbu_fstreamout_create, output_path);
HBE_CATCH_F(hbu_fstreamout_write_buffer, output_file, output_buffer);
}
finally:
if (err != NULL) {
if (output_file != NULL && input_buffer == NULL && !config_keep) {
// Delete only after opening output stream (don't delete before existing file has not been touched)
// Don't need to set if $config_buffer, as it won't write anything anyway
if (unlink(output_path)) {
hbl_log(HBL_LOG_WARN, "Failed to delete file %s with error %d", output_path, errno);
}
}
}
// Don't overwrite any existing err
if (input_file != NULL) {
hbe_err_t fstreamin_destroy_error = NULL;
hbu_fstreamin_destroy(&fstreamin_destroy_error, input_file);
if (fstreamin_destroy_error != NULL) {
hbl_error(fstreamin_destroy_error);
}
}
if (input_buffer != NULL) {
hbu_list_char_destroy(input_buffer);
}
// Don't overwrite any existing err
if (output_file != NULL) {
hbe_err_t fstreamout_destroy_error = NULL;
hbu_fstreamout_destroy(&fstreamout_destroy_error, output_file);
if (fstreamout_destroy_error != NULL) {
hbl_error(fstreamout_destroy_error);
}
}
if (output_buffer != NULL) {
hbu_list_char_destroy(output_buffer);
}
hbu_streamoptions_destroy(config_stream);
if (pipe != NULL) {
hbu_pipe_destroy(pipe);
}
if (ex_collapse_whitespace_set != NULL) {
nh_set_str_destroy(ex_collapse_whitespace_set);
}
if (ex_destroy_whole_whitespace_set != NULL) {
nh_set_str_destroy(ex_destroy_whole_whitespace_set);
}
if (ex_trim_whitespace_set != NULL) {
nh_set_str_destroy(ex_trim_whitespace_set);
}
return err;
}

View File

@ -1,16 +0,0 @@
// Browser-console helper: scrapes every ".named" element on the current page,
// pairs each entity name it lists with the hex code point found in the
// following sibling element, and prints one C `nh_map_str_int32_set` call per
// name, sorted.
(function () {
  "use strict";

  const statements = [];

  for (const namedCell of document.querySelectorAll(".named")) {
    // The sibling text is sliced to drop its first three characters and its last one.
    const hexDigits = namedCell.nextElementSibling.textContent.slice(3, -1);

    for (const reference of namedCell.textContent.trim().split(/\s+/)) {
      // Drop the first and last character of each whitespace-separated token.
      const entityName = reference.slice(1, -1);
      const codePoint = Number.parseInt(hexDigits, 16);
      statements.push(
        `nh_map_str_int32_set(hbr_entityrefs_map, "${entityName}", 0x${codePoint.toString(16)});`
      );
    }
  }

  statements.sort();
  console.log(statements.join("\n"));
})();

View File

@ -1,233 +0,0 @@
// Ambient declarations for globals this module uses but does not define.
// NOTE(review): presumably all of these are supplied by the Emscripten-generated
// runtime this file is bundled into (underscore-prefixed names being compiled C
// exports) — confirm against the build script.
declare var _em_init: any;
declare var _free: any;
declare var _hbe_err_code: any;
declare var _hbe_err_destroy: any;
declare var _hbe_err_message: any;
declare var _malloc: any;
declare var ALLOC_NORMAL: any;
declare var allocate: any;
declare var cwrap: any;
declare var getValue: any;
declare var lengthBytesUTF8: any;
declare var Pointer_stringify: any;
declare var stringToUTF8: any;
declare var UTF8ToString: any;
/**
 * Options accepted by `hyperbuild()`.
 *
 * `O` is the type of `outputFile`; `hyperbuild()` uses it as its return type.
 */
export interface IHyperbuildSettings<O extends string | undefined> {
// Forwarded to the native entry point as `config_keep`; defaults to false.
keep?: boolean;
// Forwarded to the native entry point as `config_buffer`; defaults to false.
buffer?: boolean;

// HTML source to process. When provided, `inputFile` is not passed on.
inputCode?: string;
// Path of the file to read when `inputCode` is not provided.
inputFile?: string;
// Path of the file to write. When omitted, the output is returned as a string instead.
outputFile?: O;

// Joined with "," and forwarded as the native `suppress` argument.
suppress?: string[];

// Each list below is joined with "," and forwarded to the native entry point;
// when a list is omitted, the native side is told to use its default instead.
MXcollapseWhitespace?: string[];
MXdestroyWholeWhitespace?: string[];
MXtrimWhitespace?: string[];

// Flags forwarded unchanged to the native entry point; each defaults to true.
trim_class_attr?: boolean;
decode_entities?: boolean;
min_conditional_comments?: boolean;
remove_attr_quotes?: boolean;
remove_comments?: boolean;
remove_optional_tags?: boolean;
remove_tag_whitespace?: boolean;
}
/**
 * Error thrown by `hyperbuild()` when the native code reports a failure.
 *
 * `code` is the numeric error code reported by the native side and `message`
 * is its accompanying description.
 */
export class HyperbuildError extends Error {
  code: number;

  public constructor(code: number, message: string) {
    // Pass the message to Error so it is part of the error from construction.
    super(message);
    // When compiled to ES5, calling the built-in Error constructor discards the
    // subclass prototype; restore it so `instanceof HyperbuildError` works.
    Object.setPrototypeOf(this, HyperbuildError.prototype);
    this.name = "HyperbuildError";
    this.code = code;
  }
}
// Emscripten argument type of each parameter of the native `em_entry`, in
// declaration order. Only the type names documented for ccall/cwrap are used;
// raw pointers are passed as 'number'.
const hyperbuild_c_arg_types = [
  'number', // input_code (pointer)
  'number', // input_code_length
  'string', // input_path

  'number', // output_code (pointer)
  'number', // output_code_length (pointer)
  'string', // output_path

  'boolean', // config_keep
  'boolean', // config_buffer

  'string', // suppress

  'boolean', // nondefault_ex_collapse_whitespace
  'string', // ex_collapse_whitespace
  'boolean', // nondefault_ex_destroy_whole_whitespace
  'string', // ex_destroy_whole_whitespace
  'boolean', // nondefault_ex_trim_whitespace
  'string', // ex_trim_whitespace

  'boolean', // trim_class_attr
  'boolean', // decode_entities
  'boolean', // min_conditional_comments
  'boolean', // remove_attr_quotes
  'boolean', // remove_comments
  'boolean', // remove_optional_tags
  'boolean', // remove_tag_whitespace
];
// JS-callable wrapper around the native `em_entry`, created with cwrap. Its
// numeric return value is treated by `hyperbuild()` as an error pointer
// (falsy when there was no error).
const hyperbuild_c = cwrap('em_entry', 'number', hyperbuild_c_arg_types);
// Call signature of `hyperbuild`, as handed to `onload()` callbacks and as the
// resolved value of `load()`.
export interface IHyperbuild {
<O extends string | undefined>(settings: IHyperbuildSettings<O>): O
}
/**
 * Run the native minifier with the given settings.
 *
 * Input is `inputCode` when provided, otherwise the file at `inputFile`.
 * When `outputFile` is omitted the processed output is returned as a string;
 * otherwise its path is passed to the native code and `undefined` is returned.
 *
 * @throws {HyperbuildError} when the native call reports an error.
 */
export function hyperbuild<O extends string | undefined>({
  keep = false,
  buffer = false,

  inputCode,
  inputFile,
  outputFile,

  suppress,

  MXcollapseWhitespace,
  MXdestroyWholeWhitespace,
  MXtrimWhitespace,

  trim_class_attr = true,
  decode_entities = true,
  min_conditional_comments = true,
  remove_attr_quotes = true,
  remove_comments = true,
  remove_optional_tags = true,
  remove_tag_whitespace = true,
}: IHyperbuildSettings<O>): O {
  // One slot per native parameter; slots left as null are passed through as-is.
  let args: any[] = hyperbuild_c_arg_types.map(() => null);

  // Heap allocations made for this call; all are released in `finally`.
  let input_code_ptr = 0;
  let output_ptr = 0;
  let output_size_ptr = 0;
  let output_data_ptr = 0;

  try {
    if (inputCode != undefined) {
      let bytes = lengthBytesUTF8(inputCode);
      // +1 for the NUL terminator written by stringToUTF8
      args[0] = input_code_ptr = _malloc(bytes + 1);
      stringToUTF8(inputCode, input_code_ptr, bytes + 1);
      args[1] = bytes;
    } else {
      args[2] = inputFile;
    }

    if (outputFile == undefined) {
      // Pointers are 32-bit integers in Emscripten
      // https://kripken.github.io/emscripten-site/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.htm
      args[3] = output_ptr = allocate([0], "*", ALLOC_NORMAL);
      args[4] = output_size_ptr = allocate([0], "*", ALLOC_NORMAL);
    } else {
      args[5] = outputFile;
    }

    args[6] = keep;
    args[7] = buffer;

    if (suppress != undefined) {
      args[8] = suppress.join(",");
    }

    if (MXcollapseWhitespace != undefined) {
      args[9] = true;
      args[10] = MXcollapseWhitespace.join(",");
    } else {
      args[9] = false;
    }

    if (MXdestroyWholeWhitespace != undefined) {
      args[11] = true;
      args[12] = MXdestroyWholeWhitespace.join(",");
    } else {
      args[11] = false;
    }

    if (MXtrimWhitespace != undefined) {
      args[13] = true;
      args[14] = MXtrimWhitespace.join(",");
    } else {
      args[13] = false;
    }

    args[15] = trim_class_attr;
    args[16] = decode_entities;
    args[17] = min_conditional_comments;
    args[18] = remove_attr_quotes;
    args[19] = remove_comments;
    args[20] = remove_optional_tags;
    args[21] = remove_tag_whitespace;

    let err_ptr = hyperbuild_c.apply(undefined, args);

    if (output_ptr) {
      // Remember the data pointer written by the native side so that it is
      // released below; nothing else frees it.
      // NOTE(review): assumes the pointer is the start of a malloc'd block — confirm.
      output_data_ptr = getValue(output_ptr, "*");
    }

    if (err_ptr) {
      let err_code = _hbe_err_code(err_ptr);
      // TODO Does this work with non-ASCII?
      // TODO Needs length as error message might contain NUL
      let err_message = Pointer_stringify(_hbe_err_message(err_ptr));
      _hbe_err_destroy(err_ptr);
      throw new HyperbuildError(err_code, err_message);
    }

    let rv: O = undefined as O;
    if (output_ptr) {
      // The length slot was allocated pointer-sized (32 bits), so read it as an
      // unsigned 32-bit value; reading "i64" would read past the allocation.
      let length = getValue(output_size_ptr, "i32") >>> 0;
      // TODO Does this work with non-ASCII?
      rv = Pointer_stringify(output_data_ptr, length) as O;
    }

    return rv;
  } finally {
    // Runs on success, on a reported error, and if the native call itself throws.
    [input_code_ptr, output_ptr, output_size_ptr, output_data_ptr].forEach(ptr => {
      if (ptr) {
        _free(ptr);
      }
    });
  }
}
// Set to true once the runtime-initialized hook has run.
let loaded = false;
// Callbacks registered through onload() before the runtime was ready.
const onload_functions: ((hyperbuild: IHyperbuild) => any)[] = [];
// Resolvers of promises handed out by load() before the runtime was ready.
const onload_promises: Function[] = [];
/**
 * Register a callback to receive `hyperbuild` once the runtime is ready.
 * If it is already ready, the callback is invoked immediately.
 */
export function onload(callback: (hyperbuild: IHyperbuild) => any) {
  if (!loaded) {
    // Not ready yet: queue until the runtime-initialized hook fires.
    onload_functions.push(callback);
    return;
  }
  callback(hyperbuild);
}
/**
 * Promise flavour of `onload`: resolves with `hyperbuild` once the runtime is
 * ready, or straight away if it already is.
 */
export function load(): Promise<IHyperbuild> {
  if (!loaded) {
    // Park the resolver; the runtime-initialized hook calls it later.
    return new Promise(resolve => {
      onload_promises.push(resolve);
    });
  }
  return Promise.resolve(hyperbuild);
}
// Runtime-initialized hook: runs the native init, marks the module as loaded,
// then notifies queued callbacks first and pending load() promises second.
Module['onRuntimeInitialized'] = () => {
  _em_init();
  loaded = true;

  for (const notify of onload_functions) {
    notify(hyperbuild);
  }
  for (const resolve of onload_promises) {
    resolve(hyperbuild);
  }
};

View File

@ -1,70 +0,0 @@
import {expect} from "chai";
import "mocha";
const {load: loadHyperbuild, hyperbuild} = require("../../hyperbuild.em.js");
// Wait for hyperbuild to finish loading before any test runs.
before(() => loadHyperbuild());
// Behavioural tests for the hyperbuild wrapper. All cases live inside the one
// suite, including the entity-decoding test that previously sat outside it.
describe("hyperbuild", () => {
  it("should trim whitespace", () => {
    expect(hyperbuild({
      inputCode: `<h1> a </h1>`,
    })).to.equal("<h1>a</h1>");
  });

  it("should destroy whole whitespace", () => {
    expect(hyperbuild({
      inputCode: `<div>
<section></section>
<section>
<h1> Helloo </h1>
</section>
</div>`,
    })).to.equal("<div><section></section><section><h1>Helloo</h1></section></div>");
  });

  it("should destroy whole whitespace at root", () => {
    expect(hyperbuild({
      inputCode: `
<div>
<section></section>
</div>
<div>
</div>
`,
    })).to.equal("<div><section></section></div><div></div>");
  });

  it("should throw an error on malformed entities", () => {
    // forEach rather than map: the loop is run only for its assertions.
    [
      `<div>&x10FFF;</div>`,
      `<div>Johnson & Johnson</div>`,
      `<div>&10FFFF;</div>`,
      `<div>&mdash</div>`,
    ].forEach(p => {
      let err = null;
      try {
        hyperbuild({inputCode: p});
      } catch (e) {
        err = e;
      }
      // Fails if nothing was thrown, since `err` is then still null.
      expect(err)
        .to.have.property("code", 65);
    });
  });

  it("should decode valid entities", () => {
    expect(hyperbuild({
      inputCode: `<div>&#x10FFFF;</div>`,
    })).to.equal(`<div>\u{10FFFF}</div>`);
    expect(hyperbuild({
      inputCode: `<div>&apos;</div>`,
    })).to.equal(`<div>'</div>`);
  });
});

View File

@ -1,32 +0,0 @@
{
"include": [
"src/**/*.ts"
],
"compilerOptions": {
"target": "es5",
"lib": [
"es5",
"es6",
"es7",
"es2017"
],
"module": "commonjs",
"esModuleInterop": true,
"skipLibCheck": true,
"allowJs": false,
"declaration": true,
"outDir": "out",
"strict": true,
"suppressImplicitAnyIndexErrors": true,
"noImplicitAny": true,
"strictNullChecks": true,
"strictFunctionTypes": true,
"strictPropertyInitialization": true,
"noImplicitThis": true,
"alwaysStrict": true,
"noUnusedParameters": true,
"noImplicitReturns": true,
"noFallthroughCasesInSwitch": true
}
}