Add spec-compliant unquoted attr values option
This commit is contained in:
parent
841c6de908
commit
0b58853999
12 changed files with 125 additions and 4 deletions
|
|
@ -24,6 +24,8 @@ struct Cli {
|
|||
/// Minify CSS in `<style>` tags and `style` attributes.
|
||||
#[structopt(long)]
|
||||
minify_css: bool,
|
||||
/// Ensure all unquoted attribute values in the output do not contain any characters prohibited by the WHATWG specification.
|
||||
pub ensure_spec_compliant_unquoted_attribute_values: bool,
|
||||
/// Do not omit closing tags when possible.
|
||||
#[structopt(long)]
|
||||
keep_closing_tags: bool,
|
||||
|
|
@ -71,6 +73,7 @@ fn main() {
|
|||
let out_code = minify(
|
||||
&src_code,
|
||||
&Cfg {
|
||||
ensure_spec_compliant_unquoted_attribute_values: args.ensure_spec_compliant_unquoted_attribute_values,
|
||||
keep_closing_tags: args.keep_closing_tags,
|
||||
keep_comments: args.keep_comments,
|
||||
keep_html_and_head_opening_tags: args.keep_html_and_head_opening_tags,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ package in.wilsonl.minifyhtml;
|
|||
* Class representing minification configuration.
|
||||
*/
|
||||
public class Configuration {
|
||||
public final boolean ensure_spec_compliant_unquoted_attribute_values;
|
||||
public final boolean keep_closing_tags;
|
||||
public final boolean keep_comments;
|
||||
public final boolean keep_html_and_head_opening_tags;
|
||||
|
|
@ -14,6 +15,7 @@ public class Configuration {
|
|||
public final boolean remove_processing_instructions;
|
||||
|
||||
public Configuration(
|
||||
boolean ensure_spec_compliant_unquoted_attribute_values,
|
||||
boolean keep_closing_tags,
|
||||
boolean keep_comments,
|
||||
boolean keep_html_and_head_opening_tags,
|
||||
|
|
@ -23,6 +25,7 @@ public class Configuration {
|
|||
boolean remove_bangs,
|
||||
boolean remove_processing_instructions
|
||||
) {
|
||||
this.ensure_spec_compliant_unquoted_attribute_values = ensure_spec_compliant_unquoted_attribute_values;
|
||||
this.keep_closing_tags = keep_closing_tags;
|
||||
this.keep_comments = keep_comments;
|
||||
this.keep_html_and_head_opening_tags = keep_html_and_head_opening_tags;
|
||||
|
|
@ -37,6 +40,7 @@ public class Configuration {
|
|||
* Builder to help create configuration.
|
||||
*/
|
||||
public static class Builder {
|
||||
private boolean ensure_spec_compliant_unquoted_attribute_values = false;
|
||||
private boolean keep_closing_tags = false;
|
||||
private boolean keep_comments = false;
|
||||
private boolean keep_html_and_head_opening_tags = false;
|
||||
|
|
@ -46,6 +50,11 @@ public class Configuration {
|
|||
private boolean remove_bangs = false;
|
||||
private boolean remove_processing_instructions = false;
|
||||
|
||||
public Builder setEnsureSpecCompliantUnquotedAttributeValues(boolean val) {
|
||||
this.ensure_spec_compliant_unquoted_attribute_values = val;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setKeepClosingTags(boolean val) {
|
||||
this.keep_closing_tags = val;
|
||||
return this;
|
||||
|
|
@ -89,6 +98,7 @@ public class Configuration {
|
|||
|
||||
public Configuration build() {
|
||||
return new Configuration(
|
||||
this.ensure_spec_compliant_unquoted_attribute_values,
|
||||
this.keep_closing_tags,
|
||||
this.keep_comments,
|
||||
this.keep_html_and_head_opening_tags,
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ fn build_cfg(
|
|||
obj: &JObject,
|
||||
) -> Cfg {
|
||||
Cfg {
|
||||
ensure_spec_compliant_unquoted_attribute_values: env.get_field(*obj, "ensure_spec_compliant_unquoted_attribute_values", "Z").unwrap().z().unwrap(),
|
||||
keep_closing_tags: env.get_field(*obj, "keep_closing_tags", "Z").unwrap().z().unwrap(),
|
||||
keep_comments: env.get_field(*obj, "keep_comments", "Z").unwrap().z().unwrap(),
|
||||
keep_html_and_head_opening_tags: env.get_field(*obj, "keep_html_and_head_opening_tags", "Z").unwrap().z().unwrap(),
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ napi_value node_method_create_configuration(napi_env env, napi_callback_info inf
|
|||
/* It's OK if this fails. */ napi_get_value_bool(env, prop##_value, &prop); \
|
||||
}
|
||||
|
||||
GET_CFG_PROP(ensure_spec_compliant_unquoted_attribute_values);
|
||||
GET_CFG_PROP(keep_closing_tags);
|
||||
GET_CFG_PROP(keep_comments);
|
||||
GET_CFG_PROP(keep_html_and_head_opening_tags);
|
||||
|
|
@ -81,6 +82,7 @@ napi_value node_method_create_configuration(napi_env env, napi_callback_info inf
|
|||
GET_CFG_PROP(remove_processing_instructions);
|
||||
|
||||
Cfg const* cfg = ffi_create_cfg(
|
||||
ensure_spec_compliant_unquoted_attribute_values,
|
||||
keep_closing_tags,
|
||||
keep_comments,
|
||||
keep_html_and_head_opening_tags,
|
||||
|
|
|
|||
2
nodejs/index.d.ts
vendored
2
nodejs/index.d.ts
vendored
|
|
@ -8,6 +8,8 @@ export type Cfg = { __doNotUseCfgDirectly: string & { __itIsANapiExternalValue:
|
|||
* @returns An opaque value that can be passed to minify functions
|
||||
*/
|
||||
export function createConfiguration (options: {
|
||||
/** Ensure all unquoted attribute values in the output do not contain any characters prohibited by the WHATWG specification. */
|
||||
ensure_spec_compliant_unquoted_attribute_values?: boolean;
|
||||
/** Do not omit closing tags when possible. */
|
||||
keep_closing_tags?: boolean;
|
||||
/** Do not omit `<html>` and `<head>` opening tags when they don't have attributes. */
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use std::{mem, slice};
|
|||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn ffi_create_cfg(
|
||||
ensure_spec_compliant_unquoted_attribute_values: bool,
|
||||
keep_closing_tags: bool,
|
||||
keep_comments: bool,
|
||||
keep_html_and_head_opening_tags: bool,
|
||||
|
|
@ -13,6 +14,7 @@ pub extern "C" fn ffi_create_cfg(
|
|||
remove_processing_instructions: bool,
|
||||
) -> *const Cfg {
|
||||
Box::into_raw(Box::new(Cfg {
|
||||
ensure_spec_compliant_unquoted_attribute_values,
|
||||
keep_closing_tags,
|
||||
keep_comments,
|
||||
keep_html_and_head_opening_tags,
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ use std::string::String;
|
|||
|
||||
#[pyfunction(
|
||||
py_args="*",
|
||||
ensure_spec_compliant_unquoted_attribute_values="false",
|
||||
keep_closing_tags="false",
|
||||
keep_comments="false",
|
||||
keep_html_and_head_opening_tags="false",
|
||||
|
|
@ -16,6 +17,7 @@ use std::string::String;
|
|||
)]
|
||||
fn minify(
|
||||
code: String,
|
||||
ensure_spec_compliant_unquoted_attribute_values: bool,
|
||||
keep_closing_tags: bool,
|
||||
keep_comments: bool,
|
||||
keep_html_and_head_opening_tags: bool,
|
||||
|
|
@ -27,6 +29,7 @@ fn minify(
|
|||
) -> PyResult<String> {
|
||||
let code = code.into_bytes();
|
||||
let out_code = minify_html_native(&code, &Cfg {
|
||||
ensure_spec_compliant_unquoted_attribute_values,
|
||||
keep_closing_tags,
|
||||
keep_comments,
|
||||
keep_html_and_head_opening_tags,
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ methods! {
|
|||
.unwrap();
|
||||
|
||||
let cfg = &Cfg {
|
||||
ensure_spec_compliant_unquoted_attribute_values: get_cfg_hash_prop!(cfg_hash, "ensure_spec_compliant_unquoted_attribute_values"),
|
||||
keep_closing_tags: get_cfg_hash_prop!(cfg_hash, "keep_closing_tags"),
|
||||
keep_comments: get_cfg_hash_prop!(cfg_hash, "keep_comments"),
|
||||
keep_html_and_head_opening_tags: get_cfg_hash_prop!(cfg_hash, "keep_html_and_head_opening_tags"),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
/// Configuration settings that can be adjusted and passed to a minification function to change the
|
||||
/// minification approach.
|
||||
pub struct Cfg {
|
||||
/// Ensure all unquoted attribute values in the output do not contain any characters prohibited by the [WHATWG specification](https://html.spec.whatwg.org/multipage/syntax.html#attributes-2).
|
||||
pub ensure_spec_compliant_unquoted_attribute_values: bool,
|
||||
/// Do not omit closing tags when possible.
|
||||
pub keep_closing_tags: bool,
|
||||
/// Do not omit `<html>` and `<head>` opening tags when they don't have attributes.
|
||||
|
|
@ -30,6 +32,7 @@ pub struct Cfg {
|
|||
impl Cfg {
|
||||
pub fn new() -> Cfg {
|
||||
Cfg {
|
||||
ensure_spec_compliant_unquoted_attribute_values: false,
|
||||
keep_closing_tags: false,
|
||||
keep_comments: false,
|
||||
keep_html_and_head_opening_tags: false,
|
||||
|
|
|
|||
|
|
@ -107,10 +107,85 @@ fn build_unquoted_replacer() -> Replacer {
|
|||
)
|
||||
}
|
||||
|
||||
// If spec compliance is required, these characters must also be encoded in an unquoted attr value,
|
||||
// as well as `<` and `>`.
|
||||
// Each entry maps a raw byte to its decimal numeric character reference, written WITHOUT the
// trailing semicolon (the shortest form). The replacer builder appends `;` itself whenever the
// next byte is a digit or a semicolon.
static WHATWG_UNQUOTED: &[(u8, &[u8])] = &[
    (b'"', b"&#34"),
    (b'\'', b"&#39"),
    (b'=', b"&#61"),
    (b'`', b"&#96"),
];
|
||||
|
||||
fn build_whatwg_unquoted_replacer() -> Replacer {
|
||||
let mut patterns = Vec::<Vec<u8>>::new();
|
||||
let mut replacements = Vec::<Vec<u8>>::new();
|
||||
|
||||
// Replace all whitespace with a numeric entity, unless the whitespace is followed by a digit or semicolon,
|
||||
// in which case add a semicolon to the encoded entity.
|
||||
for c in "0123456789;".bytes() {
|
||||
for &(ws, rep) in WS {
|
||||
patterns.push(vec![ws, c]);
|
||||
replacements.push({
|
||||
let mut ent = rep.to_vec();
|
||||
ent.push(b';');
|
||||
ent.push(c);
|
||||
ent
|
||||
});
|
||||
}
|
||||
}
|
||||
for &(ws, rep) in WS {
|
||||
patterns.push(vec![ws]);
|
||||
replacements.push(rep.to_vec());
|
||||
}
|
||||
|
||||
// Replace WHATWG-disallowed characters with a numeric entity, unless they're followed by a digit or semicolon,
|
||||
// in which case add a semicolon to the encoded entity.
|
||||
for c in "0123456789;".bytes() {
|
||||
for &(ws, rep) in WHATWG_UNQUOTED {
|
||||
patterns.push(vec![ws, c]);
|
||||
replacements.push({
|
||||
let mut ent = rep.to_vec();
|
||||
ent.push(b';');
|
||||
ent.push(c);
|
||||
ent
|
||||
});
|
||||
}
|
||||
}
|
||||
for &(ws, rep) in WHATWG_UNQUOTED {
|
||||
patterns.push(vec![ws]);
|
||||
replacements.push(rep.to_vec());
|
||||
}
|
||||
|
||||
// Replace all `<` with `<`, unless the chevron is followed by a semicolon,
|
||||
// in which case add a semicolon to the encoded entity.
|
||||
// Use `>` instead of `<` as `<` has more conflicting entities e.g. `⪦`, `⋖`.
|
||||
patterns.push(b"<;".to_vec());
|
||||
replacements.push(b"<;".to_vec());
|
||||
patterns.push(b"<".to_vec());
|
||||
replacements.push(b"<".to_vec());
|
||||
|
||||
// Replace all `>` with `>`, unless the chevron is followed by a semicolon,
|
||||
// in which case add a semicolon to the encoded entity.
|
||||
// Use `>` instead of `>` as `>` has more conflicting entities e.g. `⪧`, `⋗`.
|
||||
patterns.push(b">;".to_vec());
|
||||
replacements.push(b">;".to_vec());
|
||||
patterns.push(b">".to_vec());
|
||||
replacements.push(b">".to_vec());
|
||||
|
||||
Replacer::new(
|
||||
AhoCorasickBuilder::new()
|
||||
.dfa(true)
|
||||
.match_kind(MatchKind::LeftmostLongest)
|
||||
.build(patterns),
|
||||
replacements,
|
||||
)
|
||||
}
|
||||
|
||||
// Shared replacers, one per attribute value encoding strategy; each is initialised from its
// corresponding `build_*_replacer` function.
lazy_static! {
|
||||
static ref DOUBLE_QUOTED_REPLACER: Replacer = build_double_quoted_replacer();
|
||||
static ref SINGLE_QUOTED_REPLACER: Replacer = build_single_quoted_replacer();
|
||||
static ref UNQUOTED_QUOTED_REPLACER: Replacer = build_unquoted_replacer();
|
||||
static ref WHATWG_UNQUOTED_QUOTED_REPLACER: Replacer = build_whatwg_unquoted_replacer();
|
||||
}
|
||||
|
||||
pub struct AttrMinifiedValue {
|
||||
|
|
@ -164,8 +239,12 @@ pub fn encode_using_single_quotes(val: &[u8]) -> AttrMinifiedValue {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn encode_unquoted(val: &[u8]) -> AttrMinifiedValue {
|
||||
let data = UNQUOTED_QUOTED_REPLACER.replace_all(val);
|
||||
pub fn encode_unquoted(val: &[u8], whatwg: bool) -> AttrMinifiedValue {
|
||||
let data = if whatwg {
|
||||
WHATWG_UNQUOTED_QUOTED_REPLACER.replace_all(val)
|
||||
} else {
|
||||
UNQUOTED_QUOTED_REPLACER.replace_all(val)
|
||||
};
|
||||
let prefix: &'static [u8] = match data.get(0) {
|
||||
Some(b'"') => match data.get(1) {
|
||||
Some(&c2) if DIGIT[c2] || c2 == b';' => b""",
|
||||
|
|
@ -259,7 +338,10 @@ pub fn minify_attr(
|
|||
if sq.len() < min.len() {
|
||||
min = sq;
|
||||
};
|
||||
let uq = encode_unquoted(&encoded);
|
||||
let uq = encode_unquoted(
|
||||
&encoded,
|
||||
cfg.ensure_spec_compliant_unquoted_attribute_values,
|
||||
);
|
||||
if uq.len() < min.len() {
|
||||
min = uq;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ fn test_encode_using_single_quotes() {
|
|||
|
||||
#[test]
|
||||
fn test_encode_unquoted() {
|
||||
let min = encode_unquoted(br#""123' 'h 0 && ;abbibi "' \ >& 3>;"#);
|
||||
let min = encode_unquoted(br#""123' 'h 0 && ;abbibi "' \ >& 3>;"#, false);
|
||||
assert_eq!(
|
||||
min.str(),
|
||||
r#""123' 'h   0 && ;abbibi "' \ >& 3>;"#,
|
||||
|
|
|
|||
|
|
@ -250,6 +250,18 @@ fn test_attr_unquoted_value_minification() {
|
|||
eval(b"<a b=hello></a>", b"<a b=hello></a>");
|
||||
}
|
||||
|
||||
/// With spec-compliant unquoted values enabled, values containing WHATWG-prohibited characters
/// are expected to come out double-quoted.
#[test]
fn test_attr_whatwg_unquoted_value_minification() {
    let mut cfg = super::Cfg::new();
    cfg.ensure_spec_compliant_unquoted_attribute_values = true;
    eval_with_cfg(b"<a b==></a>", br#"<a b="="></a>"#, &cfg);
    // The literal `"` inside the value must appear as `&#34` in the double-quoted output;
    // a bare `"` would terminate the attribute value early.
    eval_with_cfg(
        br#"<a b=`'"<<==/`/></a>"#,
        br#"<a b="`'&#34<<==/`/"></a>"#,
        &cfg,
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_class_attr_value_minification() {
|
||||
eval(b"<a class= c></a>", b"<a class=c></a>");
|
||||
|
|
|
|||
Reference in a new issue