Add spec-compliant unquoted attr values option

This commit is contained in:
Wilson Lin 2021-08-08 01:45:25 +10:00
commit 0b58853999
12 changed files with 125 additions and 4 deletions

View file

@ -24,6 +24,8 @@ struct Cli {
/// Minify CSS in `<style>` tags and `style` attributes.
#[structopt(long)]
minify_css: bool,
/// Ensure all unquoted attribute values in the output do not contain any characters prohibited by the WHATWG specification.
pub ensure_spec_compliant_unquoted_attribute_values: bool,
/// Do not omit closing tags when possible.
#[structopt(long)]
keep_closing_tags: bool,
@ -71,6 +73,7 @@ fn main() {
let out_code = minify(
&src_code,
&Cfg {
ensure_spec_compliant_unquoted_attribute_values: args.ensure_spec_compliant_unquoted_attribute_values,
keep_closing_tags: args.keep_closing_tags,
keep_comments: args.keep_comments,
keep_html_and_head_opening_tags: args.keep_html_and_head_opening_tags,

View file

@ -4,6 +4,7 @@ package in.wilsonl.minifyhtml;
* Class representing minification configuration.
*/
public class Configuration {
public final boolean ensure_spec_compliant_unquoted_attribute_values;
public final boolean keep_closing_tags;
public final boolean keep_comments;
public final boolean keep_html_and_head_opening_tags;
@ -14,6 +15,7 @@ public class Configuration {
public final boolean remove_processing_instructions;
public Configuration(
boolean ensure_spec_compliant_unquoted_attribute_values,
boolean keep_closing_tags,
boolean keep_comments,
boolean keep_html_and_head_opening_tags,
@ -23,6 +25,7 @@ public class Configuration {
boolean remove_bangs,
boolean remove_processing_instructions
) {
this.ensure_spec_compliant_unquoted_attribute_values = ensure_spec_compliant_unquoted_attribute_values;
this.keep_closing_tags = keep_closing_tags;
this.keep_comments = keep_comments;
this.keep_html_and_head_opening_tags = keep_html_and_head_opening_tags;
@ -37,6 +40,7 @@ public class Configuration {
* Builder to help create configuration.
*/
public static class Builder {
private boolean ensure_spec_compliant_unquoted_attribute_values = false;
private boolean keep_closing_tags = false;
private boolean keep_comments = false;
private boolean keep_html_and_head_opening_tags = false;
@ -46,6 +50,11 @@ public class Configuration {
private boolean remove_bangs = false;
private boolean remove_processing_instructions = false;
/**
 * Sets the `ensure_spec_compliant_unquoted_attribute_values` option on this builder.
 *
 * @param val the value to store for the option
 * @return this builder, so calls can be chained
 */
public Builder setEnsureSpecCompliantUnquotedAttributeValues(boolean val) {
this.ensure_spec_compliant_unquoted_attribute_values = val;
return this;
}
public Builder setKeepClosingTags(boolean val) {
this.keep_closing_tags = val;
return this;
@ -89,6 +98,7 @@ public class Configuration {
public Configuration build() {
return new Configuration(
this.ensure_spec_compliant_unquoted_attribute_values,
this.keep_closing_tags,
this.keep_comments,
this.keep_html_and_head_opening_tags,

View file

@ -9,6 +9,7 @@ fn build_cfg(
obj: &JObject,
) -> Cfg {
Cfg {
ensure_spec_compliant_unquoted_attribute_values: env.get_field(*obj, "ensure_spec_compliant_unquoted_attribute_values", "Z").unwrap().z().unwrap(),
keep_closing_tags: env.get_field(*obj, "keep_closing_tags", "Z").unwrap().z().unwrap(),
keep_comments: env.get_field(*obj, "keep_comments", "Z").unwrap().z().unwrap(),
keep_html_and_head_opening_tags: env.get_field(*obj, "keep_html_and_head_opening_tags", "Z").unwrap().z().unwrap(),

View file

@ -71,6 +71,7 @@ napi_value node_method_create_configuration(napi_env env, napi_callback_info inf
/* It's OK if this fails. */ napi_get_value_bool(env, prop##_value, &prop); \
}
GET_CFG_PROP(ensure_spec_compliant_unquoted_attribute_values);
GET_CFG_PROP(keep_closing_tags);
GET_CFG_PROP(keep_comments);
GET_CFG_PROP(keep_html_and_head_opening_tags);
@ -81,6 +82,7 @@ napi_value node_method_create_configuration(napi_env env, napi_callback_info inf
GET_CFG_PROP(remove_processing_instructions);
Cfg const* cfg = ffi_create_cfg(
ensure_spec_compliant_unquoted_attribute_values,
keep_closing_tags,
keep_comments,
keep_html_and_head_opening_tags,

2
nodejs/index.d.ts vendored
View file

@ -8,6 +8,8 @@ export type Cfg = { __doNotUseCfgDirectly: string & { __itIsANapiExternalValue:
* @returns An opaque value that can be passed to minify functions
*/
export function createConfiguration (options: {
/** Ensure all unquoted attribute values in the output do not contain any characters prohibited by the WHATWG specification. */
ensure_spec_compliant_unquoted_attribute_values?: boolean;
/** Do not omit closing tags when possible. */
keep_closing_tags?: boolean;
/** Do not omit `<html>` and `<head>` opening tags when they don't have attributes. */

View file

@ -3,6 +3,7 @@ use std::{mem, slice};
#[no_mangle]
pub extern "C" fn ffi_create_cfg(
ensure_spec_compliant_unquoted_attribute_values: bool,
keep_closing_tags: bool,
keep_comments: bool,
keep_html_and_head_opening_tags: bool,
@ -13,6 +14,7 @@ pub extern "C" fn ffi_create_cfg(
remove_processing_instructions: bool,
) -> *const Cfg {
Box::into_raw(Box::new(Cfg {
ensure_spec_compliant_unquoted_attribute_values,
keep_closing_tags,
keep_comments,
keep_html_and_head_opening_tags,

View file

@ -5,6 +5,7 @@ use std::string::String;
#[pyfunction(
py_args="*",
ensure_spec_compliant_unquoted_attribute_values="false",
keep_closing_tags="false",
keep_comments="false",
keep_html_and_head_opening_tags="false",
@ -16,6 +17,7 @@ use std::string::String;
)]
fn minify(
code: String,
ensure_spec_compliant_unquoted_attribute_values: bool,
keep_closing_tags: bool,
keep_comments: bool,
keep_html_and_head_opening_tags: bool,
@ -27,6 +29,7 @@ fn minify(
) -> PyResult<String> {
let code = code.into_bytes();
let out_code = minify_html_native(&code, &Cfg {
ensure_spec_compliant_unquoted_attribute_values,
keep_closing_tags,
keep_comments,
keep_html_and_head_opening_tags,

View file

@ -29,6 +29,7 @@ methods! {
.unwrap();
let cfg = &Cfg {
ensure_spec_compliant_unquoted_attribute_values: get_cfg_hash_prop!(cfg_hash, "ensure_spec_compliant_unquoted_attribute_values"),
keep_closing_tags: get_cfg_hash_prop!(cfg_hash, "keep_closing_tags"),
keep_comments: get_cfg_hash_prop!(cfg_hash, "keep_comments"),
keep_html_and_head_opening_tags: get_cfg_hash_prop!(cfg_hash, "keep_html_and_head_opening_tags"),

View file

@ -1,6 +1,8 @@
/// Configuration settings that can be adjusted and passed to a minification function to change the
/// minification approach.
pub struct Cfg {
/// Ensure all unquoted attribute values in the output do not contain any characters prohibited by the [WHATWG specification](https://html.spec.whatwg.org/multipage/syntax.html#attributes-2).
pub ensure_spec_compliant_unquoted_attribute_values: bool,
/// Do not omit closing tags when possible.
pub keep_closing_tags: bool,
/// Do not omit `<html>` and `<head>` opening tags when they don't have attributes.
@ -30,6 +32,7 @@ pub struct Cfg {
impl Cfg {
pub fn new() -> Cfg {
Cfg {
ensure_spec_compliant_unquoted_attribute_values: false,
keep_closing_tags: false,
keep_comments: false,
keep_html_and_head_opening_tags: false,

View file

@ -107,10 +107,85 @@ fn build_unquoted_replacer() -> Replacer {
)
}
// If spec compliance is required, these characters must also be encoded in an unquoted attr value,
// as well as `<` and `>`.
// Each entry pairs a byte with its decimal numeric character reference, written without the
// trailing semicolon; the replacer builder appends `;` only when the following byte would
// otherwise be read as part of the reference.
static WHATWG_UNQUOTED: &[(u8, &[u8])] = &[
    (b'"', b"&#34"),
    (b'\'', b"&#39"),
    (b'=', b"&#61"),
    // Backtick is byte 96 (0x60); `&#6` would decode to a control character instead.
    (b'`', b"&#96"),
];
/// Builds the replacer used to encode an unquoted attribute value when WHATWG spec compliance is
/// required.
///
/// Whitespace (from `WS`), the characters in `WHATWG_UNQUOTED`, `<` and `>` are all replaced with
/// entities. Entities are emitted without a trailing semicolon unless the next byte would
/// otherwise be absorbed into the entity.
fn build_whatwg_unquoted_replacer() -> Replacer {
    /// Appends the rules for one `(byte, entity)` table: first the two-byte patterns where the
    /// byte is followed by a digit or semicolon (entity gets an explicit `;` and the follower is
    /// kept), then the single-byte patterns (bare entity).
    fn push_entity_rules(
        table: &[(u8, &[u8])],
        patterns: &mut Vec<Vec<u8>>,
        replacements: &mut Vec<Vec<u8>>,
    ) {
        for follower in "0123456789;".bytes() {
            for &(ch, entity) in table {
                patterns.push(vec![ch, follower]);
                let mut terminated = entity.to_vec();
                terminated.push(b';');
                terminated.push(follower);
                replacements.push(terminated);
            }
        }
        for &(ch, entity) in table {
            patterns.push(vec![ch]);
            replacements.push(entity.to_vec());
        }
    }

    let mut patterns = Vec::<Vec<u8>>::new();
    let mut replacements = Vec::<Vec<u8>>::new();

    // Replace all whitespace with a numeric entity, unless the whitespace is followed by a digit or semicolon,
    // in which case add a semicolon to the encoded entity.
    push_entity_rules(WS, &mut patterns, &mut replacements);

    // Replace WHATWG-disallowed characters with a numeric entity, unless they're followed by a digit or semicolon,
    // in which case add a semicolon to the encoded entity.
    push_entity_rules(WHATWG_UNQUOTED, &mut patterns, &mut replacements);

    // Replace all `<` with `&LT`, unless the chevron is followed by a semicolon,
    // in which case add a semicolon to the encoded entity.
    // Use `&LT` instead of `&lt` as `&lt` has more conflicting entities e.g. `&ltcc;`, `&ltdot;`.
    patterns.push(b"<;".to_vec());
    replacements.push(b"&LT;;".to_vec());
    patterns.push(b"<".to_vec());
    replacements.push(b"&LT".to_vec());

    // Replace all `>` with `&GT`, unless the chevron is followed by a semicolon,
    // in which case add a semicolon to the encoded entity.
    // Use `&GT` instead of `&gt` as `&gt` has more conflicting entities e.g. `&gtcc;`, `&gtdot;`.
    patterns.push(b">;".to_vec());
    replacements.push(b"&GT;;".to_vec());
    patterns.push(b">".to_vec());
    replacements.push(b"&GT".to_vec());

    // `replacements[i]` corresponds to `patterns[i]`; leftmost-longest matching makes the
    // two-byte "followed by digit/semicolon" patterns win over their single-byte counterparts.
    Replacer::new(
        AhoCorasickBuilder::new()
            .dfa(true)
            .match_kind(MatchKind::LeftmostLongest)
            .build(patterns),
        replacements,
    )
}
// Shared replacer instances, each initialised from its builder function by `lazy_static!`.
lazy_static! {
static ref DOUBLE_QUOTED_REPLACER: Replacer = build_double_quoted_replacer();
static ref SINGLE_QUOTED_REPLACER: Replacer = build_single_quoted_replacer();
// Used by `encode_unquoted` when its `whatwg` flag is false.
static ref UNQUOTED_QUOTED_REPLACER: Replacer = build_unquoted_replacer();
// Used by `encode_unquoted` when its `whatwg` flag is true.
static ref WHATWG_UNQUOTED_QUOTED_REPLACER: Replacer = build_whatwg_unquoted_replacer();
}
pub struct AttrMinifiedValue {
@ -164,8 +239,12 @@ pub fn encode_using_single_quotes(val: &[u8]) -> AttrMinifiedValue {
}
}
pub fn encode_unquoted(val: &[u8]) -> AttrMinifiedValue {
let data = UNQUOTED_QUOTED_REPLACER.replace_all(val);
pub fn encode_unquoted(val: &[u8], whatwg: bool) -> AttrMinifiedValue {
let data = if whatwg {
WHATWG_UNQUOTED_QUOTED_REPLACER.replace_all(val)
} else {
UNQUOTED_QUOTED_REPLACER.replace_all(val)
};
let prefix: &'static [u8] = match data.get(0) {
Some(b'"') => match data.get(1) {
Some(&c2) if DIGIT[c2] || c2 == b';' => b"&#34;",
@ -259,7 +338,10 @@ pub fn minify_attr(
if sq.len() < min.len() {
min = sq;
};
let uq = encode_unquoted(&encoded);
let uq = encode_unquoted(
&encoded,
cfg.ensure_spec_compliant_unquoted_attribute_values,
);
if uq.len() < min.len() {
min = uq;
};

View file

@ -22,7 +22,7 @@ fn test_encode_using_single_quotes() {
#[test]
fn test_encode_unquoted() {
let min = encode_unquoted(br#""123' 'h 0 &amp&amp; ;abbibi "' \ >& 3>;"#);
let min = encode_unquoted(br#""123' 'h 0 &amp&amp; ;abbibi "' \ >& 3>;"#, false);
assert_eq!(
min.str(),
r#"&#34;123'&#32'h&#32&#32&#32;0&#32&amp&amp;&#32;;abbibi&#32"'&#32\&#32&GT&&#32;3&GT;;"#,

View file

@ -250,6 +250,18 @@ fn test_attr_unquoted_value_minification() {
eval(b"<a b=hello></a>", b"<a b=hello></a>");
}
// Checks attribute value minification with `ensure_spec_compliant_unquoted_attribute_values` enabled.
#[test]
fn test_attr_whatwg_unquoted_value_minification() {
let mut cfg = super::Cfg::new();
cfg.ensure_spec_compliant_unquoted_attribute_values = true;
// A value consisting of a lone `=` is expected to come out double-quoted rather than unquoted.
eval_with_cfg(b"<a b==></a>", br#"<a b="="></a>"#, &cfg);
// A value mixing backticks, quotes, chevrons and `=` is expected to come out double-quoted,
// with only the `"` encoded (as `&#34`).
eval_with_cfg(
br#"<a b=`'"<<==/`/></a>"#,
br#"<a b="`'&#34<<==/`/"></a>"#,
&cfg,
);
}
#[test]
fn test_class_attr_value_minification() {
eval(b"<a class=&#x20;c></a>", b"<a class=c></a>");