Make tag and attribute names case insensitive
This commit is contained in:
parent
373128e466
commit
abfc4bceaa
11
README.md
11
README.md
|
@ -479,8 +479,6 @@ However, there are some syntax requirements for speed and sanity.
|
|||
|
||||
### Tags
|
||||
|
||||
Tag names are case sensitive. For example, this means that `P` won't be recognised as a content element, `bR` won't be considered as a void tag, and the contents of `Script` won't be parsed as JavaScript.
|
||||
|
||||
Tags must not be [omitted](https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission). Void tags must not have a separate closing tag e.g. `</input>`.
|
||||
|
||||
### Entities
|
||||
|
@ -495,16 +493,9 @@ If a named entity is an invalid reference as per the [specification](https://htm
|
|||
|
||||
Numeric character references that do not reference a valid [Unicode Scalar Value](https://www.unicode.org/glossary/#unicode_scalar_value) are considered malformed.
|
||||
|
||||
### Attributes
|
||||
|
||||
Backticks (`` ` ``) are not valid quote marks and not interpreted as such.
|
||||
However, backticks are valid attribute value quotes in Internet Explorer.
|
||||
|
||||
Special handling of some attributes requires case-sensitive names and values. For example, `CLASS` won't be recognised as an attribute to minify, and `type="Text/JavaScript"` on a `<script>` will not be removed.
|
||||
|
||||
### Script and style
|
||||
|
||||
`script` and `style` tags must be closed with `</script>` and `</style>` respectively (case sensitive).
|
||||
`script` and `style` tags must be closed with `</script` and `</style` respectively (case sensitive).
|
||||
|
||||
minify-html does **not** handle [escaped and double-escaped](./notes/Script%20data.md) script content.
|
||||
|
||||
|
|
|
@ -296,6 +296,11 @@ impl<'d> Processor<'d> {
|
|||
self.write_next += 1;
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn make_lowercase(&mut self, range: ProcessorRange) -> () {
|
||||
self.code[range.start..range.end].make_ascii_lowercase();
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn write_range(&mut self, s: ProcessorRange) -> ProcessorRange {
|
||||
let dest_start = self.write_next;
|
||||
|
|
|
@ -31,6 +31,9 @@ fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
|
|||
// Whitespace between the tags of an `<a>` element is expected to come out as a single
// space, and mixed-case opening or closing tag names are expected to be lowercased.
// Assumes `eval(src, expected)` asserts that minifying `src` yields `expected`; the
// helper is defined outside this view, TODO confirm.
#[test]
|
||||
fn test_collapse_whitespace() {
|
||||
eval(b"<a> \n  </a>", b"<a> </a>");
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<A> \n  </a>", b"<a> </a>");
|
||||
eval(b"<a> \n  </A>", b"<a> </a>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -38,6 +41,8 @@ fn test_collapse_and_trim_whitespace() {
|
|||
eval(b"<label> \n  </label>", b"<label></label>");
|
||||
eval(b"<label> \n a </label>", b"<label>a</label>");
|
||||
eval(b"<label> \n a b </label>", b"<label>a b</label>");
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<lAbEL> \n a b </LABel>", b"<label>a b</label>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -46,11 +51,15 @@ fn test_collapse_destroy_whole_and_trim_whitespace() {
|
|||
eval(b"<ul> \n a </ul>", b"<ul>a</ul>");
|
||||
eval(b"<ul> \n a b </ul>", b"<ul>a b</ul>");
|
||||
eval(b"<ul> \n a<pre></pre> <pre></pre>b </ul>", b"<ul>a<pre></pre><pre></pre>b</ul>");
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<uL> \n a b </UL>", b"<ul>a b</ul>");
|
||||
}
|
||||
|
||||
// Exercises whitespace handling inside `<pre>`, including a mixed-case spelling of the
// tag name that is expected to be lowercased in the output.
// Assumes `eval(src, expected)` asserts that minifying `src` yields `expected`; the
// helper is defined outside this view, TODO confirm.
// NOTE(review): as shown here the expected literal has one fewer space after `\n` than
// the input, which is at odds with the test name. Confirm against the repository source
// whether characters (e.g. an entity in the input) were lost when this page was rendered.
#[test]
|
||||
fn test_no_whitespace_minification() {
|
||||
eval(b"<pre> \n  \t </pre>", b"<pre> \n \t </pre>");
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<pRe> \n  \t </PRE>", b"<pre> \n \t </pre>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -78,6 +87,8 @@ fn test_removal_of_optional_tags() {
|
|||
</body>
|
||||
</html>
|
||||
"#, b"<html><head><body>");
|
||||
// Tag names should be case insensitive.
|
||||
eval(b"<RT></rt>", b"<rt>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -121,6 +132,8 @@ fn test_class_attr_value_minification() {
|
|||
eval(b"<a class=' c\n \n '></a>", b"<a class=c></a>");
|
||||
eval(b"<a class=' c\n \nd '></a>", b"<a class=\"c d\"></a>");
|
||||
eval(b"<a class=' \n \n '></a>", b"<a></a>");
|
||||
// Attribute names should be case insensitive.
|
||||
eval(b"<a CLasS=' \n \n '></a>", b"<a></a>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -134,6 +147,8 @@ fn test_d_attr_value_minification() {
|
|||
eval(b"<svg><path d=' c\n \n ' /></svg>", b"<svg><path d=c /></svg>");
|
||||
eval(b"<svg><path d=' c\n \nd ' /></svg>", b"<svg><path d=\"c d\"/></svg>");
|
||||
eval(b"<svg><path d=' \n \n ' /></svg>", b"<svg><path/></svg>");
|
||||
// Attribute names should be case insensitive.
|
||||
eval(b"<svg><path D=' \n \n ' /></svg>", b"<svg><path/></svg>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -145,6 +160,8 @@ fn test_boolean_attr_value_removal() {
|
|||
eval(b"<div hidden=\"abc\"></div>", b"<div hidden></div>");
|
||||
eval(b"<div hidden=\"\"></div>", b"<div hidden></div>");
|
||||
eval(b"<div hidden></div>", b"<div hidden></div>");
|
||||
// Attribute names should be case insensitive.
|
||||
eval(b"<div HIDden=\"true\"></div>", b"<div hidden></div>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -161,6 +178,8 @@ fn test_default_attr_value_removal() {
|
|||
eval(b"<a target=\"_self\"></a>", b"<a></a>");
|
||||
eval(b"<a target='_self'></a>", b"<a></a>");
|
||||
eval(b"<a target=_self></a>", b"<a></a>");
|
||||
// Attribute names should be case insensitive.
|
||||
eval(b"<a taRGET='_self'></a>", b"<a></a>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -169,6 +188,8 @@ fn test_script_type_attr_value_removal() {
|
|||
eval(b"<script type=\"application/javascript\"></script>", b"<script></script>");
|
||||
eval(b"<script type=\"text/jscript\"></script>", b"<script></script>");
|
||||
eval(b"<script type=\"text/plain\"></script>", b"<script type=text/plain></script>");
|
||||
// Tag and attribute names should be case insensitive.
|
||||
eval(b"<SCRipt TYPE=\"application/ecmascript\"></script>", b"<script></script>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -28,6 +28,7 @@ pub fn process_attr(proc: &mut Processor, ns: Namespace, element: ProcessorRange
|
|||
// It's possible to expect attribute name but not be called at an attribute, e.g. due to whitespace between name and
|
||||
// value, which causes name to be considered boolean attribute and `=` to be start of new (invalid) attribute name.
|
||||
let name = proc.m(WhileInLookup(ATTR_NAME_CHAR), Keep).require("attribute name")?;
|
||||
proc.make_lowercase(name);
|
||||
let attr_cfg = ATTRS.get(ns, &proc[element], &proc[name]);
|
||||
let is_boolean = attr_cfg.filter(|attr| attr.boolean).is_some();
|
||||
let after_name = Checkpoint::new(proc);
|
||||
|
|
|
@ -99,6 +99,7 @@ pub fn process_tag(proc: &mut Processor, cfg: &Cfg, ns: Namespace, mut prev_sibl
|
|||
proc.m(IsChar(b'<'), Discard).expect();
|
||||
// May not be valid tag name at current position, so require instead of expect.
|
||||
let source_tag_name = proc.m(WhileInLookup(TAG_NAME_CHAR), Discard).require("tag name")?;
|
||||
proc.make_lowercase(source_tag_name);
|
||||
if prev_sibling_closing_tag.exists_and(|prev_tag|
|
||||
CLOSING_TAG_OMISSION_RULES
|
||||
.get(&proc[prev_tag])
|
||||
|
@ -219,6 +220,7 @@ pub fn process_tag(proc: &mut Processor, cfg: &Cfg, ns: Namespace, mut prev_sibl
|
|||
// Require closing tag for non-void.
|
||||
proc.m(IsSeq(b"</"), Discard).require("closing tag")?;
|
||||
let closing_tag = proc.m(WhileInLookup(TAG_NAME_CHAR), Discard).require("closing tag name")?;
|
||||
proc.make_lowercase(closing_tag);
|
||||
// We need to check closing tag matches as otherwise when we later write closing tag, it might be longer than source closing tag and cause source to be overwritten.
|
||||
if !proc[closing_tag].eq(&proc[tag_name]) {
|
||||
return Err(ErrorType::ClosingTagMismatch {
|
||||
|
|
Loading…
Reference in New Issue