Clean up dead code and tests

This commit is contained in:
Wilson Lin 2021-08-06 17:54:23 +10:00
parent 383b2b3423
commit bf37e37e71
18 changed files with 70 additions and 138 deletions

View File

@ -49,12 +49,3 @@ pub enum NodeData {
value: Vec<u8>,
},
}
impl NodeData {
    /// Reports whether this node is the `NodeData::Element` variant.
    ///
    /// Returns `true` for `Element` regardless of its field values, and
    /// `false` for every other variant.
    pub fn is_element(&self) -> bool {
        matches!(self, NodeData::Element { .. })
    }
}

View File

@ -12,6 +12,7 @@ mod minify;
mod parse;
mod pattern;
mod spec;
#[cfg(test)]
mod tests;
/// Copies a slice into a new Vec and minifies it, returning the Vec.
@ -37,13 +38,7 @@ mod tests;
/// ```
pub fn minify(src: &[u8], cfg: &Cfg) -> Vec<u8> {
let mut code = Code::new(src);
let parsed = parse_content(
cfg,
&mut code,
Namespace::Html,
EMPTY_TAG_NAME,
EMPTY_TAG_NAME,
);
let parsed = parse_content(&mut code, Namespace::Html, EMPTY_TAG_NAME, EMPTY_TAG_NAME);
let mut out = Vec::with_capacity(src.len());
minify_content(cfg, &mut out, false, EMPTY_TAG_NAME, parsed.children);
out

View File

@ -104,7 +104,7 @@ lazy_static! {
/// Kind of form an attribute was written in.
///
/// `minify_attr_val` tags its unquoted candidate with `Unquoted`, and
/// `minify_element` compares the previously written attribute's kind against
/// these variants when deciding whether to push a separating space.
#[derive(Copy, Clone, Eq, PartialEq)]
pub enum AttrType {
/// NOTE(review): this listing is a rendered diff in which `None` and
/// `NoValue` appear side by side; presumably `None` is the older name that
/// `NoValue` replaces — confirm against the actual file.
None,
/// Attribute written as a bare name; `minify_element` assigns this when the
/// attribute's value is empty.
NoValue,
/// Presumably a value wrapped in quote characters — TODO confirm; no
/// assignment of this variant is visible in these hunks.
Quoted,
/// Value written with an empty prefix and an empty suffix (the `unquoted`
/// candidate built in `minify_attr_val`).
Unquoted,
}
@ -169,13 +169,13 @@ pub fn minify_attr_val(val: &[u8]) -> AttrValMinified {
suffix: b"'",
};
let unquoted = {
let mut res = UNQUOTED_QUOTED_REPLACER.replace_all(val);
let first_char_encoded: &'static [u8] = match res.get(0) {
Some(b'"') => match res.get(1) {
let data = UNQUOTED_QUOTED_REPLACER.replace_all(val);
let first_char_encoded: &'static [u8] = match data.get(0) {
Some(b'"') => match data.get(1) {
Some(&s) if DIGIT[s] || s == b';' => b"&#34;",
_ => b"&#34",
},
Some(b'\'') => match res.get(1) {
Some(b'\'') => match data.get(1) {
Some(&s) if DIGIT[s] || s == b';' => b"&#39;",
_ => b"&#39",
},
@ -185,7 +185,7 @@ pub fn minify_attr_val(val: &[u8]) -> AttrValMinified {
AttrValMinified {
typ: AttrType::Unquoted,
prefix: b"",
data: res,
data,
start,
suffix: b"",
}

View File

@ -12,9 +12,7 @@ use crate::minify::instruction::minify_instruction;
use crate::minify::js::minify_js;
use crate::pattern::Replacer;
use crate::spec::entity::encode::encode_ampersands;
use crate::spec::tag::ns::Namespace;
use crate::spec::tag::whitespace::{get_whitespace_minification_for_tag, WhitespaceMinification};
use crate::spec::tag::EMPTY_TAG_NAME;
fn build_chevron_replacer() -> Replacer {
let mut patterns = Vec::<Vec<u8>>::new();

View File

@ -1,20 +1,12 @@
use std::collections::HashMap;
use crate::ast::{ElementClosingTag, NodeData, ScriptOrStyleLang};
use crate::ast::{ElementClosingTag, NodeData};
use crate::cfg::Cfg;
use crate::gen::codepoints::TAG_NAME_CHAR;
use crate::minify::attr::{minify_attr_val, AttrType, AttrValMinified};
use crate::minify::bang::minify_bang;
use crate::minify::comment::minify_comment;
use crate::minify::attr::{minify_attr_val, AttrType};
use crate::minify::content::minify_content;
use crate::minify::css::minify_css;
use crate::minify::instruction::minify_instruction;
use crate::minify::js::minify_js;
use crate::pattern::Replacer;
use crate::spec::entity::encode::encode_ampersands;
use crate::spec::tag::ns::Namespace;
use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
use crate::spec::tag::EMPTY_TAG_NAME;
pub fn minify_element(
cfg: &Cfg,
@ -38,13 +30,15 @@ pub fn minify_element(
out.push(b'<');
out.extend_from_slice(tag_name);
let mut last_attr = AttrType::None;
let mut last_attr = AttrType::NoValue;
for (name, value) in attributes {
if !cfg.remove_spaces_between_attributes || last_attr == AttrType::Unquoted {
if !cfg.remove_spaces_between_attributes || last_attr != AttrType::Quoted {
out.push(b' ');
};
out.extend_from_slice(&name);
if !value.is_empty() {
if value.is_empty() {
last_attr = AttrType::NoValue;
} else {
let min = minify_attr_val(&encode_ampersands(&value, true));
out.push(b'=');
min.out(out);

View File

@ -1,9 +1,8 @@
use crate::ast::NodeData;
use crate::parse::Code;
use crate::Cfg;
use memchr::memchr;
pub fn parse_bang(cfg: &Cfg, code: &mut Code) -> NodeData {
pub fn parse_bang(code: &mut Code) -> NodeData {
debug_assert!(code.str().starts_with(b"<!"));
code.shift(2);
let (len, matched) = match memchr(b'>', code.str()) {

View File

@ -3,13 +3,12 @@ use lazy_static::lazy_static;
use crate::ast::NodeData;
use crate::parse::Code;
use crate::Cfg;
lazy_static! {
static ref COMMENT_END: AhoCorasick = AhoCorasick::new(&["-->"]);
}
pub fn parse_comment(cfg: &Cfg, code: &mut Code) -> NodeData {
pub fn parse_comment(code: &mut Code) -> NodeData {
debug_assert!(code.str().starts_with(b"<!--"));
code.shift(4);
let (len, matched) = match COMMENT_END.find(code.str()) {

View File

@ -13,7 +13,6 @@ use crate::spec::entity::decode::decode_entities;
use crate::spec::tag::ns::Namespace;
use crate::spec::tag::omission::{can_omit_as_before, can_omit_as_last_node};
use crate::spec::tag::void::VOID_TAGS;
use crate::Cfg;
#[derive(Copy, Clone, Eq, PartialEq)]
enum ContentType {
@ -53,7 +52,6 @@ pub struct ParsedContent {
// Use empty slice for `grandparent` or `parent` if none.
pub fn parse_content(
cfg: &Cfg,
code: &mut Code,
ns: Namespace,
grandparent: &[u8],
@ -110,14 +108,14 @@ pub fn parse_content(
};
match typ {
Text => break,
OpeningTag => nodes.push(parse_element(cfg, code, ns, parent)),
OpeningTag => nodes.push(parse_element(code, ns, parent)),
ClosingTag => {
closing_tag_omitted = false;
break;
}
Instruction => nodes.push(parse_instruction(cfg, code)),
Bang => nodes.push(parse_bang(cfg, code)),
Comment => nodes.push(parse_comment(cfg, code)),
Instruction => nodes.push(parse_instruction(code)),
Bang => nodes.push(parse_bang(code)),
Comment => nodes.push(parse_comment(code)),
MalformedLeftChevronSlash => code.shift(match memrchr(b'>', code.str()) {
Some(m) => m + 1,
None => code.rem(),

View File

@ -14,7 +14,6 @@ use crate::spec::entity::decode::decode_entities;
use crate::spec::script::JAVASCRIPT_MIME_TYPES;
use crate::spec::tag::ns::Namespace;
use crate::spec::tag::void::VOID_TAGS;
use crate::Cfg;
fn parse_tag_name(code: &mut Code) -> Vec<u8> {
debug_assert!(code.str().starts_with(b"<"));
@ -41,9 +40,9 @@ pub struct ParsedTag {
// While not valid, attributes in closing tags still need to be parsed (and then discarded) as attributes e.g. `</div x=">">`, which is why this function is used for both opening and closing tags.
// TODO Use generics to create version that doesn't create a HashMap.
pub fn parse_tag(code: &mut Code) -> ParsedTag {
let mut elem_name = parse_tag_name(code);
let elem_name = parse_tag_name(code);
let mut attributes = HashMap::<Vec<u8>, Vec<u8>>::new();
let mut self_closing = false;
let self_closing;
loop {
// At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one).
let last = code.shift_while_in_lookup(WHITESPACE_OR_SLASH);
@ -90,7 +89,7 @@ pub fn parse_tag(code: &mut Code) -> ParsedTag {
// `<` or `</` must be next. If `</` is next, tag is reinterpreted as opening tag (i.e. `/` is ignored).
// `parent` should be an empty slice if it doesn't exist.
pub fn parse_element(cfg: &Cfg, code: &mut Code, ns: Namespace, parent: &[u8]) -> NodeData {
pub fn parse_element(code: &mut Code, ns: Namespace, parent: &[u8]) -> NodeData {
let ParsedTag {
name: elem_name,
attributes,
@ -126,19 +125,19 @@ pub fn parse_element(cfg: &Cfg, code: &mut Code, ns: Namespace, parent: &[u8]) -
};
let ParsedContent {
mut closing_tag_omitted,
closing_tag_omitted,
children,
} = match elem_name.as_slice() {
// TODO to_vec call allocates every time?
b"script" => match attributes.get(&b"type".to_vec()) {
Some(mime) if !JAVASCRIPT_MIME_TYPES.contains(mime.as_slice()) => {
parse_script_content(cfg, code, ScriptOrStyleLang::Data)
parse_script_content(code, ScriptOrStyleLang::Data)
}
_ => parse_script_content(cfg, code, ScriptOrStyleLang::JS),
_ => parse_script_content(code, ScriptOrStyleLang::JS),
},
b"style" => parse_style_content(cfg, code),
b"textarea" => parse_textarea_content(cfg, code),
_ => parse_content(cfg, code, child_ns, parent, &elem_name),
b"style" => parse_style_content(code),
b"textarea" => parse_textarea_content(code),
_ => parse_content(code, child_ns, parent, &elem_name),
};
if !closing_tag_omitted {

View File

@ -3,13 +3,12 @@ use lazy_static::lazy_static;
use crate::ast::NodeData;
use crate::parse::Code;
use crate::Cfg;
lazy_static! {
static ref INSTRUCTION_END: AhoCorasick = AhoCorasick::new(&["?>"]);
}
pub fn parse_instruction(cfg: &Cfg, code: &mut Code) -> NodeData {
pub fn parse_instruction(code: &mut Code) -> NodeData {
debug_assert!(code.str().starts_with(b"<?"));
code.shift(2);
let (len, matched) = match INSTRUCTION_END.find(code.str()) {

View File

@ -124,10 +124,6 @@ impl<'c> Code<'c> {
last
}
/// Looks up the byte located `i` positions past the current `next` offset.
///
/// Returns `None` when that position falls outside `code`.
pub fn get(&self, i: usize) -> Option<u8> {
    let index = self.next + i;
    self.code.get(index).copied()
}
/// Returns how many bytes lie at or after the `next` offset, i.e. the
/// length of `code` minus `next`.
pub fn rem(&self) -> usize {
    let total = self.code.len();
    total - self.next
}

View File

@ -5,7 +5,6 @@ use lazy_static::lazy_static;
use crate::ast::{NodeData, ScriptOrStyleLang};
use crate::parse::content::ParsedContent;
use crate::parse::Code;
use crate::Cfg;
lazy_static! {
static ref END: AhoCorasick = AhoCorasickBuilder::new()
@ -13,7 +12,7 @@ lazy_static! {
.build(&["</script"]);
}
pub fn parse_script_content(cfg: &Cfg, code: &mut Code, lang: ScriptOrStyleLang) -> ParsedContent {
pub fn parse_script_content(code: &mut Code, lang: ScriptOrStyleLang) -> ParsedContent {
let (len, closing_tag_omitted) = match END.find(code.str()) {
Some(m) => (m.start(), false),
None => (code.rem(), true),

View File

@ -5,7 +5,6 @@ use lazy_static::lazy_static;
use crate::ast::{NodeData, ScriptOrStyleLang};
use crate::parse::content::ParsedContent;
use crate::parse::Code;
use crate::Cfg;
lazy_static! {
static ref END: AhoCorasick = AhoCorasickBuilder::new()
@ -13,7 +12,7 @@ lazy_static! {
.build(&["</style"]);
}
pub fn parse_style_content(cfg: &Cfg, code: &mut Code) -> ParsedContent {
pub fn parse_style_content(code: &mut Code) -> ParsedContent {
let (len, closing_tag_omitted) = match END.find(code.str()) {
Some(m) => (m.start(), false),
None => (code.rem(), true),

View File

@ -6,7 +6,6 @@ use crate::ast::NodeData;
use crate::parse::content::ParsedContent;
use crate::parse::Code;
use crate::spec::entity::decode::decode_entities;
use crate::Cfg;
lazy_static! {
static ref END: AhoCorasick = AhoCorasickBuilder::new()
@ -14,7 +13,7 @@ lazy_static! {
.build(&["</textarea"]);
}
pub fn parse_textarea_content(cfg: &Cfg, code: &mut Code) -> ParsedContent {
pub fn parse_textarea_content(code: &mut Code) -> ParsedContent {
let (len, closing_tag_omitted) = match END.find(code.str()) {
Some(m) => (m.start(), false),
None => (code.rem(), true),

View File

@ -1,2 +1,4 @@
pub mod decode;
pub mod encode;
#[cfg(test)]
mod tests;

View File

@ -0,0 +1,7 @@
use crate::spec::entity::encode::encode_ampersands;
/// Exercises `encode_ampersands` with its second argument set to `false`:
/// a `<` followed by a space must stay as-is, while the `<` in `<?` and `</`
/// must come back as `&LT`.
#[test]
fn test_encode_ampersands_works_for_content() {
    let input: &[u8] = b"1 is < than 2 <? </";
    let expected: Vec<u8> = b"1 is < than 2 &LT? &LT/".to_vec();
    assert_eq!(encode_ampersands(input, false), expected);
}

View File

@ -0,0 +1 @@
mod encode;

View File

@ -1,59 +1,28 @@
#[cfg(test)]
fn _eval(src: &'static [u8], expected: &'static [u8], cfg: &super::Cfg) -> () {
let mut code = src.to_vec();
match super::with_friendly_error(&mut code, cfg) {
Ok(len) => {
assert_eq!(
std::str::from_utf8(&code[..len]).unwrap(),
std::str::from_utf8(expected).unwrap()
);
}
Err(super::FriendlyError {
code_context,
message,
..
}) => {
println!("{}", message);
println!("{}", code_context);
assert!(false);
}
};
}
#[cfg(test)]
fn _eval_error(src: &'static [u8], expected: ErrorType, cfg: &super::Cfg) -> () {
let mut code = src.to_vec();
let min = super::minify(&mut code, cfg);
assert_eq!(
super::in_place(&mut code, cfg).unwrap_err().error_type,
expected
std::str::from_utf8(expected).unwrap(),
std::str::from_utf8(&min).unwrap()
);
}
#[cfg(test)]
fn eval(src: &'static [u8], expected: &'static [u8]) -> () {
_eval(
src,
expected,
&super::Cfg {
minify_js: false,
minify_css: false,
minify_js: false,
omit_closing_tags: true,
remove_bangs: true,
remove_comments: true,
remove_processing_instructions: true,
remove_spaces_between_attributes: true,
},
);
}
/// Test helper: forwards `src` and `expected` to `_eval_error` using a
/// configuration in which both `minify_js` and `minify_css` are `false`.
#[cfg(test)]
fn eval_error(src: &'static [u8], expected: ErrorType) {
    let cfg = super::Cfg {
        minify_js: false,
        minify_css: false,
    };
    _eval_error(src, expected, &cfg);
}
#[cfg(test)]
#[cfg(feature = "js-esbuild")]
fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
_eval(
@ -62,11 +31,15 @@ fn eval_with_js_min(src: &'static [u8], expected: &'static [u8]) -> () {
&super::Cfg {
minify_js: true,
minify_css: false,
omit_closing_tags: true,
remove_bangs: true,
remove_comments: true,
remove_processing_instructions: true,
remove_spaces_between_attributes: true,
},
);
}
#[cfg(test)]
#[cfg(feature = "js-esbuild")]
fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
_eval(
@ -75,6 +48,11 @@ fn eval_with_css_min(src: &'static [u8], expected: &'static [u8]) -> () {
&super::Cfg {
minify_js: false,
minify_css: true,
omit_closing_tags: true,
remove_bangs: true,
remove_comments: true,
remove_processing_instructions: true,
remove_spaces_between_attributes: true,
},
);
}
@ -183,35 +161,14 @@ fn test_parsing_with_omitted_tags() {
#[test]
fn test_unmatched_closing_tag() {
eval_error(b"Hello</p>Goodbye", ErrorType::UnexpectedClosingTag);
eval_error(b"Hello<br></br>Goodbye", ErrorType::UnexpectedClosingTag);
eval_error(
b"<div>Hello</p>Goodbye",
ErrorType::ClosingTagMismatch {
expected: "div".to_string(),
got: "p".to_string(),
},
);
eval_error(
b"<ul><li>a</p>",
ErrorType::ClosingTagMismatch {
expected: "ul".to_string(),
got: "p".to_string(),
},
);
eval_error(
b"<ul><li><rt>a</p>",
ErrorType::ClosingTagMismatch {
expected: "ul".to_string(),
got: "p".to_string(),
},
);
eval_error(
eval(b"Hello</p>Goodbye", b"Hello<p>Goodbye");
eval(b"Hello<br></br>Goodbye", b"Hello<br>Goodbye");
eval(b"<div>Hello</p>Goodbye", b"<div>Hello</p>Goodbye");
eval(b"<ul><li>a</p>", b"<ul><li>a<p>");
eval(b"<ul><li><rt>a</p>", b"<ul><li><rt>a<p>");
eval(
b"<html><head><body><ul><li><rt>a</p>",
ErrorType::ClosingTagMismatch {
expected: "ul".to_string(),
got: "p".to_string(),
},
b"<html><head><body><ul><li><rt>a<p>",
);
}