Do not encode entities in RCDATA text content
This commit is contained in:
parent
f9dd173c7e
commit
cdd56e8667
|
@ -1,5 +1,9 @@
|
|||
# minify-html changelog
|
||||
|
||||
## Pending
|
||||
|
||||
- Do not encode entities in RCDATA text content (e.g. contents of `<textarea>` and `<title>`).
|
||||
|
||||
## 0.10.4
|
||||
|
||||
- Use FxHasher for internal hash-based data structures.
|
||||
|
|
|
@ -129,6 +129,9 @@ pub fn c14n_serialise_ast<T: Write>(out: &mut T, node: &NodeData) -> std::io::Re
|
|||
out.write_all(code)?;
|
||||
out.write_all(b"?>")?;
|
||||
}
|
||||
NodeData::RcdataContent { content } => {
|
||||
out.write_all(content)?;
|
||||
}
|
||||
NodeData::ScriptOrStyleContent { code, .. } => {
|
||||
out.write_all(code)?;
|
||||
}
|
||||
|
|
|
@ -83,6 +83,10 @@ pub enum NodeData {
|
|||
// If the source unexpectedly ended before `?>`, we can't add it, as otherwise output could be longer than source.
|
||||
ended: bool,
|
||||
},
|
||||
// <title> or <textarea> element contents.
|
||||
RcdataContent {
|
||||
content: Vec<u8>,
|
||||
},
|
||||
// Entities should not be decoded in ScriptOrStyleContent.
|
||||
ScriptOrStyleContent {
|
||||
code: Vec<u8>,
|
||||
|
@ -139,6 +143,10 @@ impl Debug for NodeData {
|
|||
.field("code", &from_utf8(code).unwrap().to_string())
|
||||
.field("ended", ended)
|
||||
.finish(),
|
||||
NodeData::RcdataContent { content } => f
|
||||
.debug_struct("RcdataContent")
|
||||
.field("content", &from_utf8(content).unwrap().to_string())
|
||||
.finish(),
|
||||
NodeData::ScriptOrStyleContent { code, lang } => f
|
||||
.debug_struct("ScriptOrStyleContent")
|
||||
.field("code", &from_utf8(code).unwrap().to_string())
|
||||
|
|
|
@ -14,6 +14,7 @@ mod common;
|
|||
mod entity;
|
||||
mod minify;
|
||||
mod parse;
|
||||
mod tag;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
|
|
|
@ -19,6 +19,8 @@ use crate::minify::element::minify_element;
|
|||
use crate::minify::instruction::minify_instruction;
|
||||
use crate::minify::js::minify_js;
|
||||
|
||||
use super::rcdata::minify_rcdata;
|
||||
|
||||
fn build_chevron_replacer() -> Replacer {
|
||||
let mut patterns = Vec::<Vec<u8>>::new();
|
||||
let mut replacements = Vec::<Vec<u8>>::new();
|
||||
|
@ -142,6 +144,7 @@ pub fn minify_content(
|
|||
children,
|
||||
),
|
||||
NodeData::Instruction { code, ended } => minify_instruction(cfg, out, &code, ended),
|
||||
NodeData::RcdataContent { content } => minify_rcdata(cfg, out, &content),
|
||||
NodeData::ScriptOrStyleContent { code, lang } => match lang {
|
||||
ScriptOrStyleLang::CSS => minify_css(cfg, out, &code),
|
||||
ScriptOrStyleLang::Data => out.extend_from_slice(&code),
|
||||
|
|
|
@ -7,5 +7,6 @@ pub mod doctype;
|
|||
pub mod element;
|
||||
pub mod instruction;
|
||||
pub mod js;
|
||||
pub mod rcdata;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
use crate::cfg::Cfg;
|
||||
|
||||
/// Emits the contents of an RCDATA node into the output buffer.
///
/// `content` is appended to `out` byte-for-byte; no entity encoding is applied here.
/// `cfg` is accepted but is not read by this function.
///
/// NOTE(review): the textarea/title parsers build `RcdataContent` from the result of
/// `decode_entities(...)`, so input such as `&lt;/textarea&gt;` presumably reaches this
/// function already decoded and would be written out as a literal closing tag. Confirm
/// whether `&` and `</` sequences need to be re-encoded before writing.
pub fn minify_rcdata(cfg: &Cfg, out: &mut Vec<u8>, content: &[u8]) {
|
||||
out.extend_from_slice(content);
|
||||
}
|
|
@ -164,8 +164,8 @@ pub fn parse_element(code: &mut Code, ns: Namespace, parent: &[u8]) -> NodeData
|
|||
let ParsedContent {
|
||||
closing_tag_omitted,
|
||||
children,
|
||||
} = match elem_name.as_slice() {
|
||||
b"script" => match attributes.get(b"type".as_ref()) {
|
||||
} = match (ns, elem_name.as_slice()) {
|
||||
(_, b"script") => match attributes.get(b"type".as_ref()) {
|
||||
Some(mime) if !JAVASCRIPT_MIME_TYPES.contains(mime.as_slice()) => {
|
||||
parse_script_content(code, ScriptOrStyleLang::Data)
|
||||
}
|
||||
|
@ -174,9 +174,9 @@ pub fn parse_element(code: &mut Code, ns: Namespace, parent: &[u8]) -> NodeData
|
|||
}
|
||||
_ => parse_script_content(code, ScriptOrStyleLang::JS),
|
||||
},
|
||||
b"style" => parse_style_content(code),
|
||||
b"textarea" => parse_textarea_content(code),
|
||||
b"title" => parse_title_content(code),
|
||||
(_, b"style") => parse_style_content(code),
|
||||
(Namespace::Html, b"textarea") => parse_textarea_content(code),
|
||||
(Namespace::Html, b"title") => parse_title_content(code),
|
||||
_ => parse_content(code, ns, parent, &elem_name),
|
||||
};
|
||||
|
||||
|
|
|
@ -1,27 +1,18 @@
|
|||
use aho_corasick::AhoCorasick;
|
||||
use aho_corasick::AhoCorasickBuilder;
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
use crate::ast::NodeData;
|
||||
use crate::entity::decode::decode_entities;
|
||||
use crate::parse::content::ParsedContent;
|
||||
use crate::parse::Code;
|
||||
|
||||
lazy_static! {
|
||||
static ref END: AhoCorasick = AhoCorasickBuilder::new()
|
||||
.ascii_case_insensitive(true)
|
||||
.build(&["</textarea"]);
|
||||
}
|
||||
use crate::tag::TAG_TEXTAREA_END;
|
||||
|
||||
pub fn parse_textarea_content(code: &mut Code) -> ParsedContent {
|
||||
let (len, closing_tag_omitted) = match END.find(code.as_slice()) {
|
||||
let (len, closing_tag_omitted) = match TAG_TEXTAREA_END.find(code.as_slice()) {
|
||||
Some(m) => (m.start(), false),
|
||||
None => (code.rem(), true),
|
||||
};
|
||||
ParsedContent {
|
||||
closing_tag_omitted,
|
||||
children: vec![NodeData::Text {
|
||||
value: decode_entities(code.slice_and_shift(len), false),
|
||||
children: vec![NodeData::RcdataContent {
|
||||
content: decode_entities(code.slice_and_shift(len), false),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,27 +1,18 @@
|
|||
use aho_corasick::AhoCorasick;
|
||||
use aho_corasick::AhoCorasickBuilder;
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
use crate::ast::NodeData;
|
||||
use crate::entity::decode::decode_entities;
|
||||
use crate::parse::content::ParsedContent;
|
||||
use crate::parse::Code;
|
||||
|
||||
lazy_static! {
|
||||
static ref END: AhoCorasick = AhoCorasickBuilder::new()
|
||||
.ascii_case_insensitive(true)
|
||||
.build(&["</title"]);
|
||||
}
|
||||
use crate::tag::TAG_TITLE_END;
|
||||
|
||||
pub fn parse_title_content(code: &mut Code) -> ParsedContent {
|
||||
let (len, closing_tag_omitted) = match END.find(code.as_slice()) {
|
||||
let (len, closing_tag_omitted) = match TAG_TITLE_END.find(code.as_slice()) {
|
||||
Some(m) => (m.start(), false),
|
||||
None => (code.rem(), true),
|
||||
};
|
||||
ParsedContent {
|
||||
closing_tag_omitted,
|
||||
children: vec![NodeData::Text {
|
||||
value: decode_entities(code.slice_and_shift(len), false),
|
||||
children: vec![NodeData::RcdataContent {
|
||||
content: decode_entities(code.slice_and_shift(len), false),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
use aho_corasick::AhoCorasick;
|
||||
use aho_corasick::AhoCorasickBuilder;
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
// Shared, lazily-built matchers used to locate the closing tag of RCDATA elements.
// Both match ASCII case-insensitively and omit the trailing `>` from the pattern.
lazy_static! {
|
||||
// Finds the literal `</textarea`.
pub static ref TAG_TEXTAREA_END: AhoCorasick = AhoCorasickBuilder::new()
|
||||
.ascii_case_insensitive(true)
|
||||
.build(&["</textarea"]);
|
||||
// Finds the literal `</title`.
pub static ref TAG_TITLE_END: AhoCorasick = AhoCorasickBuilder::new()
|
||||
.ascii_case_insensitive(true)
|
||||
.build(&["</title"]);
|
||||
}
|
Loading…
Reference in New Issue