Improve handling of RCDATA text content in edge cases
This commit is contained in:
parent
c8649df0af
commit
013afa007e
|
@ -1,5 +1,9 @@
|
|||
# minify-html changelog
|
||||
|
||||
## Pending
|
||||
|
||||
- Improve handling of RCDATA text content in edge cases.
|
||||
|
||||
## 0.10.5
|
||||
|
||||
- Do not encode entities in RCDATA text content (e.g. contents of `<textarea>` and `<title>`).
|
||||
|
|
|
@ -129,8 +129,8 @@ pub fn c14n_serialise_ast<T: Write>(out: &mut T, node: &NodeData) -> std::io::Re
|
|||
out.write_all(code)?;
|
||||
out.write_all(b"?>")?;
|
||||
}
|
||||
NodeData::RcdataContent { content } => {
|
||||
out.write_all(content)?;
|
||||
NodeData::RcdataContent { typ, text } => {
|
||||
out.write_all(&TEXT_REPLACER.replace_all(text))?;
|
||||
}
|
||||
NodeData::ScriptOrStyleContent { code, .. } => {
|
||||
out.write_all(code)?;
|
||||
|
|
|
@ -48,6 +48,12 @@ impl PartialEq for AttrVal {
|
|||
|
||||
impl Eq for AttrVal {}
|
||||
|
||||
/// Identifies which RCDATA element a piece of text content belongs to.
///
/// The type is a field-less tag that is handed around by value, so it is
/// `Copy` in addition to being comparable and debuggable.
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub enum RcdataContentType {
    /// Contents of a `<textarea>` element.
    Textarea,
    /// Contents of a `<title>` element.
    Title,
}
|
||||
|
||||
// Derive Eq for testing.
|
||||
#[derive(Eq, PartialEq)]
|
||||
pub enum NodeData {
|
||||
|
@ -85,7 +91,8 @@ pub enum NodeData {
|
|||
},
|
||||
// <title> or <textarea> element contents.
|
||||
RcdataContent {
|
||||
content: Vec<u8>,
|
||||
typ: RcdataContentType,
|
||||
text: Vec<u8>,
|
||||
},
|
||||
// Entities should not be decoded in ScriptOrStyleContent.
|
||||
ScriptOrStyleContent {
|
||||
|
@ -143,9 +150,10 @@ impl Debug for NodeData {
|
|||
.field("code", &from_utf8(code).unwrap().to_string())
|
||||
.field("ended", ended)
|
||||
.finish(),
|
||||
NodeData::RcdataContent { content } => f
|
||||
NodeData::RcdataContent { typ, text } => f
|
||||
.debug_struct("RcdataContent")
|
||||
.field("content", &from_utf8(content).unwrap().to_string())
|
||||
.field("typ", typ)
|
||||
.field("text", &from_utf8(text).unwrap().to_string())
|
||||
.finish(),
|
||||
NodeData::ScriptOrStyleContent { code, lang } => f
|
||||
.debug_struct("ScriptOrStyleContent")
|
||||
|
|
|
@ -144,7 +144,7 @@ pub fn minify_content(
|
|||
children,
|
||||
),
|
||||
NodeData::Instruction { code, ended } => minify_instruction(cfg, out, &code, ended),
|
||||
NodeData::RcdataContent { content } => minify_rcdata(cfg, out, &content),
|
||||
NodeData::RcdataContent { typ, text } => minify_rcdata(out, typ, &text),
|
||||
NodeData::ScriptOrStyleContent { code, lang } => match lang {
|
||||
ScriptOrStyleLang::CSS => minify_css(cfg, out, &code),
|
||||
ScriptOrStyleLang::Data => out.extend_from_slice(&code),
|
||||
|
|
|
@ -1,5 +1,17 @@
|
|||
use crate::cfg::Cfg;
|
||||
use crate::{ast::RcdataContentType, tag::{TAG_TEXTAREA_END, TAG_TITLE_END}, entity::encode::encode_entities};
|
||||
|
||||
pub fn minify_rcdata(cfg: &Cfg, out: &mut Vec<u8>, content: &[u8]) {
|
||||
out.extend_from_slice(content);
|
||||
pub fn minify_rcdata(out: &mut Vec<u8>, typ: RcdataContentType, text: &[u8]) {
|
||||
// Encode entities, since they're still decoded by the browser.
|
||||
let html = encode_entities(text, false);
|
||||
|
||||
// Since the text has been decoded, there may be unintentional matches to end tags that we must escape.
|
||||
let html = match typ {
|
||||
RcdataContentType::Textarea => &*TAG_TEXTAREA_END,
|
||||
RcdataContentType::Title => &*TAG_TITLE_END,
|
||||
}.replace_all_bytes(&html, &[match typ {
|
||||
RcdataContentType::Textarea => b"</textarea".as_slice(),
|
||||
RcdataContentType::Title => b"</title".as_slice(),
|
||||
}]);
|
||||
|
||||
out.extend_from_slice(&html);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use crate::ast::NodeData;
|
||||
use crate::ast::{NodeData, RcdataContentType};
|
||||
use crate::entity::decode::decode_entities;
|
||||
use crate::parse::content::ParsedContent;
|
||||
use crate::parse::Code;
|
||||
|
@ -12,7 +12,8 @@ pub fn parse_textarea_content(code: &mut Code) -> ParsedContent {
|
|||
ParsedContent {
|
||||
closing_tag_omitted,
|
||||
children: vec![NodeData::RcdataContent {
|
||||
content: decode_entities(code.slice_and_shift(len), false),
|
||||
typ: RcdataContentType::Textarea,
|
||||
text: decode_entities(code.slice_and_shift(len), false),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use crate::ast::NodeData;
|
||||
use crate::ast::{NodeData, RcdataContentType};
|
||||
use crate::entity::decode::decode_entities;
|
||||
use crate::parse::content::ParsedContent;
|
||||
use crate::parse::Code;
|
||||
|
@ -12,7 +12,8 @@ pub fn parse_title_content(code: &mut Code) -> ParsedContent {
|
|||
ParsedContent {
|
||||
closing_tag_omitted,
|
||||
children: vec![NodeData::RcdataContent {
|
||||
content: decode_entities(code.slice_and_shift(len), false),
|
||||
typ: RcdataContentType::Title,
|
||||
text: decode_entities(code.slice_and_shift(len), false),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue