141 lines
4.7 KiB
Rust
141 lines
4.7 KiB
Rust
use std::io::Write;
|
|
|
|
use aho_corasick::{AhoCorasickBuilder, MatchKind};
|
|
use lazy_static::lazy_static;
|
|
|
|
use crate::ast::{ElementClosingTag, NodeData};
|
|
use crate::common::pattern::Replacer;
|
|
|
|
lazy_static! {
|
|
static ref TEXT_REPLACER: Replacer = Replacer::new(
|
|
AhoCorasickBuilder::new()
|
|
.dfa(true)
|
|
.match_kind(MatchKind::LeftmostLongest)
|
|
.build(vec![b"&".to_vec(), b"<".to_vec(),]),
|
|
vec![b"&".to_vec(), b"<".to_vec(),],
|
|
);
|
|
static ref DOUBLE_QUOTED_REPLACER: Replacer = Replacer::new(
|
|
AhoCorasickBuilder::new()
|
|
.dfa(true)
|
|
.match_kind(MatchKind::LeftmostLongest)
|
|
.build(vec![b"&".to_vec(), b"\"".to_vec(),]),
|
|
vec![b"&".to_vec(), b""".to_vec(),],
|
|
);
|
|
static ref SINGLE_QUOTED_REPLACER: Replacer = Replacer::new(
|
|
AhoCorasickBuilder::new()
|
|
.dfa(true)
|
|
.match_kind(MatchKind::LeftmostLongest)
|
|
.build(vec![b"&".to_vec(), b"'".to_vec(),]),
|
|
vec![b"&".to_vec(), b"'".to_vec(),],
|
|
);
|
|
static ref UNQUOTED_REPLACER: Replacer = Replacer::new(
|
|
AhoCorasickBuilder::new()
|
|
.dfa(true)
|
|
.match_kind(MatchKind::LeftmostLongest)
|
|
.build(vec![
|
|
b"&".to_vec(),
|
|
b">".to_vec(),
|
|
b"\"".to_vec(),
|
|
b"'".to_vec(),
|
|
b"\x09".to_vec(),
|
|
b"\x0a".to_vec(),
|
|
b"\x0c".to_vec(),
|
|
b"\x0d".to_vec(),
|
|
b"\x20".to_vec(),
|
|
]),
|
|
vec![
|
|
b"&".to_vec(),
|
|
b">".to_vec(),
|
|
b""".to_vec(),
|
|
b"'".to_vec(),
|
|
b"	".to_vec(),
|
|
b" ".to_vec(),
|
|
b"".to_vec(),
|
|
b" ".to_vec(),
|
|
b" ".to_vec(),
|
|
],
|
|
);
|
|
}
|
|
|
|
pub fn c14n_serialise_ast<T: Write>(out: &mut T, node: &NodeData) -> std::io::Result<()> {
|
|
match node {
|
|
NodeData::Bang { code, .. } => {
|
|
out.write_all(b"<!")?;
|
|
out.write_all(code)?;
|
|
out.write_all(b">")?;
|
|
}
|
|
NodeData::Comment { code, .. } => {
|
|
out.write_all(b"<!--")?;
|
|
out.write_all(code)?;
|
|
out.write_all(b"-->")?;
|
|
}
|
|
NodeData::Doctype { legacy, .. } => {
|
|
out.write_all(b"<!DOCTYPE html")?;
|
|
if !legacy.is_empty() {
|
|
out.write_all(b" ")?;
|
|
out.write_all(legacy)?;
|
|
};
|
|
out.write_all(b">")?;
|
|
}
|
|
NodeData::Element {
|
|
attributes,
|
|
closing_tag,
|
|
children,
|
|
name,
|
|
..
|
|
} => {
|
|
out.write_all(b"<")?;
|
|
out.write_all(name)?;
|
|
let mut attrs_sorted = attributes.iter().collect::<Vec<_>>();
|
|
attrs_sorted.sort_unstable_by(|a, b| a.0.cmp(&b.0));
|
|
for (name, value) in attrs_sorted.iter() {
|
|
out.write_all(b" ")?;
|
|
out.write_all(name)?;
|
|
if !value.value.is_empty() {
|
|
out.write_all(b"=")?;
|
|
match value.quote {
|
|
Some(b'"') => {
|
|
out.write_all(b"\"")?;
|
|
out.write_all(&DOUBLE_QUOTED_REPLACER.replace_all(&value.value))?;
|
|
out.write_all(b"\"")?;
|
|
}
|
|
Some(b'\'') => {
|
|
out.write_all(b"'")?;
|
|
out.write_all(&SINGLE_QUOTED_REPLACER.replace_all(&value.value))?;
|
|
out.write_all(b"'")?;
|
|
}
|
|
None => {
|
|
out.write_all(&UNQUOTED_REPLACER.replace_all(&value.value))?;
|
|
}
|
|
_ => unreachable!(),
|
|
};
|
|
};
|
|
}
|
|
if closing_tag == &ElementClosingTag::SelfClosing {
|
|
out.write_all(b" /")?;
|
|
};
|
|
out.write_all(b">")?;
|
|
for c in children {
|
|
c14n_serialise_ast(out, c)?;
|
|
}
|
|
if closing_tag == &ElementClosingTag::Present {
|
|
out.write_all(b"</")?;
|
|
out.write_all(name)?;
|
|
out.write_all(b">")?;
|
|
};
|
|
}
|
|
NodeData::Instruction { code, .. } => {
|
|
out.write_all(b"<?")?;
|
|
out.write_all(code)?;
|
|
out.write_all(b"?>")?;
|
|
}
|
|
NodeData::ScriptOrStyleContent { code, .. } => {
|
|
out.write_all(code)?;
|
|
}
|
|
NodeData::Text { value } => {
|
|
out.write_all(&TEXT_REPLACER.replace_all(value))?;
|
|
}
|
|
};
|
|
Ok(())
|
|
}
|