minify-html/build.rs

189 lines
5.6 KiB
Rust

use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use serde::{Deserialize, Serialize};
/// Renders `bytes` as the source text of a Rust byte string literal (`b"..."`).
///
/// Printable ASCII bytes (space through `~`) are copied verbatim, with the
/// exception of `\` and `"`. Those two, and every byte outside the printable
/// range, are written as a two-digit lowercase `\xNN` escape.
fn create_byte_string_literal(bytes: &[u8]) -> String {
    let mut literal = String::from("b\"");
    for &byte in bytes {
        match byte {
            // These are printable, but would terminate or corrupt the literal.
            b'\\' | b'"' => literal.push_str(&format!("\\x{:02x}", byte)),
            b' '..=b'~' => literal.push(byte as char),
            _ => literal.push_str(&format!("\\x{:02x}", byte)),
        }
    }
    literal.push('"');
    literal
}
/// Deserializes `gen/<name>.json` (resolved against the current working
/// directory) into a value of type `T`.
///
/// # Panics
///
/// Panics with a message naming the file if it cannot be opened or if its
/// contents do not deserialize into `T`.
fn read_json<T>(name: &str) -> T
where
    for<'de> T: Deserialize<'de>,
{
    let json_path = Path::new("gen").join(format!("{}.json", name));
    let json_file = File::open(&json_path)
        .unwrap_or_else(|err| panic!("failed to open {}: {}", json_path.display(), err));
    // Buffer the file: handing a raw `File` to `from_reader` results in many
    // tiny reads, which is needlessly slow for large inputs.
    serde_json::from_reader(std::io::BufReader::new(json_file))
        .unwrap_or_else(|err| panic!("failed to parse {}: {}", json_path.display(), err))
}
/// Writes `code` to `$OUT_DIR/gen_<name>.rs`, creating or truncating the file.
///
/// # Panics
///
/// Panics if the `OUT_DIR` environment variable is not set, or if the
/// destination file cannot be created or written. The panic message names
/// the destination path so a failing build points at the offending file.
fn write_rs(name: &str, code: String) {
    let out_dir = env::var("OUT_DIR")
        .unwrap_or_else(|err| panic!("OUT_DIR environment variable is not usable: {}", err));
    let dest_path = Path::new(&out_dir).join(format!("gen_{}.rs", name));
    let mut dest_file = File::create(&dest_path)
        .unwrap_or_else(|err| panic!("failed to create {}: {}", dest_path.display(), err));
    dest_file
        .write_all(code.as_bytes())
        .unwrap_or_else(|err| panic!("failed to write {}: {}", dest_path.display(), err));
}
/// Hands out consecutive `usize` identifiers, starting at zero.
struct AutoIncrement {
    /// The identifier that the next call to `next` will return.
    next_val: usize,
}

impl AutoIncrement {
    /// Creates a counter whose first issued identifier is `0`.
    fn new() -> AutoIncrement {
        Self { next_val: 0 }
    }

    /// Returns the current identifier and advances the counter by one.
    fn next(&mut self) -> usize {
        let issued = self.next_val;
        self.next_val = issued + 1;
        issued
    }
}
/// Build-time trie whose nodes are later serialized as Rust source text.
struct TrieBuilderNode {
    /// Child nodes, keyed by the next character of a pattern.
    children: HashMap<char, TrieBuilderNode>,
    /// Source text of the value stored at this node, if a pattern ends here.
    value_as_code: Option<String>,
}

impl TrieBuilderNode {
    /// Creates an empty node with no children and no value.
    fn new() -> TrieBuilderNode {
        TrieBuilderNode {
            children: HashMap::new(),
            value_as_code: None,
        }
    }

    /// Inserts `pat` into the trie, associating it with the source text `val`.
    ///
    /// # Panics
    ///
    /// Panics if `pat` has already been added.
    fn add(&mut self, pat: &str, val: String) {
        let mut current = self;
        for c in pat.chars() {
            // Single lookup: fetch the child, creating it on first use.
            current = current
                .children
                .entry(c)
                .or_insert_with(TrieBuilderNode::new);
        }
        assert!(
            current.value_as_code.is_none(),
            "pattern {:?} was added to the trie more than once",
            pat
        );
        current.value_as_code = Some(val);
    }

    /// Appends to `out` one `static N<id>` definition for this node and each of
    /// its descendants, and returns the id assigned to this node.
    ///
    /// Descendants are emitted before their parent, so every `N<id>` a node
    /// refers to has already been written. `value_type` is spliced in as the
    /// type argument of the emitted `TrieNode`. Ids are drawn from `ai`.
    ///
    /// Children are visited in ascending key order so that the generated text
    /// (and the ids in it) is the same on every run; `HashMap` iteration order
    /// is not.
    ///
    /// # Panics
    ///
    /// Panics if any child key is not ASCII, because keys are emitted as `u8`
    /// literals and a wider character would be silently truncated.
    fn build(&self, ai: &mut AutoIncrement, value_type: &'static str, out: &mut String) -> usize {
        let mut ordered_children: Vec<(char, &TrieBuilderNode)> =
            self.children.iter().map(|(&c, n)| (c, n)).collect();
        ordered_children.sort_unstable_by_key(|&(c, _)| c);

        let child_ids: Vec<(u8, usize)> = ordered_children
            .into_iter()
            .map(|(c, n)| {
                assert!(
                    c.is_ascii(),
                    "trie key {:?} is not ASCII and cannot be emitted as a u8",
                    c
                );
                (c as u8, n.build(ai, value_type, out))
            })
            .collect();

        // Take this node's id only after all descendants have taken theirs.
        let id = ai.next();
        out.push_str(&format!(
            "static N{}: &TrieNode<{}> = &TrieNode::<{}> {{\n",
            id, value_type, value_type
        ));
        out.push_str("children: phf::phf_map! {\n");
        for (key, child_id) in child_ids {
            out.push_str(&format!("{}u8 => N{},\n", key, child_id));
        }
        out.push_str("},\n");
        out.push_str("value: ");
        match &self.value_as_code {
            Some(v) => out.push_str(&format!("Some({})", v)),
            None => out.push_str("None"),
        }
        out.push_str(",\n};\n");
        id
    }
}
/// Produces the source text of a `SinglePattern` value for `pattern`.
///
/// The emitted `seq` is the pattern as a byte string literal. The emitted
/// `table` holds, for each index `i`, the length of the longest proper prefix
/// of `pattern[..=i]` that is also a suffix of it (the prefix table used by
/// Knuth–Morris–Pratt style searching).
///
/// # Panics
///
/// Panics if `pattern` contains non-ASCII characters.
fn build_pattern(pattern: String) -> String {
    assert!(pattern.is_ascii());
    let bytes = pattern.as_bytes();
    let mut prefix_table = vec![0usize; bytes.len()];
    // Length of the prefix currently matched against the suffix ending at `pos`.
    let mut matched = 0usize;
    for pos in 1..bytes.len() {
        // On mismatch, fall back to the next shorter candidate prefix.
        while matched != 0 && bytes[pos] != bytes[matched] {
            matched = prefix_table[matched - 1];
        }
        if bytes[pos] == bytes[matched] {
            matched += 1;
        }
        prefix_table[pos] = matched;
    }

    let seq_literal = create_byte_string_literal(bytes);
    let table_items: Vec<String> = prefix_table.iter().map(|len| len.to_string()).collect();
    format!(
        "SinglePattern {{ seq: {}, table: &[{}] }}",
        seq_literal,
        table_items.join(", ")
    )
}
/// One value of the map deserialized from `gen/entities.json` by
/// `generate_entities`.
#[derive(Serialize, Deserialize, Debug)]
struct Entity {
// Deserialized from the JSON but not read anywhere else in this file.
codepoints: Vec<u32>,
// Text the entity stands for; its UTF-8 bytes become the trie value.
characters: String,
}
/// Generates `gen_entities.rs`: a trie mapping each entity name from
/// `gen/entities.json` to the UTF-8 bytes of its `characters`, with the trie
/// root exposed as `pub static ENTITY_REFERENCES`.
fn generate_entities() {
    // Load the entity table.
    let named_entities: HashMap<String, Entity> = read_json("entities");

    // Insert every entity into the trie. The first byte of each key is
    // dropped — presumably the leading `&`; verify against gen/entities.json.
    let mut root = TrieBuilderNode::new();
    for (key, entity) in &named_entities {
        let trie_key = &key[1..];
        let replacement = create_byte_string_literal(entity.characters.as_bytes());
        root.add(trie_key, replacement);
    }

    // Serialize the trie as Rust source.
    let mut node_ids = AutoIncrement::new();
    let mut generated = String::new();
    let root_id = root.build(&mut node_ids, "&'static [u8]", &mut generated);

    // Rename the root node's static to its public name before writing.
    let root_decl = format!("static N{}:", root_id);
    let renamed = generated.replace(root_decl.as_str(), "pub static ENTITY_REFERENCES:");
    write_rs("entities", renamed);
}
/// Generates one `gen_pattern_<name>.rs` file per entry of
/// `gen/patterns.json`, each defining a `static <name>: &SinglePattern`.
fn generate_patterns() {
    let named_patterns: HashMap<String, String> = read_json("patterns");
    for (name, pattern) in named_patterns {
        let definition = build_pattern(pattern);
        let code = format!("static {}: &SinglePattern = &{};", name, definition);
        let file_stem = format!("pattern_{}", name);
        write_rs(file_stem.as_str(), code);
    }
}
/// Generates one `gen_trie_<name>.rs` file per entry of `gen/tries.json`.
///
/// Each entry maps sequences to value source text. The emitted trie nodes use
/// `ContentType` as their value type, and the root node's static is renamed to
/// the entry's name.
fn generate_tries() {
    let trie_specs: HashMap<String, HashMap<String, String>> = read_json("tries");
    for (name, entries) in trie_specs {
        let mut root = TrieBuilderNode::new();
        for (seq, value_code) in entries {
            root.add(&seq, value_code);
        }

        let mut node_ids = AutoIncrement::new();
        let mut generated = String::new();
        let root_id = root.build(&mut node_ids, "ContentType", &mut generated);

        // Give the root node's static the name requested by the JSON entry.
        let root_decl = format!("static N{}:", root_id);
        let named_decl = format!("static {}:", name);
        let file_stem = format!("trie_{}", name);
        write_rs(
            file_stem.as_str(),
            generated.replace(root_decl.as_str(), named_decl.as_str()),
        );
    }
}
/// Build script entry point: runs each generator in turn. Every generator
/// reads a JSON file under `gen/` and writes Rust source into `OUT_DIR`.
fn main() {
// gen/entities.json -> gen_entities.rs
generate_entities();
// gen/patterns.json -> gen_pattern_<name>.rs, one file per pattern
generate_patterns();
// gen/tries.json -> gen_trie_<name>.rs, one file per trie
generate_tries();
}