use std::collections::HashMap; use std::env; use std::fs::File; use std::io::Write; use std::path::Path; use serde::{Deserialize, Serialize}; fn create_byte_string_literal(bytes: &[u8]) -> String { format!("b\"{}\"", bytes .iter() .map(|&b| if b >= b' ' && b <= b'~' && b != b'\\' && b != b'"' { (b as char).to_string() } else { format!("\\x{:02x}", b) }) .collect::()) } fn read_json(name: &str) -> T where for<'de> T: Deserialize<'de> { let patterns_path = Path::new("gen").join(format!("{}.json", name)); let patterns_file = File::open(patterns_path).unwrap(); serde_json::from_reader(patterns_file).unwrap() } fn write_rs(name: &str, code: String) -> () { let out_dir = env::var("OUT_DIR").unwrap(); let dest_path = Path::new(&out_dir).join(format!("gen_{}.rs", name)); let mut dest_file = File::create(&dest_path).unwrap(); dest_file.write_all(code.as_bytes()).unwrap(); } struct AutoIncrement { next_val: usize, } impl AutoIncrement { fn new() -> AutoIncrement { AutoIncrement { next_val: 0 } } fn next(&mut self) -> usize { let v = self.next_val; self.next_val += 1; v } } struct TrieBuilderNode { children: HashMap, value_as_code: Option, } impl TrieBuilderNode { fn new() -> TrieBuilderNode { TrieBuilderNode { children: HashMap::new(), value_as_code: None, } } fn add(&mut self, pat: &str, val: String) -> () { let mut current = self; for c in pat.chars() { if !current.children.contains_key(&c) { current.children.insert(c, TrieBuilderNode::new()); }; current = current.children.get_mut(&c).unwrap(); }; assert!(current.value_as_code.is_none()); current.value_as_code = Some(val); } fn build(&self, ai: &mut AutoIncrement, value_type: &'static str, out: &mut String) -> usize { let child_ids: Vec<(char, usize)> = self.children .iter() .map(|(&c, n)| (c, n.build(ai, value_type, out))) .collect(); let id = ai.next(); out.push_str(format!("static N{}: &TrieNode<{}> = &TrieNode::<{}> {{\n", id, value_type, value_type).as_str()); out.push_str(format!("children: phf::phf_map! {{\n").as_str()); for (c, n) in child_ids { debug_assert!(c as u32 <= 0x7f); out.push_str(format!("{}u8 => N{},\n", c as u8, n).as_str()); } out.push_str("},\n"); out.push_str("value: "); match &self.value_as_code { Some(v) => { out.push_str(format!("Some({})", v).as_str()); } None => out.push_str("None"), }; out.push_str(",\n};\n"); id } } fn build_pattern(pattern: String) -> String { assert!(pattern.is_ascii()); let seq = pattern.as_bytes(); let mut max_prefix_len = 0usize; let mut table = vec![0usize; seq.len()]; let mut i = 1; while i < seq.len() { if seq[i] == seq[max_prefix_len] { max_prefix_len += 1; table[i] = max_prefix_len; i += 1; } else { if max_prefix_len != 0 { max_prefix_len = table[max_prefix_len - 1]; } else { table[i] = 0; i += 1; }; }; }; format!("SinglePattern {{ seq: {}, table: &[{}] }}", create_byte_string_literal(pattern.as_bytes()), table.iter().map(|v| v.to_string()).collect::>().join(", ")) } #[derive(Serialize, Deserialize, Debug)] struct Entity { codepoints: Vec, characters: String, } fn generate_entities() { // Read named entities map from JSON file. let entities: HashMap = read_json("entities"); // Add entities to trie builder. let mut trie_builder = TrieBuilderNode::new(); for (rep, entity) in entities { trie_builder.add(&rep[1..], create_byte_string_literal(entity.characters.as_bytes())); }; // Generate trie code from builder. let mut trie_code = String::new(); let trie_root_id = trie_builder.build(&mut AutoIncrement::new(), "&'static [u8]", &mut trie_code); // Write trie code to output Rust file. // Make trie root public and use proper variable name. write_rs("entities", trie_code.replace( format!("static N{}:", trie_root_id).as_str(), "pub static ENTITY_REFERENCES:", )); } fn generate_patterns() { let patterns: HashMap = read_json("patterns"); for (name, pattern) in patterns { let mut code = String::new(); code.push_str(format!("static {}: &SinglePattern = &{};", name, build_pattern(pattern)).as_str()); write_rs(format!("pattern_{}", name).as_str(), code); }; } fn generate_tries() { let tries: HashMap> = read_json("tries"); for (name, values) in tries { let mut trie_builder = TrieBuilderNode::new(); for (seq, value_code) in values { trie_builder.add(seq.as_str(), value_code); } let mut trie_code = String::new(); let trie_root_id = trie_builder.build(&mut AutoIncrement::new(), "ContentType", &mut trie_code); write_rs(format!("trie_{}", name).as_str(), trie_code.replace( format!("static N{}:", trie_root_id).as_str(), format!("static {}:", name).as_str(), )); } } fn main() { generate_entities(); generate_patterns(); generate_tries(); }