From 126c719d2366008bf857b6a40dabd63c8e48b627 Mon Sep 17 00:00:00 2001 From: Wilson Lin Date: Sun, 29 Dec 2019 21:51:25 +1100 Subject: [PATCH] Merge entity code; simplify build IO --- build.rs | 43 ++++++++++++++++++++++++------------------- src/spec/entity.rs | 8 -------- src/spec/mod.rs | 1 - src/unit/entity.rs | 15 +++++++++++---- 4 files changed, 35 insertions(+), 32 deletions(-) delete mode 100644 src/spec/entity.rs diff --git a/build.rs b/build.rs index b5fdc9a..b734ea1 100644 --- a/build.rs +++ b/build.rs @@ -17,6 +17,20 @@ fn create_byte_string_literal(bytes: &[u8]) -> String { .collect::()) } +fn read_json(name: &str) -> T + where for<'de> T: Deserialize<'de> { + let patterns_path = Path::new("gen").join(format!("{}.json", name)); + let patterns_file = File::open(patterns_path).unwrap(); + serde_json::from_reader(patterns_file).unwrap() +} + +fn write_rs(name: &str, code: String) -> () { + let out_dir = env::var("OUT_DIR").unwrap(); + let dest_path = Path::new(&out_dir).join(format!("gen_{}.rs", name)); + let mut dest_file = File::create(&dest_path).unwrap(); + dest_file.write_all(code.as_bytes()).unwrap(); +} + struct AutoIncrement { next_val: usize, } @@ -107,8 +121,8 @@ fn build_pattern(pattern: String) -> String { }; format!("SinglePattern {{ seq: {}, table: &[{}] }}", - create_byte_string_literal(pattern.as_bytes()), - table.iter().map(|v| v.to_string()).collect::>().join(", ")) + create_byte_string_literal(pattern.as_bytes()), + table.iter().map(|v| v.to_string()).collect::>().join(", ")) } #[derive(Serialize, Deserialize, Debug)] @@ -119,9 +133,7 @@ struct Entity { fn generate_entities() { // Read named entities map from JSON file. - let entities_path = Path::new("gen").join("entities.json"); - let entities_file = File::open(entities_path).unwrap(); - let entities: HashMap = serde_json::from_reader(entities_file).unwrap(); + let entities: HashMap = read_json("entities"); // Add entities to trie builder. let mut trie_builder = TrieBuilderNode::new(); @@ -133,20 +145,16 @@ fn generate_entities() { let trie_root_id = trie_builder.build(&mut AutoIncrement::new(), "&'static [u8]", &mut trie_code); // Write trie code to output Rust file. - let out_dir = env::var("OUT_DIR").unwrap(); - let dest_path = Path::new(&out_dir).join("gen_entities.rs"); - let mut dest_file = File::create(&dest_path).unwrap(); - dest_file.write_all(trie_code - // Make trie root public and use proper variable name. - .replace(format!("static N{}:", trie_root_id).as_str(), "pub static ENTITY_REFERENCES:") - .as_bytes()).unwrap(); + // Make trie root public and use proper variable name. + write_rs("entities", trie_code.replace( + format!("static N{}:", trie_root_id).as_str(), + "pub static ENTITY_REFERENCES:", + )); } fn generate_patterns() { // Read named entities map from JSON file. - let patterns_path = Path::new("gen").join("patterns.json"); - let patterns_file = File::open(patterns_path).unwrap(); - let patterns: HashMap = serde_json::from_reader(patterns_file).unwrap(); + let patterns: HashMap = read_json("patterns"); // Add entities to trie builder. let mut code = String::new(); @@ -155,10 +163,7 @@ fn generate_patterns() { }; // Write trie code to output Rust file. - let out_dir = env::var("OUT_DIR").unwrap(); - let dest_path = Path::new(&out_dir).join("gen_patterns.rs"); - let mut dest_file = File::create(&dest_path).unwrap(); - dest_file.write_all(code.as_bytes()).unwrap(); + write_rs("patterns", code); } fn main() { diff --git a/src/spec/entity.rs b/src/spec/entity.rs deleted file mode 100644 index 2347667..0000000 --- a/src/spec/entity.rs +++ /dev/null @@ -1,8 +0,0 @@ -use phf::phf_map; -use crate::pattern::TrieNode; - -include!(concat!(env!("OUT_DIR"), "/gen_entities.rs")); - -pub fn is_valid_entity_reference_name_char(c: u8) -> bool { - c >= b'0' && c <= b'9' || c >= b'A' && c <= b'Z' || c >= b'a' && c <= b'z' -} diff --git a/src/spec/mod.rs b/src/spec/mod.rs index 013caa6..55b1257 100644 --- a/src/spec/mod.rs +++ b/src/spec/mod.rs @@ -1,3 +1,2 @@ pub mod codepoint; -pub mod entity; pub mod tag; diff --git a/src/unit/entity.rs b/src/unit/entity.rs index c3c57be..902961f 100644 --- a/src/unit/entity.rs +++ b/src/unit/entity.rs @@ -1,3 +1,9 @@ +use crate::err::ProcessingResult; +use crate::proc::{Processor, ProcessorRange}; +use crate::spec::codepoint::{is_digit, is_hex_digit, is_lower_hex_digit, is_upper_hex_digit}; +use phf::phf_map; +use crate::pattern::TrieNode; + // The minimum length of any entity is 3, which is a character entity reference // with a single character name. The longest UTF-8 representation of a Unicode // code point is 4 bytes. Because there are no character entity references with @@ -17,10 +23,11 @@ // - Names must match case sensitively. // - Entities that don't have a semicolon do work e.g. `&2` => `&2`. -use crate::err::ProcessingResult; -use crate::proc::{Processor, ProcessorRange}; -use crate::spec::codepoint::{is_digit, is_hex_digit, is_lower_hex_digit, is_upper_hex_digit}; -use crate::spec::entity::{ENTITY_REFERENCES, is_valid_entity_reference_name_char}; +include!(concat!(env!("OUT_DIR"), "/gen_entities.rs")); + +fn is_valid_entity_reference_name_char(c: u8) -> bool { + c >= b'0' && c <= b'9' || c >= b'A' && c <= b'Z' || c >= b'a' && c <= b'z' +} #[derive(Clone, Copy)] pub enum EntityType {