Merge entity code; simplify build IO
This commit is contained in:
parent
7f56cff39e
commit
126c719d23
43
build.rs
43
build.rs
|
@ -17,6 +17,20 @@ fn create_byte_string_literal(bytes: &[u8]) -> String {
|
|||
.collect::<String>())
|
||||
}
|
||||
|
||||
fn read_json<T>(name: &str) -> T
|
||||
where for<'de> T: Deserialize<'de> {
|
||||
let patterns_path = Path::new("gen").join(format!("{}.json", name));
|
||||
let patterns_file = File::open(patterns_path).unwrap();
|
||||
serde_json::from_reader(patterns_file).unwrap()
|
||||
}
|
||||
|
||||
fn write_rs(name: &str, code: String) -> () {
|
||||
let out_dir = env::var("OUT_DIR").unwrap();
|
||||
let dest_path = Path::new(&out_dir).join(format!("gen_{}.rs", name));
|
||||
let mut dest_file = File::create(&dest_path).unwrap();
|
||||
dest_file.write_all(code.as_bytes()).unwrap();
|
||||
}
|
||||
|
||||
struct AutoIncrement {
|
||||
next_val: usize,
|
||||
}
|
||||
|
@ -107,8 +121,8 @@ fn build_pattern(pattern: String) -> String {
|
|||
};
|
||||
|
||||
format!("SinglePattern {{ seq: {}, table: &[{}] }}",
|
||||
create_byte_string_literal(pattern.as_bytes()),
|
||||
table.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(", "))
|
||||
create_byte_string_literal(pattern.as_bytes()),
|
||||
table.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(", "))
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
|
@ -119,9 +133,7 @@ struct Entity {
|
|||
|
||||
fn generate_entities() {
|
||||
// Read named entities map from JSON file.
|
||||
let entities_path = Path::new("gen").join("entities.json");
|
||||
let entities_file = File::open(entities_path).unwrap();
|
||||
let entities: HashMap<String, Entity> = serde_json::from_reader(entities_file).unwrap();
|
||||
let entities: HashMap<String, Entity> = read_json("entities");
|
||||
|
||||
// Add entities to trie builder.
|
||||
let mut trie_builder = TrieBuilderNode::new();
|
||||
|
@ -133,20 +145,16 @@ fn generate_entities() {
|
|||
let trie_root_id = trie_builder.build(&mut AutoIncrement::new(), "&'static [u8]", &mut trie_code);
|
||||
|
||||
// Write trie code to output Rust file.
|
||||
let out_dir = env::var("OUT_DIR").unwrap();
|
||||
let dest_path = Path::new(&out_dir).join("gen_entities.rs");
|
||||
let mut dest_file = File::create(&dest_path).unwrap();
|
||||
dest_file.write_all(trie_code
|
||||
// Make trie root public and use proper variable name.
|
||||
.replace(format!("static N{}:", trie_root_id).as_str(), "pub static ENTITY_REFERENCES:")
|
||||
.as_bytes()).unwrap();
|
||||
// Make trie root public and use proper variable name.
|
||||
write_rs("entities", trie_code.replace(
|
||||
format!("static N{}:", trie_root_id).as_str(),
|
||||
"pub static ENTITY_REFERENCES:",
|
||||
));
|
||||
}
|
||||
|
||||
fn generate_patterns() {
|
||||
// Read named entities map from JSON file.
|
||||
let patterns_path = Path::new("gen").join("patterns.json");
|
||||
let patterns_file = File::open(patterns_path).unwrap();
|
||||
let patterns: HashMap<String, String> = serde_json::from_reader(patterns_file).unwrap();
|
||||
let patterns: HashMap<String, String> = read_json("patterns");
|
||||
|
||||
// Add entities to trie builder.
|
||||
let mut code = String::new();
|
||||
|
@ -155,10 +163,7 @@ fn generate_patterns() {
|
|||
};
|
||||
|
||||
// Write trie code to output Rust file.
|
||||
let out_dir = env::var("OUT_DIR").unwrap();
|
||||
let dest_path = Path::new(&out_dir).join("gen_patterns.rs");
|
||||
let mut dest_file = File::create(&dest_path).unwrap();
|
||||
dest_file.write_all(code.as_bytes()).unwrap();
|
||||
write_rs("patterns", code);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
|
|
@ -1,8 +0,0 @@
|
|||
use phf::phf_map;
|
||||
use crate::pattern::TrieNode;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_entities.rs"));
|
||||
|
||||
pub fn is_valid_entity_reference_name_char(c: u8) -> bool {
|
||||
c >= b'0' && c <= b'9' || c >= b'A' && c <= b'Z' || c >= b'a' && c <= b'z'
|
||||
}
|
|
@ -1,3 +1,2 @@
|
|||
pub mod codepoint;
|
||||
pub mod entity;
|
||||
pub mod tag;
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
use crate::err::ProcessingResult;
|
||||
use crate::proc::{Processor, ProcessorRange};
|
||||
use crate::spec::codepoint::{is_digit, is_hex_digit, is_lower_hex_digit, is_upper_hex_digit};
|
||||
use phf::phf_map;
|
||||
use crate::pattern::TrieNode;
|
||||
|
||||
// The minimum length of any entity is 3, which is a character entity reference
|
||||
// with a single character name. The longest UTF-8 representation of a Unicode
|
||||
// code point is 4 bytes. Because there are no character entity references with
|
||||
|
@ -17,10 +23,11 @@
|
|||
// - Names must match case sensitively.
|
||||
// - Entities that don't have a semicolon do work e.g. `&2` => `&2`.
|
||||
|
||||
use crate::err::ProcessingResult;
|
||||
use crate::proc::{Processor, ProcessorRange};
|
||||
use crate::spec::codepoint::{is_digit, is_hex_digit, is_lower_hex_digit, is_upper_hex_digit};
|
||||
use crate::spec::entity::{ENTITY_REFERENCES, is_valid_entity_reference_name_char};
|
||||
include!(concat!(env!("OUT_DIR"), "/gen_entities.rs"));
|
||||
|
||||
fn is_valid_entity_reference_name_char(c: u8) -> bool {
|
||||
c >= b'0' && c <= b'9' || c >= b'A' && c <= b'Z' || c >= b'a' && c <= b'z'
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum EntityType {
|
||||
|
|
Loading…
Reference in New Issue