Merge entity code; simplify build IO
This commit is contained in:
parent
7f56cff39e
commit
126c719d23
37
build.rs
37
build.rs
|
@ -17,6 +17,20 @@ fn create_byte_string_literal(bytes: &[u8]) -> String {
|
||||||
.collect::<String>())
|
.collect::<String>())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn read_json<T>(name: &str) -> T
|
||||||
|
where for<'de> T: Deserialize<'de> {
|
||||||
|
let patterns_path = Path::new("gen").join(format!("{}.json", name));
|
||||||
|
let patterns_file = File::open(patterns_path).unwrap();
|
||||||
|
serde_json::from_reader(patterns_file).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Writes the generated Rust source `code` to `$OUT_DIR/gen_<name>.rs`,
/// creating the file or replacing any existing contents.
///
/// # Panics
///
/// Panics if the `OUT_DIR` environment variable cannot be read, or if the
/// destination file cannot be written (the message names the path).
fn write_rs(name: &str, code: String) {
    let out_dir = env::var("OUT_DIR").expect("OUT_DIR environment variable must be set");
    let dest_path = Path::new(&out_dir).join(format!("gen_{}.rs", name));
    // `fs::write` creates/truncates the file and writes the whole buffer in one call.
    std::fs::write(&dest_path, code)
        .unwrap_or_else(|err| panic!("failed to write {}: {}", dest_path.display(), err));
}
|
||||||
|
|
||||||
struct AutoIncrement {
|
struct AutoIncrement {
|
||||||
next_val: usize,
|
next_val: usize,
|
||||||
}
|
}
|
||||||
|
@ -119,9 +133,7 @@ struct Entity {
|
||||||
|
|
||||||
fn generate_entities() {
|
fn generate_entities() {
|
||||||
// Read named entities map from JSON file.
|
// Read named entities map from JSON file.
|
||||||
let entities_path = Path::new("gen").join("entities.json");
|
let entities: HashMap<String, Entity> = read_json("entities");
|
||||||
let entities_file = File::open(entities_path).unwrap();
|
|
||||||
let entities: HashMap<String, Entity> = serde_json::from_reader(entities_file).unwrap();
|
|
||||||
|
|
||||||
// Add entities to trie builder.
|
// Add entities to trie builder.
|
||||||
let mut trie_builder = TrieBuilderNode::new();
|
let mut trie_builder = TrieBuilderNode::new();
|
||||||
|
@ -133,20 +145,16 @@ fn generate_entities() {
|
||||||
let trie_root_id = trie_builder.build(&mut AutoIncrement::new(), "&'static [u8]", &mut trie_code);
|
let trie_root_id = trie_builder.build(&mut AutoIncrement::new(), "&'static [u8]", &mut trie_code);
|
||||||
|
|
||||||
// Write trie code to output Rust file.
|
// Write trie code to output Rust file.
|
||||||
let out_dir = env::var("OUT_DIR").unwrap();
|
|
||||||
let dest_path = Path::new(&out_dir).join("gen_entities.rs");
|
|
||||||
let mut dest_file = File::create(&dest_path).unwrap();
|
|
||||||
dest_file.write_all(trie_code
|
|
||||||
// Make trie root public and use proper variable name.
|
// Make trie root public and use proper variable name.
|
||||||
.replace(format!("static N{}:", trie_root_id).as_str(), "pub static ENTITY_REFERENCES:")
|
write_rs("entities", trie_code.replace(
|
||||||
.as_bytes()).unwrap();
|
format!("static N{}:", trie_root_id).as_str(),
|
||||||
|
"pub static ENTITY_REFERENCES:",
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn generate_patterns() {
|
fn generate_patterns() {
|
||||||
// Read named entities map from JSON file.
|
// Read patterns map from JSON file.
|
||||||
let patterns_path = Path::new("gen").join("patterns.json");
|
let patterns: HashMap<String, String> = read_json("patterns");
|
||||||
let patterns_file = File::open(patterns_path).unwrap();
|
|
||||||
let patterns: HashMap<String, String> = serde_json::from_reader(patterns_file).unwrap();
|
|
||||||
|
|
||||||
// Add entities to trie builder.
|
// Add entities to trie builder.
|
||||||
let mut code = String::new();
|
let mut code = String::new();
|
||||||
|
@ -155,10 +163,7 @@ fn generate_patterns() {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Write trie code to output Rust file.
|
// Write generated code to output Rust file.
|
||||||
let out_dir = env::var("OUT_DIR").unwrap();
|
write_rs("patterns", code);
|
||||||
let dest_path = Path::new(&out_dir).join("gen_patterns.rs");
|
|
||||||
let mut dest_file = File::create(&dest_path).unwrap();
|
|
||||||
dest_file.write_all(code.as_bytes()).unwrap();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
|
|
@ -1,8 +0,0 @@
|
||||||
use phf::phf_map;
|
|
||||||
use crate::pattern::TrieNode;
|
|
||||||
|
|
||||||
include!(concat!(env!("OUT_DIR"), "/gen_entities.rs"));
|
|
||||||
|
|
||||||
/// Returns `true` if `c` may appear in an entity reference name, i.e. it is
/// an ASCII digit (`0`-`9`) or an ASCII letter (`A`-`Z`, `a`-`z`).
pub fn is_valid_entity_reference_name_char(c: u8) -> bool {
    c.is_ascii_alphanumeric()
}
|
|
|
@ -1,3 +1,2 @@
|
||||||
pub mod codepoint;
|
pub mod codepoint;
|
||||||
pub mod entity;
|
|
||||||
pub mod tag;
|
pub mod tag;
|
||||||
|
|
|
@ -1,3 +1,9 @@
|
||||||
|
use crate::err::ProcessingResult;
|
||||||
|
use crate::proc::{Processor, ProcessorRange};
|
||||||
|
use crate::spec::codepoint::{is_digit, is_hex_digit, is_lower_hex_digit, is_upper_hex_digit};
|
||||||
|
use phf::phf_map;
|
||||||
|
use crate::pattern::TrieNode;
|
||||||
|
|
||||||
// The minimum length of any entity is 3, which is a character entity reference
|
// The minimum length of any entity is 3, which is a character entity reference
|
||||||
// with a single character name. The longest UTF-8 representation of a Unicode
|
// with a single character name. The longest UTF-8 representation of a Unicode
|
||||||
// code point is 4 bytes. Because there are no character entity references with
|
// code point is 4 bytes. Because there are no character entity references with
|
||||||
|
@ -17,10 +23,11 @@
|
||||||
// - Names must match case sensitively.
|
// - Names must match case sensitively.
|
||||||
// - Entities that don't have a semicolon do work e.g. `&2` => `&2`.
|
// - Entities that don't have a semicolon do work e.g. `&2` => `&2`.
|
||||||
|
|
||||||
use crate::err::ProcessingResult;
|
include!(concat!(env!("OUT_DIR"), "/gen_entities.rs"));
|
||||||
use crate::proc::{Processor, ProcessorRange};
|
|
||||||
use crate::spec::codepoint::{is_digit, is_hex_digit, is_lower_hex_digit, is_upper_hex_digit};
|
fn is_valid_entity_reference_name_char(c: u8) -> bool {
|
||||||
use crate::spec::entity::{ENTITY_REFERENCES, is_valid_entity_reference_name_char};
|
c >= b'0' && c <= b'9' || c >= b'A' && c <= b'Z' || c >= b'a' && c <= b'z'
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub enum EntityType {
|
pub enum EntityType {
|
||||||
|
|
Loading…
Reference in New Issue