Precompute generated files instead of building them every compile
This commit is contained in:
parent
2591f2879d
commit
99a13e3243
|
@ -1 +1,2 @@
|
||||||
/target
|
/target
|
||||||
|
/src/gen/
|
||||||
|
|
|
@ -23,11 +23,6 @@ name = "bitflags"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cascade"
|
|
||||||
version = "0.1.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "0.1.10"
|
version = "0.1.10"
|
||||||
|
@ -47,11 +42,6 @@ dependencies = [
|
||||||
"vec_map 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"vec_map 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "fastrie"
|
|
||||||
version = "0.0.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "getrandom"
|
name = "getrandom"
|
||||||
version = "0.1.14"
|
version = "0.1.14"
|
||||||
|
@ -82,19 +72,10 @@ dependencies = [
|
||||||
name = "hyperbuild"
|
name = "hyperbuild"
|
||||||
version = "0.0.45"
|
version = "0.0.45"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cascade 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"fastrie 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
"structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "itoa"
|
|
||||||
version = "0.4.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
|
@ -241,39 +222,6 @@ dependencies = [
|
||||||
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "ryu"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "serde"
|
|
||||||
version = "1.0.110"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
dependencies = [
|
|
||||||
"serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "serde_derive"
|
|
||||||
version = "1.0.110"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"quote 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"syn 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "serde_json"
|
|
||||||
version = "1.0.53"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
dependencies = [
|
|
||||||
"itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"ryu 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "siphasher"
|
name = "siphasher"
|
||||||
version = "0.3.3"
|
version = "0.3.3"
|
||||||
|
@ -387,14 +335,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||||
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||||
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||||
"checksum cascade 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "31c9ddf4a1a9dbf82e130117f81b0c292fb5416000cbaba11eb92a65face2613"
|
|
||||||
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||||
"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
|
"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
|
||||||
"checksum fastrie 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "16a8e873087682100de15eaccd3f4671c44fe589bd8989a854c061c961884d16"
|
|
||||||
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
|
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
|
||||||
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
|
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
|
||||||
"checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"
|
"checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"
|
||||||
"checksum itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
|
|
||||||
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
"checksum libc 0.2.69 (registry+https://github.com/rust-lang/crates.io-index)" = "99e85c08494b21a9054e7fe1374a732aeadaff3980b6990b94bfd3a70f690005"
|
"checksum libc 0.2.69 (registry+https://github.com/rust-lang/crates.io-index)" = "99e85c08494b21a9054e7fe1374a732aeadaff3980b6990b94bfd3a70f690005"
|
||||||
"checksum phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
|
"checksum phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
|
||||||
|
@ -412,10 +357,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
||||||
"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
|
"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
|
||||||
"checksum rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
|
"checksum rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
|
||||||
"checksum ryu 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1"
|
|
||||||
"checksum serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)" = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c"
|
|
||||||
"checksum serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)" = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984"
|
|
||||||
"checksum serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)" = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2"
|
|
||||||
"checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
|
"checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
|
||||||
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||||
"checksum structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
|
"checksum structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
|
||||||
|
|
|
@ -10,20 +10,14 @@ repository = "https://github.com/wilsonzlin/hyperbuild.git"
|
||||||
version = "0.0.45"
|
version = "0.0.45"
|
||||||
authors = ["Wilson Lin <code@wilsonl.in>"]
|
authors = ["Wilson Lin <code@wilsonl.in>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
include = ["/gen/*.json", "/src/**/*", "/build.rs", "/Cargo.toml", "/LICENSE", "/README.md"]
|
include = ["/src/**/*", "/Cargo.toml", "/LICENSE", "/README.md"]
|
||||||
|
|
||||||
[badges]
|
[badges]
|
||||||
maintenance = { status = "actively-developed" }
|
maintenance = { status = "actively-developed" }
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
cascade = "0.1.4"
|
|
||||||
fastrie = "0.0.6"
|
|
||||||
phf = { version = "0.8.0", features = ["macros"] }
|
phf = { version = "0.8.0", features = ["macros"] }
|
||||||
structopt = "0.3.5"
|
structopt = "0.3.5"
|
||||||
|
|
||||||
[build-dependencies]
|
|
||||||
serde = { version = "1.0.104", features = ["derive"] }
|
|
||||||
serde_json = "1.0.44"
|
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
panic = 'abort'
|
panic = 'abort'
|
||||||
|
|
309
build.rs
309
build.rs
|
@ -1,309 +0,0 @@
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::env;
|
|
||||||
use std::fs::File;
|
|
||||||
use std::io::Write;
|
|
||||||
use std::ops::{Index, IndexMut};
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
|
|
||||||
fn create_byte_string_literal(bytes: &[u8]) -> String {
|
|
||||||
format!("b\"{}\"", bytes
|
|
||||||
.iter()
|
|
||||||
.map(|&b| if b >= b' ' && b <= b'~' && b != b'\\' && b != b'"' {
|
|
||||||
(b as char).to_string()
|
|
||||||
} else {
|
|
||||||
format!("\\x{:02x}", b)
|
|
||||||
})
|
|
||||||
.collect::<String>())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_json<T>(name: &str) -> T where for<'de> T: Deserialize<'de> {
|
|
||||||
let patterns_path = Path::new("gen").join(format!("{}.json", name));
|
|
||||||
let patterns_file = File::open(patterns_path).unwrap();
|
|
||||||
serde_json::from_reader(patterns_file).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_rs(name: &str, code: String) -> () {
|
|
||||||
let out_dir = env::var("OUT_DIR").unwrap();
|
|
||||||
let dest_path = Path::new(&out_dir).join(format!("gen_{}.rs", name));
|
|
||||||
let mut dest_file = File::create(&dest_path).unwrap();
|
|
||||||
dest_file.write_all(code.as_bytes()).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
fn name_words(n: &str) -> Vec<String> {
|
|
||||||
n.split(' ').map(|w| w.to_string()).collect::<Vec<String>>()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn snake_case(n: &Vec<String>) -> String {
|
|
||||||
n.iter().map(|w| w.to_uppercase()).collect::<Vec<String>>().join("_")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn camel_case(n: &Vec<String>) -> String {
|
|
||||||
n.iter().map(|w| format!(
|
|
||||||
"{}{}",
|
|
||||||
w.as_bytes()[0].to_ascii_uppercase() as char,
|
|
||||||
std::str::from_utf8(&w.as_bytes()[1..]).unwrap(),
|
|
||||||
)).collect::<Vec<String>>().join("")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct TwoDimensionalArray {
|
|
||||||
data: Vec<usize>,
|
|
||||||
cols: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TwoDimensionalArray {
|
|
||||||
pub fn new(rows: usize, cols: usize) -> TwoDimensionalArray {
|
|
||||||
TwoDimensionalArray {
|
|
||||||
data: vec![0usize; rows * cols],
|
|
||||||
cols,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn prebuilt(data: Vec<usize>, cols: usize) -> TwoDimensionalArray {
|
|
||||||
TwoDimensionalArray { data, cols }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type TwoDimensionalArrayIndex = (usize, usize);
|
|
||||||
|
|
||||||
impl Index<TwoDimensionalArrayIndex> for TwoDimensionalArray {
|
|
||||||
type Output = usize;
|
|
||||||
|
|
||||||
fn index(&self, (row, col): TwoDimensionalArrayIndex) -> &Self::Output {
|
|
||||||
&self.data[row * self.cols + col]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IndexMut<TwoDimensionalArrayIndex> for TwoDimensionalArray {
|
|
||||||
fn index_mut(&mut self, (row, col): TwoDimensionalArrayIndex) -> &mut Self::Output {
|
|
||||||
&mut self.data[row * self.cols + col]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_pattern(pattern: String) -> String {
|
|
||||||
assert!(pattern.is_ascii());
|
|
||||||
let seq = pattern.as_bytes();
|
|
||||||
let dfa = &mut TwoDimensionalArray::new(256, seq.len());
|
|
||||||
|
|
||||||
dfa[(seq[0] as usize, 0)] = 1;
|
|
||||||
let mut x = 0;
|
|
||||||
let mut j = 1;
|
|
||||||
while j < seq.len() {
|
|
||||||
for c in 0..256 {
|
|
||||||
dfa[(c, j)] = dfa[(c, x)];
|
|
||||||
};
|
|
||||||
dfa[(seq[j] as usize, j)] = j + 1;
|
|
||||||
x = dfa[(seq[j] as usize, x)];
|
|
||||||
j += 1;
|
|
||||||
};
|
|
||||||
|
|
||||||
format!(
|
|
||||||
"crate::pattern::SinglePattern::prebuilt(&[{}], {})",
|
|
||||||
dfa.data.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(", "),
|
|
||||||
seq.len(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
struct TagAttr {
|
|
||||||
boolean: bool,
|
|
||||||
redundant_if_empty: bool,
|
|
||||||
collapse_and_trim: bool,
|
|
||||||
default_value: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TagAttr {
|
|
||||||
fn code(&self) -> String {
|
|
||||||
format!(r"
|
|
||||||
AttributeMinification {{
|
|
||||||
boolean: {boolean},
|
|
||||||
redundant_if_empty: {redundant_if_empty},
|
|
||||||
collapse_and_trim: {collapse_and_trim},
|
|
||||||
default_value: {default_value},
|
|
||||||
}}
|
|
||||||
",
|
|
||||||
boolean = self.boolean,
|
|
||||||
redundant_if_empty = self.redundant_if_empty,
|
|
||||||
collapse_and_trim = self.collapse_and_trim,
|
|
||||||
default_value = match &self.default_value {
|
|
||||||
Some(val) => format!("Some({})", create_byte_string_literal(val.as_bytes())),
|
|
||||||
None => "None".to_string(),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn generate_attr_map() {
|
|
||||||
let attrs: HashMap<String, HashMap<String, HashMap<String, TagAttr>>> = read_json("attrs");
|
|
||||||
let mut code = String::new();
|
|
||||||
for (attr_name, namespaces) in attrs.iter() {
|
|
||||||
let mut by_namespace_code = String::new();
|
|
||||||
by_namespace_code.push_str(format!("static {}_ATTR: ByNamespace = ByNamespace {{\n", attr_name.to_uppercase()).as_str());
|
|
||||||
for namespace in ["html".to_string(), "svg".to_string()].iter() {
|
|
||||||
by_namespace_code.push_str(format!("\t{}: ", namespace).as_str());
|
|
||||||
match namespaces.get(namespace) {
|
|
||||||
None => by_namespace_code.push_str("None"),
|
|
||||||
Some(tags_map) => {
|
|
||||||
if let Some(global_attr) = tags_map.get("*") {
|
|
||||||
code.push_str(format!(
|
|
||||||
"static {}_{}_ATTR: &AttrMapEntry = &AttrMapEntry::AllNamespaceElements({});\n\n",
|
|
||||||
namespace.to_uppercase(),
|
|
||||||
attr_name.to_uppercase(),
|
|
||||||
global_attr.code(),
|
|
||||||
).as_str());
|
|
||||||
} else {
|
|
||||||
code.push_str(format!(
|
|
||||||
"static {}_{}_ATTR: &AttrMapEntry = &AttrMapEntry::SpecificNamespaceElements(phf::phf_map! {{\n{}\n}});\n\n",
|
|
||||||
namespace.to_uppercase(),
|
|
||||||
attr_name.to_uppercase(),
|
|
||||||
tags_map
|
|
||||||
.iter()
|
|
||||||
.map(|(tag_name, tag_attr)| format!(
|
|
||||||
"b\"{}\" => {}",
|
|
||||||
tag_name,
|
|
||||||
tag_attr.code(),
|
|
||||||
))
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
.join(",\n"),
|
|
||||||
).as_str());
|
|
||||||
};
|
|
||||||
by_namespace_code.push_str(format!("Some({}_{}_ATTR)", namespace.to_uppercase(), attr_name.to_uppercase()).as_str());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
by_namespace_code.push_str(",\n");
|
|
||||||
};
|
|
||||||
by_namespace_code.push_str("};\n\n");
|
|
||||||
code.push_str(&by_namespace_code);
|
|
||||||
};
|
|
||||||
code.push_str("pub static ATTRS: AttrMap = AttrMap::new(phf::phf_map! {\n");
|
|
||||||
for attr_name in attrs.keys() {
|
|
||||||
code.push_str(format!("\tb\"{}\" => {}_ATTR,\n", attr_name, attr_name.to_uppercase()).as_str());
|
|
||||||
};
|
|
||||||
code.push_str("});\n\n");
|
|
||||||
write_rs("attrs", code);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
struct Entity {
|
|
||||||
codepoints: Vec<u32>,
|
|
||||||
characters: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct TrieBuilderNode {
|
|
||||||
value: Option<String>,
|
|
||||||
children: Vec<Option<TrieBuilderNode>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
struct TrieBuilderGenerationContext<'t, 'v, 'o> {
|
|
||||||
trie_name: &'t str,
|
|
||||||
value_type: &'v str,
|
|
||||||
next_id: usize,
|
|
||||||
out: &'o mut String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'t, 'v, 'o> TrieBuilderGenerationContext<'t, 'v, 'o> {
|
|
||||||
pub fn id(&mut self) -> usize {
|
|
||||||
let next = self.next_id;
|
|
||||||
self.next_id += 1;
|
|
||||||
next
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TrieBuilderNode {
|
|
||||||
pub fn new() -> TrieBuilderNode {
|
|
||||||
let mut children = Vec::new();
|
|
||||||
for _ in 0..256 {
|
|
||||||
children.push(None);
|
|
||||||
};
|
|
||||||
TrieBuilderNode { value: None, children }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add(&mut self, seq: &[u8], value: String) -> () {
|
|
||||||
let mut current = self;
|
|
||||||
for c in seq.iter() {
|
|
||||||
current = current.children[*c as usize].get_or_insert_with(|| TrieBuilderNode::new());
|
|
||||||
};
|
|
||||||
current.value.replace(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn _generated_node_var_name(&self, trie_name: &str, node_id: usize) -> String {
|
|
||||||
format!("{trie_name}_NODE_{node_id}", trie_name = trie_name, node_id = node_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn _generate(&self, ctx: &mut TrieBuilderGenerationContext) -> usize {
|
|
||||||
let children = self.children.iter().map(|c| match c {
|
|
||||||
None => "None".to_string(),
|
|
||||||
Some(c) => {
|
|
||||||
let child_id = c._generate(ctx);
|
|
||||||
format!("Some({})", self._generated_node_var_name(ctx.trie_name, child_id))
|
|
||||||
}
|
|
||||||
}).collect::<Vec<String>>().join(", ");
|
|
||||||
let id = ctx.id();
|
|
||||||
let code = format!(
|
|
||||||
"static {var_name}: &'static crate::pattern::TrieNode<{value_type}> = &crate::pattern::TrieNode {{\n\tvalue: {value},\n\tchildren: [{children}],\n}};\n\n",
|
|
||||||
var_name = self._generated_node_var_name(ctx.trie_name, id),
|
|
||||||
value_type = ctx.value_type,
|
|
||||||
value = self.value.as_ref().map_or("None".to_string(), |v| format!("Some({})", v)),
|
|
||||||
children = children,
|
|
||||||
);
|
|
||||||
ctx.out.push_str(code.as_str());
|
|
||||||
id
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn generate(&self, trie_name: &str, value_type: &str) -> String {
|
|
||||||
let mut out = String::new();
|
|
||||||
let mut ctx = TrieBuilderGenerationContext {
|
|
||||||
trie_name,
|
|
||||||
value_type,
|
|
||||||
next_id: 0,
|
|
||||||
out: &mut out,
|
|
||||||
};
|
|
||||||
let root_id = self._generate(&mut ctx);
|
|
||||||
// Make root node public and use proper name.
|
|
||||||
ctx.out.replace(
|
|
||||||
format!("static {}", self._generated_node_var_name(trie_name, root_id)).as_str(),
|
|
||||||
format!("pub static {}", trie_name).as_str()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn generate_entities() {
|
|
||||||
// Read named entities map from JSON file.
|
|
||||||
let entities: HashMap<String, Entity> = read_json("entities");
|
|
||||||
|
|
||||||
// Add entities to trie builder.
|
|
||||||
let mut trie_builder: TrieBuilderNode = TrieBuilderNode::new();
|
|
||||||
for (rep, entity) in entities {
|
|
||||||
let val = if rep.as_bytes().len() < entity.characters.as_bytes().len() {
|
|
||||||
// Since we're minifying in place, we need to guarantee we'll never write something longer than source.
|
|
||||||
println!("Entity {} is shorter than decoded UTF-8 bytes...", rep);
|
|
||||||
// Include '&' in value.
|
|
||||||
create_byte_string_literal(rep.as_bytes())
|
|
||||||
} else {
|
|
||||||
create_byte_string_literal(entity.characters.as_bytes())
|
|
||||||
};
|
|
||||||
trie_builder.add(&(rep.as_bytes())[1..], val);
|
|
||||||
};
|
|
||||||
// Write trie code to output Rust file.
|
|
||||||
write_rs("entities", trie_builder.generate(
|
|
||||||
"ENTITY_REFERENCES",
|
|
||||||
"&'static [u8]",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
fn generate_patterns() {
|
|
||||||
let patterns: HashMap<String, String> = read_json("patterns");
|
|
||||||
|
|
||||||
for (name, pattern) in patterns {
|
|
||||||
let mut code = String::new();
|
|
||||||
code.push_str(format!("static {}: &crate::pattern::SinglePattern = &{};", name, build_pattern(pattern)).as_str());
|
|
||||||
write_rs(format!("pattern_{}", name).as_str(), code);
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
generate_attr_map();
|
|
||||||
generate_entities();
|
|
||||||
generate_patterns();
|
|
||||||
}
|
|
|
@ -1,2 +1,2 @@
|
||||||
node_modules/
|
node_modules/
|
||||||
build/*.d.ts
|
/package-lock.json
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
import { join } from "path";
|
||||||
|
import {mkdirSync, writeFileSync} from 'fs';
|
||||||
|
|
||||||
|
export const RUST_OUT_DIR = join(__dirname, '..', 'src', 'gen');
|
||||||
|
|
||||||
|
try {
|
||||||
|
mkdirSync(RUST_OUT_DIR);
|
||||||
|
} catch (err) {
|
||||||
|
if (err.code !== 'EEXIST') {
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writeFileSync(join(RUST_OUT_DIR, 'mod.rs'), `
|
||||||
|
pub mod attrs;
|
||||||
|
pub mod dfa;
|
||||||
|
pub mod entities;
|
||||||
|
pub mod patterns;
|
||||||
|
`);
|
||||||
|
|
||||||
|
export const DATA_DIR = join(__dirname, 'data');
|
||||||
|
|
||||||
|
export const leftPad = (str: string, n: number) => '0'.repeat(n - str.length) + str;
|
||||||
|
|
||||||
|
export const prettyJson = (v: any) => JSON.stringify(v, null, 2);
|
||||||
|
|
||||||
|
export const byteStringLiteral = (bytes: number[]): string => 'b"' + bytes.map(c => {
|
||||||
|
if (c > 255) throw new Error('Not a byte');
|
||||||
|
// 0x20 == ' '.
|
||||||
|
// 0x7E == '~'.
|
||||||
|
// 0x5C == '\\'.
|
||||||
|
// 0x22 == '"'.
|
||||||
|
if (c >= 0x20 && c <= 0x7E && c != 0x5C && c != 0x22) {
|
||||||
|
return String.fromCharCode(c);
|
||||||
|
} else {
|
||||||
|
return `\\x${leftPad(c.toString(16), 2)}`;
|
||||||
|
}
|
||||||
|
}).join('') + '"';
|
4431
gen/attrs.json
4431
gen/attrs.json
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,210 @@
|
||||||
|
import {readFileSync, writeFileSync} from 'fs';
|
||||||
|
import ts, {Node, SourceFile, SyntaxKind, Type} from 'typescript';
|
||||||
|
import {join} from 'path';
|
||||||
|
import {DATA_DIR, prettyJson, RUST_OUT_DIR} from './_common';
|
||||||
|
|
||||||
|
const reactDeclarations = readFileSync(join(__dirname, 'data', 'react.d.ts'), 'utf8');
|
||||||
|
|
||||||
|
// TODO Consider and check behaviour when value matches case insensitively, after trimming whitespace, numerically (for number values), etc.
|
||||||
|
// TODO This file is currently manually sourced and written. Try to get machine-readable spec and automate.
|
||||||
|
const defaultAttributeValues: {
|
||||||
|
[attr: string]: {
|
||||||
|
tags: string[];
|
||||||
|
defaultValue: string;
|
||||||
|
isPositiveInteger?: boolean;
|
||||||
|
}[];
|
||||||
|
} = JSON.parse(readFileSync(join(DATA_DIR, 'attrs.json'), 'utf8'));
|
||||||
|
|
||||||
|
const tagNameNormalised = {
|
||||||
|
'anchor': 'a',
|
||||||
|
};
|
||||||
|
|
||||||
|
const attrNameNormalised = {
|
||||||
|
'classname': 'class',
|
||||||
|
};
|
||||||
|
|
||||||
|
const reactSpecificAttributes = [
|
||||||
|
'defaultChecked', 'defaultValue', 'suppressContentEditableWarning', 'suppressHydrationWarning',
|
||||||
|
];
|
||||||
|
|
||||||
|
const collapsibleAndTrimmable = {
|
||||||
|
'class': ['html:*'],
|
||||||
|
'd': ['svg:*'],
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO Is escapedText the API for getting name?
|
||||||
|
const getNameOfNode = (n: any) => n.name.escapedText;
|
||||||
|
const normaliseName = (name: string, norms: { [name: string]: string }) => [name.toLowerCase()].map(n => norms[n] || n)[0];
|
||||||
|
|
||||||
|
type AttrConfig = {
|
||||||
|
boolean: boolean;
|
||||||
|
redundantIfEmpty: boolean;
|
||||||
|
collapseAndTrim: boolean;
|
||||||
|
defaultValue?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
const rsTagAttr = ({
|
||||||
|
redundantIfEmpty,
|
||||||
|
defaultValue,
|
||||||
|
collapseAndTrim,
|
||||||
|
boolean,
|
||||||
|
}: AttrConfig) => `AttributeMinification {
|
||||||
|
boolean: ${boolean},
|
||||||
|
redundant_if_empty: ${redundantIfEmpty},
|
||||||
|
collapse_and_trim: ${collapseAndTrim},
|
||||||
|
default_value: ${defaultValue == undefined ? 'None' : `Some(b"${defaultValue}")`},
|
||||||
|
}`;
|
||||||
|
|
||||||
|
const processReactTypeDeclarations = (source: SourceFile) => {
|
||||||
|
const nodes: Node[] = [source];
|
||||||
|
// Use index-based loop to keep iterating as nodes array grows.
|
||||||
|
for (let i = 0; i < nodes.length; i++) {
|
||||||
|
// forEachChild doesn't work if return value is number (e.g. return value of Array.prototype.push).
|
||||||
|
nodes[i].forEachChild(c => void nodes.push(c));
|
||||||
|
}
|
||||||
|
const attributeNodes = nodes
|
||||||
|
.filter(n => n.kind === ts.SyntaxKind.InterfaceDeclaration)
|
||||||
|
.map(n => [/^([A-Za-z]*)(HTML|SVG)Attributes/.exec(getNameOfNode(n)), n])
|
||||||
|
.filter(([matches]) => !!matches)
|
||||||
|
.map(([matches, node]) => [matches![2].toLowerCase(), normaliseName(matches![1], tagNameNormalised), node])
|
||||||
|
.filter(([namespace, tagName]) => namespace !== 'html' || !['all', 'webview'].includes(tagName))
|
||||||
|
.map(([namespace, tag, node]) => ({namespace, tag, node}))
|
||||||
|
.sort((a, b) => a.namespace.localeCompare(b.namespace) || a.tag.localeCompare(b.tag));
|
||||||
|
|
||||||
|
// Process global HTML attributes first as they also appear on some specific HTML tags but we don't want to keep the specific ones if they're global.
|
||||||
|
if (attributeNodes[0].namespace !== 'html' || attributeNodes[0].tag !== '') {
|
||||||
|
throw new Error(`Global HTML attributes is not first to be processed`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map structure: attr => namespace => tag => config.
|
||||||
|
const attributes = new Map<string, Map<'html' | 'svg', Map<string, AttrConfig>>>();
|
||||||
|
|
||||||
|
for (const {namespace, tag, node} of attributeNodes) {
|
||||||
|
const fullyQualifiedTagName = [namespace, tag || '*'].join(':');
|
||||||
|
for (const n of node.members.filter((n: Node) => n.kind === ts.SyntaxKind.PropertySignature)) {
|
||||||
|
const attrName = normaliseName(getNameOfNode(n), attrNameNormalised);
|
||||||
|
if (reactSpecificAttributes.includes(attrName)) continue;
|
||||||
|
|
||||||
|
const types: SyntaxKind[] = n.type.kind === ts.SyntaxKind.UnionType
|
||||||
|
? n.type.types.map((t: Node) => t.kind)
|
||||||
|
: [n.type.kind];
|
||||||
|
|
||||||
|
const boolean = types.includes(ts.SyntaxKind.BooleanKeyword);
|
||||||
|
// If types includes boolean and string, make it a boolean attr to prevent it from being removed if empty value.
|
||||||
|
const redundantIfEmpty = !boolean && types.some(t => t === ts.SyntaxKind.StringKeyword || t === ts.SyntaxKind.NumberKeyword);
|
||||||
|
const defaultValues = (defaultAttributeValues[attrName] || [])
|
||||||
|
.filter(a => a.tags.includes(fullyQualifiedTagName))
|
||||||
|
.map(a => a.defaultValue);
|
||||||
|
const collapseAndTrim = (collapsibleAndTrimmable[attrName] || []).includes(fullyQualifiedTagName);
|
||||||
|
if (defaultValues.length > 1) {
|
||||||
|
throw new Error(`Tag-attribute combination <${fullyQualifiedTagName} ${attrName}> has multiple default values: ${defaultValues}`);
|
||||||
|
}
|
||||||
|
const attr: AttrConfig = {
|
||||||
|
boolean,
|
||||||
|
redundantIfEmpty,
|
||||||
|
collapseAndTrim,
|
||||||
|
defaultValue: defaultValues[0],
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!attributes.has(attrName)) attributes.set(attrName, new Map());
|
||||||
|
const namespacesForAttribute = attributes.get(attrName)!;
|
||||||
|
if (!namespacesForAttribute.has(namespace)) namespacesForAttribute.set(namespace, new Map());
|
||||||
|
const tagsForNsAttribute = namespacesForAttribute.get(namespace)!;
|
||||||
|
if (tagsForNsAttribute.has(tag)) throw new Error(`Duplicate tag-attribute combination: <${fullyQualifiedTagName} ${attrName}>`);
|
||||||
|
|
||||||
|
const globalAttr = tagsForNsAttribute.get('*');
|
||||||
|
if (globalAttr) {
|
||||||
|
if (globalAttr.boolean !== attr.boolean
|
||||||
|
|| globalAttr.redundantIfEmpty !== attr.redundantIfEmpty
|
||||||
|
|| globalAttr.collapseAndTrim !== attr.collapseAndTrim
|
||||||
|
|| globalAttr.defaultValue !== attr.defaultValue) {
|
||||||
|
throw new Error(`Global and tag-specific attributes conflict: ${prettyJson(globalAttr)} ${prettyJson(attr)}`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tagsForNsAttribute.set(tag || '*', attr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let code = `
|
||||||
|
use crate::spec::tag::ns::Namespace;
|
||||||
|
|
||||||
|
pub struct AttributeMinification {
|
||||||
|
pub boolean: bool,
|
||||||
|
pub redundant_if_empty: bool,
|
||||||
|
pub collapse_and_trim: bool,
|
||||||
|
pub default_value: Option<&'static [u8]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum AttrMapEntry {
|
||||||
|
AllNamespaceElements(AttributeMinification),
|
||||||
|
SpecificNamespaceElements(phf::Map<&'static [u8], AttributeMinification>),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub struct ByNamespace {
|
||||||
|
// Make pub so this struct can be statically created in gen/attrs.rs.
|
||||||
|
pub html: Option<&'static AttrMapEntry>,
|
||||||
|
pub svg: Option<&'static AttrMapEntry>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ByNamespace {
|
||||||
|
fn get(&self, ns: Namespace) -> Option<&'static AttrMapEntry> {
|
||||||
|
match ns {
|
||||||
|
Namespace::Html => self.html,
|
||||||
|
Namespace::Svg => self.svg,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct AttrMap(phf::Map<&'static [u8], ByNamespace>);
|
||||||
|
|
||||||
|
impl AttrMap {
|
||||||
|
pub const fn new(map: phf::Map<&'static [u8], ByNamespace>) -> AttrMap {
|
||||||
|
AttrMap(map)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get(&self, ns: Namespace, tag: &[u8], attr: &[u8]) -> Option<&AttributeMinification> {
|
||||||
|
self.0.get(attr).and_then(|namespaces| namespaces.get(ns)).and_then(|entry| match entry {
|
||||||
|
AttrMapEntry::AllNamespaceElements(min) => Some(min),
|
||||||
|
AttrMapEntry::SpecificNamespaceElements(map) => map.get(tag),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
`;
|
||||||
|
|
||||||
|
for (const [attrName, namespaces] of attributes) {
|
||||||
|
let byNsCode = '';
|
||||||
|
byNsCode += `static ${attrName.toUpperCase()}_ATTR: ByNamespace = ByNamespace {\n`;
|
||||||
|
for (const ns of ['html', 'svg'] as const) {
|
||||||
|
byNsCode += `\t${ns}: `;
|
||||||
|
const tagsMap = namespaces.get(ns);
|
||||||
|
if (!tagsMap) {
|
||||||
|
byNsCode += 'None';
|
||||||
|
} else {
|
||||||
|
const globalAttr = tagsMap.get('*');
|
||||||
|
if (globalAttr) {
|
||||||
|
code += `static ${ns.toUpperCase()}_${attrName.toUpperCase()}_ATTR: &AttrMapEntry = &AttrMapEntry::AllNamespaceElements(${rsTagAttr(globalAttr)});\n\n`;
|
||||||
|
} else {
|
||||||
|
code += `static ${ns.toUpperCase()}_${attrName.toUpperCase()}_ATTR: &AttrMapEntry = &AttrMapEntry::SpecificNamespaceElements(phf::phf_map! {\n${
|
||||||
|
[...tagsMap].map(([tagName, tagAttr]) => `b\"${tagName}\" => ${rsTagAttr(tagAttr)}`).join(',\n')
|
||||||
|
}\n});\n\n`;
|
||||||
|
}
|
||||||
|
byNsCode += `Some(${ns.toUpperCase()}_${attrName.toUpperCase()}_ATTR)`;
|
||||||
|
}
|
||||||
|
byNsCode += ',\n';
|
||||||
|
}
|
||||||
|
byNsCode += '};\n\n';
|
||||||
|
code += byNsCode;
|
||||||
|
}
|
||||||
|
code += 'pub static ATTRS: AttrMap = AttrMap::new(phf::phf_map! {\n';
|
||||||
|
for (const attr_name of attributes.keys()) {
|
||||||
|
code += `\tb\"${attr_name}\" => ${attr_name.toUpperCase()}_ATTR,\n`;
|
||||||
|
}
|
||||||
|
code += '});\n\n';
|
||||||
|
return code;
|
||||||
|
};
|
||||||
|
|
||||||
|
const source = ts.createSourceFile(`react.d.ts`, reactDeclarations, ts.ScriptTarget.ES2020);
|
||||||
|
writeFileSync(join(RUST_OUT_DIR, 'attrs.rs'), processReactTypeDeclarations(source));
|
|
@ -1,215 +0,0 @@
|
||||||
const request = require('request-promise-native');
|
|
||||||
const {promises: fs} = require('fs');
|
|
||||||
const ts = require('typescript');
|
|
||||||
const path = require('path');
|
|
||||||
|
|
||||||
const compareEntryNames = (a, b) => a[0].localeCompare(b[0]);
|
|
||||||
// Recursively convert a (possibly nested) Map into a plain object whose keys
// are sorted with compareEntryNames, so serialised output is deterministic.
const deepObjectifyMap = map => {
	const entries = [];
	for (const [key, value] of map) {
		entries.push([key, value instanceof Map ? deepObjectifyMap(value) : value]);
	}
	entries.sort(compareEntryNames);
	return Object.fromEntries(entries);
};
|
|
||||||
const fromCamelCase = camelCase => camelCase.split(/(?=^|[A-Z])/).map(w => w.toLowerCase());
|
|
||||||
const prettyjson = v => JSON.stringify(v, null, 2);
|
|
||||||
|
|
||||||
// Output path for the generated attribute metadata.
const ATTRS_PATH = path.join(__dirname, '..', 'attrs.json');

// Source of the attribute declarations: the React typings on DefinitelyTyped,
// cached locally after the first download.
const REACT_TYPINGS_URL = 'https://raw.githubusercontent.com/DefinitelyTyped/DefinitelyTyped/master/types/react/index.d.ts';
const REACT_TYPINGS_FILE = path.join(__dirname, 'react.d.ts');
// Return the React type declarations, preferring the cached local copy and
// downloading (then caching) them otherwise.
const fetchReactTypingsSource = async () => {
	try {
		return await fs.readFile(REACT_TYPINGS_FILE, 'utf8');
	} catch (err) {
		// Only "file not found" falls through to a download; any other I/O error is fatal.
		if (err.code !== 'ENOENT') {
			throw err;
		}
		const source = await request(REACT_TYPINGS_URL);
		// Cache for future runs before returning.
		await fs.writeFile(REACT_TYPINGS_FILE, source);
		return source;
	}
};
|
|
||||||
|
|
||||||
// React interface-name fragments that don't match the real HTML tag name.
const tagNameNormalised = {
	'anchor': 'a',
};

// React attribute names (lowercased) that don't match the real HTML attribute name.
const attrNameNormalised = {
	'classname': 'class',
};

// Props that exist only in React's API, not in HTML; skipped entirely.
const reactSpecificAttributes = [
	'defaultChecked', 'defaultValue', 'suppressContentEditableWarning', 'suppressHydrationWarning',
];
|
|
||||||
|
|
||||||
// TODO Consider and check behaviour when value matches case insensitively, after trimming whitespace, numerically (for number values), etc.
|
|
||||||
// TODO This is currently manually sourced and written. Try to get machine-readable spec and automate.
|
|
||||||
const defaultAttributeValues = {
|
|
||||||
'align': [{
|
|
||||||
tags: ['html:img'],
|
|
||||||
defaultValue: 'bottom',
|
|
||||||
}],
|
|
||||||
'decoding': [{
|
|
||||||
tags: ['html:img'],
|
|
||||||
defaultValue: 'auto',
|
|
||||||
}],
|
|
||||||
'enctype': [{
|
|
||||||
tags: ['html:form'],
|
|
||||||
defaultValue: 'application/x-www-form-urlencoded',
|
|
||||||
}],
|
|
||||||
'frameborder': [{
|
|
||||||
tags: ['html:iframe'],
|
|
||||||
defaultValue: '1',
|
|
||||||
isPositiveInteger: true,
|
|
||||||
}],
|
|
||||||
'formenctype': [{
|
|
||||||
tags: ['html:button', 'html:input'],
|
|
||||||
defaultValue: 'application/x-www-form-urlencoded',
|
|
||||||
}],
|
|
||||||
'height': [{
|
|
||||||
tags: ['html:iframe'],
|
|
||||||
defaultValue: '150',
|
|
||||||
isPositiveInteger: true,
|
|
||||||
}],
|
|
||||||
'importance': [{
|
|
||||||
tags: ['html:iframe'],
|
|
||||||
defaultValue: 'auto',
|
|
||||||
}],
|
|
||||||
'loading': [{
|
|
||||||
tags: ['html:iframe', 'html:img'],
|
|
||||||
defaultValue: 'eager',
|
|
||||||
}],
|
|
||||||
'media': [{
|
|
||||||
tags: ['html:style'],
|
|
||||||
defaultValue: 'all',
|
|
||||||
}],
|
|
||||||
'method': [{
|
|
||||||
tags: ['html:form'],
|
|
||||||
defaultValue: 'get',
|
|
||||||
}],
|
|
||||||
'referrerpolicy': [{
|
|
||||||
tags: ['html:iframe', 'html:img'],
|
|
||||||
defaultValue: 'no-referrer-when-downgrade',
|
|
||||||
}],
|
|
||||||
'rules': [{
|
|
||||||
tags: ['html:table'],
|
|
||||||
defaultValue: 'none',
|
|
||||||
}],
|
|
||||||
'shape': [{
|
|
||||||
tags: ['html:area'],
|
|
||||||
defaultValue: 'rect',
|
|
||||||
}],
|
|
||||||
'span': [{
|
|
||||||
tags: ['html:col', 'html:colgroup'],
|
|
||||||
defaultValue: '1',
|
|
||||||
isPositiveInteger: true,
|
|
||||||
}],
|
|
||||||
'target': [{
|
|
||||||
tags: ['html:a', 'html:form'],
|
|
||||||
defaultValue: '_self',
|
|
||||||
}],
|
|
||||||
'type': [{
|
|
||||||
tags: ['html:button'],
|
|
||||||
defaultValue: 'submit',
|
|
||||||
}, {
|
|
||||||
tags: ['html:input'],
|
|
||||||
defaultValue: 'text',
|
|
||||||
}, {
|
|
||||||
tags: ['html:link', 'html:style'],
|
|
||||||
defaultValue: 'text/css',
|
|
||||||
}],
|
|
||||||
'width': [{
|
|
||||||
tags: ['html:iframe'],
|
|
||||||
defaultValue: '300',
|
|
||||||
isPositiveInteger: true,
|
|
||||||
}]
|
|
||||||
};
|
|
||||||
|
|
||||||
const collapsibleAndTrimmable = {
|
|
||||||
'class': ['html:*'],
|
|
||||||
'd': ['svg:*'],
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO Is escapedText the API for getting name?
// Extract the identifier text of a named TypeScript AST node.
const getNameOfNode = node => node.name.escapedText;
|
|
||||||
const normaliseName = (name, norms) => [name.toLowerCase()].map(n => norms[n] || n)[0];
|
|
||||||
|
|
||||||
// Walk the parsed React typings AST, collect per-tag attribute metadata from
// the *HTMLAttributes / *SVGAttributes interfaces, and write it to attrs.json.
const processReactTypeDeclarations = async (source) => {
	// Flatten the entire AST into one array, breadth-first.
	const nodes = [source];
	// Use index-based loop to keep iterating as nodes array grows.
	for (let i = 0; i < nodes.length; i++) {
		// forEachChild doesn't work if return value is number (e.g. return value of Array.prototype.push).
		nodes[i].forEachChild(c => void nodes.push(c));
	}
	// Interfaces named like "AnchorHTMLAttributes" / "SVGAttributes" carry the
	// attribute declarations; map each to {namespace, tag, node}. An empty tag
	// means the interface declares namespace-wide (global) attributes.
	const attributeNodes = nodes
		.filter(n => n.kind === ts.SyntaxKind.InterfaceDeclaration)
		.map(n => [/^([A-Za-z]*)(HTML|SVG)Attributes/.exec(getNameOfNode(n)), n])
		.filter(([matches]) => matches)
		.map(([matches, node]) => [matches[2].toLowerCase(), normaliseName(matches[1], tagNameNormalised), node])
		// "all" and "webview" interfaces don't correspond to real HTML tags.
		.filter(([namespace, tagName]) => namespace !== 'html' || !['all', 'webview'].includes(tagName))
		.map(([namespace, tag, node]) => ({namespace, tag, node}))
		.sort((a, b) => a.namespace.localeCompare(b.namespace) || a.tag.localeCompare(b.tag));

	// Process global HTML attributes first as they also appear on some specific HTML tags but we don't want to keep the specific ones if they're global.
	if (attributeNodes[0].namespace !== 'html' || attributeNodes[0].tag !== '') {
		throw new Error(`Global HTML attributes is not first to be processed`);
	}

	// Map structure: attr => namespace => tag => config.
	const attributes = new Map();

	for (const {namespace, tag, node} of attributeNodes) {
		const fullyQualifiedTagName = [namespace, tag || '*'].join(':');
		for (const n of node.members.filter(n => n.kind === ts.SyntaxKind.PropertySignature)) {
			const attrName = normaliseName(getNameOfNode(n), attrNameNormalised);
			if (reactSpecificAttributes.includes(attrName)) continue;

			// Collect the declared type kind(s); a union contributes each member's kind.
			const types = n.type.kind === ts.SyntaxKind.UnionType
				? n.type.types.map(t => t.kind)
				: [n.type.kind];

			const boolean = types.includes(ts.SyntaxKind.BooleanKeyword);
			// If types includes boolean and string, make it a boolean attr to prevent it from being removed if empty value.
			const redundantIfEmpty = !boolean &&
				(types.includes(ts.SyntaxKind.StringKeyword) || types.includes(ts.SyntaxKind.NumberKeyword));
			// All configured default values that apply to this exact tag.
			const defaultValue = (defaultAttributeValues[attrName] || [])
				.filter(a => a.tags.includes(fullyQualifiedTagName))
				.map(a => a.defaultValue);
			const collapseAndTrim = (collapsibleAndTrimmable[attrName] || []).includes(fullyQualifiedTagName);
			if (defaultValue.length > 1) {
				throw new Error(`Tag-attribute combination <${fullyQualifiedTagName} ${attrName}> has multiple default values: ${defaultValue}`);
			}
			// snake_case keys here become the property names in the emitted JSON.
			const attr = {
				boolean,
				redundant_if_empty: redundantIfEmpty,
				collapse_and_trim: collapseAndTrim,
				default_value: defaultValue[0],
			};

			if (!attributes.has(attrName)) attributes.set(attrName, new Map());
			const namespacesForAttribute = attributes.get(attrName);
			if (!namespacesForAttribute.has(namespace)) namespacesForAttribute.set(namespace, new Map());
			const tagsForNSAttribute = namespacesForAttribute.get(namespace);
			// NOTE(review): this checks `tag` (possibly '') but entries are stored
			// under `tag || '*'`, so duplicate detection for the global ('') entry
			// looks up a key that is never set — confirm this is intentional.
			if (tagsForNSAttribute.has(tag)) throw new Error(`Duplicate tag-attribute combination: <${fullyQualifiedTagName} ${attrName}>`);

			// A tag-specific entry identical to the namespace-global one is dropped;
			// a differing one is an error rather than a silent override.
			const globalAttr = tagsForNSAttribute.get('*');
			if (globalAttr) {
				if (globalAttr.boolean !== attr.boolean
					|| globalAttr.redundant_if_empty !== attr.redundant_if_empty
					|| globalAttr.collapse_and_trim !== attr.collapse_and_trim
					|| globalAttr.default_value !== attr.default_value) {
					throw new Error(`Global and tag-specific attributes conflict: ${prettyjson(globalAttr)} ${prettyjson(attr)}`);
				}
			} else {
				tagsForNSAttribute.set(tag || '*', attr);
			}
		}
	}

	// Sort output JSON object by property so diffs are clearer.
	await fs.writeFile(ATTRS_PATH, prettyjson(deepObjectifyMap(attributes)));
};
|
|
||||||
|
|
||||||
// Entry point: fetch/parse the React typings and regenerate attrs.json.
// The original IIFE let rejections escape as unhandled promise rejections;
// report them and signal failure via the exit code instead.
(async () => {
	const source = ts.createSourceFile(`react.d.ts`, await fetchReactTypingsSource(), ts.ScriptTarget.ES2019);
	await processReactTypeDeclarations(source);
})().catch(err => {
	console.error(err);
	process.exitCode = 1;
});
|
|
|
@ -1,8 +0,0 @@
|
||||||
{
|
|
||||||
"private": true,
|
|
||||||
"dependencies": {
|
|
||||||
"request": "^2.88.0",
|
|
||||||
"request-promise-native": "^1.0.8",
|
|
||||||
"typescript": "^3.7.4"
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,160 @@
|
||||||
|
{
|
||||||
|
"align": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:img"
|
||||||
|
],
|
||||||
|
"defaultValue": "bottom"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"decoding": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:img"
|
||||||
|
],
|
||||||
|
"defaultValue": "auto"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"enctype": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:form"
|
||||||
|
],
|
||||||
|
"defaultValue": "application/x-www-form-urlencoded"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"frameborder": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:iframe"
|
||||||
|
],
|
||||||
|
"defaultValue": "1",
|
||||||
|
"isPositiveInteger": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"formenctype": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:button",
|
||||||
|
"html:input"
|
||||||
|
],
|
||||||
|
"defaultValue": "application/x-www-form-urlencoded"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"height": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:iframe"
|
||||||
|
],
|
||||||
|
"defaultValue": "150",
|
||||||
|
"isPositiveInteger": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"importance": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:iframe"
|
||||||
|
],
|
||||||
|
"defaultValue": "auto"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"loading": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:iframe",
|
||||||
|
"html:img"
|
||||||
|
],
|
||||||
|
"defaultValue": "eager"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"media": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:style"
|
||||||
|
],
|
||||||
|
"defaultValue": "all"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"method": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:form"
|
||||||
|
],
|
||||||
|
"defaultValue": "get"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"referrerpolicy": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:iframe",
|
||||||
|
"html:img"
|
||||||
|
],
|
||||||
|
"defaultValue": "no-referrer-when-downgrade"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rules": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:table"
|
||||||
|
],
|
||||||
|
"defaultValue": "none"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"shape": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:area"
|
||||||
|
],
|
||||||
|
"defaultValue": "rect"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:col",
|
||||||
|
"html:colgroup"
|
||||||
|
],
|
||||||
|
"defaultValue": "1",
|
||||||
|
"isPositiveInteger": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"target": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:a",
|
||||||
|
"html:form"
|
||||||
|
],
|
||||||
|
"defaultValue": "_self"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"type": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:button"
|
||||||
|
],
|
||||||
|
"defaultValue": "submit"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:input"
|
||||||
|
],
|
||||||
|
"defaultValue": "text"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:link",
|
||||||
|
"html:style"
|
||||||
|
],
|
||||||
|
"defaultValue": "text/css"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"width": [
|
||||||
|
{
|
||||||
|
"tags": [
|
||||||
|
"html:iframe"
|
||||||
|
],
|
||||||
|
"defaultValue": "300",
|
||||||
|
"isPositiveInteger": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
# Prefixes:
|
||||||
|
# `_` means to accumulate the matched input lowercased.
|
||||||
|
# `<` means to accumulate transition pattern as part of current state.
|
||||||
|
# `+` means to accumulate transition pattern as part of next state.
|
||||||
|
# `?` means to look ahead but don't accumulate transition pattern and allow next state to reconsume.
|
||||||
|
|
||||||
|
Text:
|
||||||
|
'\w': ?TextWhitespace
|
||||||
|
'\<': +OpeningTagStart
|
||||||
|
'\</': +ClosingTag
|
||||||
|
'\<!--': +Comment
|
||||||
|
'&': ?TextEntity
|
||||||
|
'': Text
|
||||||
|
|
||||||
|
TextWhitespace:
|
||||||
|
'\w': TextWhitespace
|
||||||
|
'&': ?TextEntity
|
||||||
|
'': ?Text
|
||||||
|
|
||||||
|
Comment:
|
||||||
|
'-->': <Text
|
||||||
|
'': Comment
|
||||||
|
|
||||||
|
ClosingTag:
|
||||||
|
'<tagName>': _ClosingTag
|
||||||
|
'>': <Text
|
||||||
|
|
||||||
|
OpeningTagStart:
|
||||||
|
'\w': ?OpeningTagWhitespace
|
||||||
|
'<tagName>': _OpeningTagStart
|
||||||
|
|
||||||
|
OpeningTagWhitespace:
|
||||||
|
'\w': OpeningTagWhitespace
|
||||||
|
'<attrName>': ?AttrName
|
||||||
|
'>': <Text
|
||||||
|
|
||||||
|
AttrName:
|
||||||
|
'[>=\w]': ?AttrAfterName
|
||||||
|
'<attrName>': _AttrName
|
||||||
|
|
||||||
|
AttrAfterName:
|
||||||
|
'\w': AttrAfterName
|
||||||
|
'>': ?OpeningTagWhitespace
|
||||||
|
'=': +AttrBeforeValue
|
||||||
|
|
||||||
|
AttrBeforeValue:
|
||||||
|
'\w': AttrBeforeValue
|
||||||
|
"'": +AttrSingleQuotedValue
|
||||||
|
'"': +AttrDoubleQuotedValue
|
||||||
|
'': ?AttrUnquotedValue
|
||||||
|
|
||||||
|
AttrSingleQuotedValue:
|
||||||
|
"'": <OpeningTagWhitespace
|
||||||
|
'&': ?AttrValueEntity
|
||||||
|
'\w': ?AttrSingleQuotedValueWhitespace
|
||||||
|
'': AttrSingleQuotedValue
|
||||||
|
|
||||||
|
AttrSingleQuotedValueWhitespace:
|
||||||
|
'\w': AttrSingleQuotedValueWhitespace
|
||||||
|
'&': ?AttrValueEntity
|
||||||
|
'': ?AttrSingleQuotedValue
|
||||||
|
|
||||||
|
AttrDoubleQuotedValue:
|
||||||
|
'"': <OpeningTagWhitespace
|
||||||
|
'&': ?AttrValueEntity
|
||||||
|
'\w': ?AttrDoubleQuotedValueWhitespace
|
||||||
|
'': AttrDoubleQuotedValue
|
||||||
|
|
||||||
|
AttrDoubleQuotedValueWhitespace:
|
||||||
|
'\w': AttrDoubleQuotedValueWhitespace
|
||||||
|
'&': ?AttrValueEntity
|
||||||
|
'': ?AttrDoubleQuotedValue
|
||||||
|
|
||||||
|
AttrUnquotedValue:
|
||||||
|
'\w': ?OpeningTagWhitespace
|
||||||
|
'&': ?AttrValueEntity
|
||||||
|
'': AttrUnquotedValue
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,71 @@
|
||||||
|
import yaml from 'yaml';
|
||||||
|
import {DATA_DIR, RUST_OUT_DIR} from './_common';
|
||||||
|
import {readFileSync, writeFileSync} from 'fs';
|
||||||
|
import {join} from 'path';
|
||||||
|
import {EOL} from 'os';
|
||||||
|
import {parsePattern, TrieBuilder} from './trie';
|
||||||
|
|
||||||
|
// State machine description: state name => (transition pattern => action string).
const dfa: { [node: string]: { [transition: string]: string } } = yaml.parse(readFileSync(join(DATA_DIR, 'dfa.yaml'), 'utf8'));
// These states must always exist; see lex/mod.rs for more details.
dfa['TextEntity'] = {};
dfa['AttrValueEntity'] = {};
dfa['Unknown'] = {};
dfa['EOF'] = {};

// Sorted state names; the order fixes the State enum discriminants and the
// index of each state's trie in TRANSITIONS.
const nodes = Object.keys(dfa).sort();
|
||||||
|
|
||||||
|
const rsTransition = (val: string) => {
|
||||||
|
const [_, flag, next] = /^([_<+?]?)(.*)$/.exec(val)!;
|
||||||
|
const consumeMode = {
|
||||||
|
'_': 'AccumulateLowerCase',
|
||||||
|
'': 'Accumulate',
|
||||||
|
'<': 'Current',
|
||||||
|
'+': 'Next',
|
||||||
|
'?': 'Reconsume',
|
||||||
|
}[flag];
|
||||||
|
return `Transition {
|
||||||
|
to: State::${next},
|
||||||
|
consume: ConsumeMode::${consumeMode},
|
||||||
|
}`;
|
||||||
|
};
|
||||||
|
|
||||||
|
const output = `
|
||||||
|
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||||
|
pub enum State {
|
||||||
|
${nodes.map((n, i) => `${n} = ${i}`).join(`,${EOL} `)}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||||
|
pub enum ConsumeMode {
|
||||||
|
Current,
|
||||||
|
Next,
|
||||||
|
Reconsume,
|
||||||
|
Accumulate,
|
||||||
|
AccumulateLowerCase,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub struct Transition {
|
||||||
|
// Make pub to allow destructuring.
|
||||||
|
pub to: State,
|
||||||
|
pub consume: ConsumeMode,
|
||||||
|
}
|
||||||
|
|
||||||
|
${nodes.map(n => {
|
||||||
|
const trieBuilder = new TrieBuilder(n.toUpperCase(), 'Transition');
|
||||||
|
for (const [pat, val] of Object.entries(dfa[n])) {
|
||||||
|
if (pat == '') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
trieBuilder.addPattern(parsePattern(pat), rsTransition(val));
|
||||||
|
}
|
||||||
|
if (dfa[n][''] !== undefined) {
|
||||||
|
trieBuilder.fillRemaining(rsTransition(dfa[n]['']));
|
||||||
|
}
|
||||||
|
return trieBuilder.generate();
|
||||||
|
}).join(EOL + EOL)}
|
||||||
|
|
||||||
|
pub static TRANSITIONS: [&'static crate::pattern::TrieNode<Transition>; ${nodes.length}] = [${nodes.map(n => n.toUpperCase()).join(', ')}];
|
||||||
|
`;
|
||||||
|
|
||||||
|
writeFileSync(join(RUST_OUT_DIR, 'dfa.rs'), output);
|
|
@ -0,0 +1,28 @@
|
||||||
|
import {readFileSync, writeFileSync} from 'fs';
|
||||||
|
import {join} from 'path';
|
||||||
|
import {byteStringLiteral, DATA_DIR, RUST_OUT_DIR} from './_common';
|
||||||
|
import {parsePattern, TrieBuilder} from './trie';
|
||||||
|
|
||||||
|
// Entity table (presumably the WHATWG named-character-reference list — confirm
// provenance of data/entities.json): entity text (e.g. "&amp;") => decoded form.
const entities: {[name: string]: {codepoints: number[]; characters: string;}} = JSON.parse(readFileSync(join(DATA_DIR, 'entities.json'), 'utf8'));

// Trie mapping entity byte sequences to EntityType values.
const trieBuilder = new TrieBuilder('ENTITY', "EntityType");
// Numeric character references are recognised by shape, not enumerated.
trieBuilder.addPattern(parsePattern("&#[0-9]"), 'EntityType::Dec');
trieBuilder.addPattern(parsePattern("&#x[0-9a-fA-F]"), 'EntityType::Hex');
for (const [rep, entity] of Object.entries(entities)) {
	const bytes = Buffer.from(entity.characters, 'utf8');
	// Since we're minifying in place, we need to guarantee we'll never write something longer than source.
	// If the decoded form is longer than the entity text itself, keep the entity text as the replacement.
	const val = byteStringLiteral(rep.length < bytes.length ? [...rep].map(c => c.charCodeAt(0)) : [...bytes]);
	trieBuilder.add(rep, `EntityType::Named(${val})`);
}
|
||||||
|
|
||||||
|
// Generated Rust module: the EntityType enum plus the ENTITY trie statics.
const output = `
#[derive(Clone, Copy)]
pub enum EntityType {
	Named(&'static [u8]),
	Dec,
	Hex,
}

${trieBuilder.generate()}
`;
writeFileSync(join(RUST_OUT_DIR, 'entities.rs'), output);
|
|
@ -0,0 +1,9 @@
|
||||||
|
{
|
||||||
|
"private": true,
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "^14.0.5",
|
||||||
|
"ts-node": "^8.10.1",
|
||||||
|
"typescript": "^3.7.4",
|
||||||
|
"yaml": "^1.10.0"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
import {readFileSync, writeFileSync} from 'fs';
|
||||||
|
import {DATA_DIR, RUST_OUT_DIR} from './_common';
|
||||||
|
import {join} from 'path';
|
||||||
|
import {EOL} from 'os';
|
||||||
|
|
||||||
|
const patterns: {[name: string]: string} = JSON.parse(readFileSync(join(DATA_DIR, 'patterns.json'), 'utf8'));
|
||||||
|
|
||||||
|
const chr = (str: string, char: number) => str.charCodeAt(char);
|
||||||
|
|
||||||
|
// Build the byte-level search automaton for the literal sequence `seq` and
// render it as a Rust `SinglePattern::prebuilt(...)` expression. This is the
// Knuth-Morris-Pratt DFA construction: dfa[c][j] is the next state after
// reading byte c in state j; reaching state seq.length is a match.
const buildPattern = (seq: string): string => {
	// 256 rows (one per byte value) x seq.length columns (states), zero-filled.
	const dfa = Array.from({length: 256}, () => Array(seq.length).fill(0));

	// In state 0, only the sequence's first byte advances.
	dfa[chr(seq, 0)][0] = 1;
	// x is the KMP restart state: where the automaton would be if the current
	// prefix had been shifted by one.
	let x = 0;
	let j = 1;
	while (j < seq.length) {
		for (let c = 0; c < 256; c++) {
			// On mismatch, behave as the restart state would.
			dfa[c][j] = dfa[c][x];
		}
		// On the matching byte, advance to the next state.
		dfa[chr(seq, j)][j] = j + 1;
		x = dfa[chr(seq, j)][x];
		j += 1;
	}

	// Emit the table flattened row by row (one row per byte value).
	return `crate::pattern::SinglePattern::prebuilt(&[${dfa.flat().join(', ')}], ${seq.length})`;
};
|
||||||
|
|
||||||
|
const output = Object.entries(patterns)
|
||||||
|
.map(([name, pattern]) => `pub static ${name}: &crate::pattern::SinglePattern = &${buildPattern(pattern)};`);
|
||||||
|
|
||||||
|
writeFileSync(join(RUST_OUT_DIR, 'patterns.rs'), output.join(EOL));
|
|
@ -0,0 +1,171 @@
|
||||||
|
import {EOL} from 'os';
|
||||||
|
|
||||||
|
// Named character classes referenced in patterns as `<name>`; each value is a
// standard class pattern (including the surrounding brackets).
const customCharClasses = {
	tagName: '[a-zA-Z-]',
	attrName: '[a-zA-Z-]',
};

// Characters matched by the `\w` pattern escape. Note: in this pattern
// language `\w` means *whitespace*, not word characters.
const whitespaceClass = [' ', '\r', '\n', '\t', '\v', '\f'];
|
||||||
|
|
||||||
|
const charRange = (from: string, to: string) => {
|
||||||
|
const res = [];
|
||||||
|
for (let i = from.charCodeAt(0); i <= to.charCodeAt(0); i++) {
|
||||||
|
res.push(String.fromCharCode(i));
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
};
|
||||||
|
|
||||||
|
const parsePatternEscape = (pat: string, at: number): string[] => {
|
||||||
|
switch (pat[at]) {
|
||||||
|
case '\\':
|
||||||
|
return ['\\'];
|
||||||
|
case ']':
|
||||||
|
return [']'];
|
||||||
|
case '<':
|
||||||
|
return ['<'];
|
||||||
|
case 'w':
|
||||||
|
return whitespaceClass;
|
||||||
|
default:
|
||||||
|
throw new Error(`Unknown pattern escape: ${pat[at]}`);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Parse a character-class body starting at `from` (just past the `[`).
// Supports `\` escapes and `a-z` ranges. Returns the expanded characters and
// the index of the closing `]`; throws if the class is never closed.
const parsePatternClass = (pat: string, from: number): [string[], number] => {
	const chars: string[] = [];
	for (let i = from; i < pat.length; i++) {
		switch (pat[i]) {
			case '\\':
				// ++i: the escape consumes the following character as well.
				chars.push(...parsePatternEscape(pat, ++i));
				break;
			case ']':
				return [chars, i];
			default:
				// A range like `a-z`; `i += 2` skips the `-` and the end char.
				// NOTE(review): the range end is read literally, so `]` or `\`
				// cannot be escaped as a range endpoint — confirm no pattern
				// needs that.
				if (pat[i + 1] === '-' && pat[i + 2] !== undefined) {
					chars.push(...charRange(pat[i], pat[i + 2]));
					i += 2;
				} else {
					chars.push(pat[i]);
				}
				break;
		}
	}
	throw new Error(`Unexpected end of pattern: ${pat}`);
};
|
||||||
|
|
||||||
|
const parsePatternCustomClass = (pat: string, from: number): [string[], number] => {
|
||||||
|
const endIdx = pat.indexOf('>', from);
|
||||||
|
if (endIdx == -1) throw new Error(`Unexpected end of pattern: ${pat}`);
|
||||||
|
return [parsePatternClass(customCharClasses[pat.slice(from, endIdx)], 1)[0], endIdx];
|
||||||
|
};
|
||||||
|
|
||||||
|
// Parse a pattern string into a list of character classes, one class per
// matched byte position. Syntax: `\x` escapes, `[...]` classes, `<name>`
// custom classes (see customCharClasses); any other character matches itself.
export const parsePattern = (pat: string): string[][] => {
	const res: string[][] = [];
	for (let i = 0; i < pat.length; i++) {
		switch (pat[i]) {
			case '\\':
				// ++i: the escape consumes the following character.
				res.push(parsePatternEscape(pat, ++i));
				break;
			case '[':
				const sg = parsePatternClass(pat, i + 1);
				res.push(sg[0]);
				// Resume after the closing `]`.
				i = sg[1];
				break;
			case '<':
				const cc = parsePatternCustomClass(pat, i + 1);
				res.push(cc[0]);
				// Resume after the closing `>`.
				i = cc[1];
				break;
			default:
				res.push([pat[i]]);
		}
	}
	return res;
};
|
||||||
|
|
||||||
|
type Node = {
|
||||||
|
children: Node[];
|
||||||
|
value?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
const createNode = (value?: string) => ({value, children: []});
|
||||||
|
|
||||||
|
// Builds a trie over byte sequences and renders it as Rust `static`
// crate::pattern::TrieNode definitions. Structurally identical subtrees are
// deduplicated by caching their generated code, so shared subtrees emit a
// single static.
export class TrieBuilder {
	private readonly root: Node = createNode();

	// Generated `static ...;` declarations, one per unique node.
	private readonly variables: string[] = [];
	private nextId: number = 0;
	// Maps a node's rendered Rust value to the name of the static already
	// emitted for it (the deduplication cache).
	private readonly codeCache: Map<string, string> = new Map();

	// `name` prefixes every generated static; `valueType` is the Rust type
	// parameter of the generated TrieNode statics.
	constructor (
		private readonly name: string,
		private readonly valueType: string,
	) {
	}

	// Give every byte (0-255) that has no child of the root yet a leaf child
	// holding `val` — a catch-all single-byte match.
	fillRemaining (val: string): this {
		const {children} = this.root;
		for (let i = 0; i < 256; i++) {
			children[i] = children[i] || createNode(val);
		}
		return this;
	}

	// Insert the exact byte sequence `seq`, storing `val` at its terminal node.
	// Throws if any character is not a single byte.
	add (seq: string, val: string): this {
		let cur: Node = this.root;
		for (let i = 0; i < seq.length; i++) {
			const c = seq.charCodeAt(i);
			if (c > 255) throw new Error('Not a byte');
			cur = cur.children[c] = cur.children[c] || createNode();
		}
		cur.value = val;
		return this;
	}

	// Insert every sequence matched by `pattern` (a list of character classes,
	// as produced by parsePattern), storing `val` at each terminal node.
	addPattern (pattern: string[][], val: string): this {
		// Frontier of nodes reached after consuming the classes so far.
		let cur: Node[] = [this.root];
		for (const cls of pattern) {
			const next: Node[] = [];
			for (let i = 0; i < cls.length; i++) {
				if (cls[i].length !== 1) throw new Error(`Not a byte`);
				const c = cls[i].charCodeAt(0);
				if (c > 255) throw new Error('Not a byte');
				next.push(...cur.map(n => n.children[c] = n.children[c] || createNode()));
			}
			cur = next;
		}
		cur.forEach(n => n.value = val);
		return this;
	}

	// Generate the code for a node's variable name and value, and return the name.
	private generateNode (node: Node): string {
		// Only generate elements up to the last non-undefined child to cut down on size of array.
		const children = Array.from(
			{length: node.children.length},
			(_, i) => node.children[i] ? `Some(${this.generateNode(node.children[i])})` : 'None',
		).join(', ');

		const value = node.value === undefined ? 'None' : `Some(${node.value})`;
		const varValue = `&crate::pattern::TrieNode {
	value: ${value},
	children: &[${children}],
}`;
		// Reuse an existing static if an identical node was already generated.
		const existingVarName = this.codeCache.get(varValue);
		if (existingVarName) {
			return existingVarName;
		}

		const name = `${this.name}_NODE_${this.nextId++}`;
		this.variables.push(`static ${name}: &'static crate::pattern::TrieNode<${this.valueType}> = ${varValue};`);
		this.codeCache.set(varValue, name);
		return name;
	}

	// Render the whole trie as Rust source.
	// NOTE(review): `variables` and `nextId` are reset here but `codeCache` is
	// not, so a second generate() call would resolve nodes to stale names —
	// verify generate() is only ever called once per builder.
	generate (): string {
		this.variables.splice(0, this.variables.length);
		this.nextId = 0;
		const rootName = this.generateNode(this.root);
		// Make root node public and use proper name.
		return this.variables.join(EOL + EOL).replace(`static ${rootName}`, `pub static ${this.name}`);
	}
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
{
|
||||||
|
"include": [
|
||||||
|
"*.ts"
|
||||||
|
],
|
||||||
|
"compilerOptions": {
|
||||||
|
"allowJs": false,
|
||||||
|
"alwaysStrict": true,
|
||||||
|
"declaration": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"lib": [
|
||||||
|
"es2020"
|
||||||
|
],
|
||||||
|
"module": "commonjs",
|
||||||
|
"noFallthroughCasesInSwitch": true,
|
||||||
|
"noImplicitAny": true,
|
||||||
|
"noImplicitReturns": true,
|
||||||
|
"noImplicitThis": true,
|
||||||
|
"noUnusedParameters": true,
|
||||||
|
"outDir": "dist",
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"strict": true,
|
||||||
|
"strictFunctionTypes": true,
|
||||||
|
"strictNullChecks": true,
|
||||||
|
"strictPropertyInitialization": true,
|
||||||
|
"suppressImplicitAnyIndexErrors": true,
|
||||||
|
"target": "es6"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue