Precompute generated files instead of building them every compile
This commit is contained in:
parent
2591f2879d
commit
99a13e3243
|
@ -1 +1,2 @@
|
|||
/target
|
||||
/src/gen/
|
||||
|
|
|
@ -23,11 +23,6 @@ name = "bitflags"
|
|||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cascade"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.10"
|
||||
|
@ -47,11 +42,6 @@ dependencies = [
|
|||
"vec_map 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrie"
|
||||
version = "0.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.1.14"
|
||||
|
@ -82,19 +72,10 @@ dependencies = [
|
|||
name = "hyperbuild"
|
||||
version = "0.0.45"
|
||||
dependencies = [
|
||||
"cascade 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fastrie 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
|
@ -241,39 +222,6 @@ dependencies = [
|
|||
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.110"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.110"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.53"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ryu 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.3"
|
||||
|
@ -387,14 +335,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||
"checksum cascade 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "31c9ddf4a1a9dbf82e130117f81b0c292fb5416000cbaba11eb92a65face2613"
|
||||
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||
"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
|
||||
"checksum fastrie 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "16a8e873087682100de15eaccd3f4671c44fe589bd8989a854c061c961884d16"
|
||||
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
|
||||
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
|
||||
"checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"
|
||||
"checksum itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
|
||||
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
"checksum libc 0.2.69 (registry+https://github.com/rust-lang/crates.io-index)" = "99e85c08494b21a9054e7fe1374a732aeadaff3980b6990b94bfd3a70f690005"
|
||||
"checksum phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
|
||||
|
@ -412,10 +357,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
||||
"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
|
||||
"checksum rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
|
||||
"checksum ryu 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1"
|
||||
"checksum serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)" = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c"
|
||||
"checksum serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)" = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984"
|
||||
"checksum serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)" = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2"
|
||||
"checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
|
||||
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
"checksum structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
|
||||
|
|
|
@ -10,20 +10,14 @@ repository = "https://github.com/wilsonzlin/hyperbuild.git"
|
|||
version = "0.0.45"
|
||||
authors = ["Wilson Lin <code@wilsonl.in>"]
|
||||
edition = "2018"
|
||||
include = ["/gen/*.json", "/src/**/*", "/build.rs", "/Cargo.toml", "/LICENSE", "/README.md"]
|
||||
include = ["/src/**/*", "/Cargo.toml", "/LICENSE", "/README.md"]
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "actively-developed" }
|
||||
|
||||
[dependencies]
|
||||
cascade = "0.1.4"
|
||||
fastrie = "0.0.6"
|
||||
phf = { version = "0.8.0", features = ["macros"] }
|
||||
structopt = "0.3.5"
|
||||
|
||||
[build-dependencies]
|
||||
serde = { version = "1.0.104", features = ["derive"] }
|
||||
serde_json = "1.0.44"
|
||||
|
||||
[profile.release]
|
||||
panic = 'abort'
|
||||
|
|
309
build.rs
309
build.rs
|
@ -1,309 +0,0 @@
|
|||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::ops::{Index, IndexMut};
|
||||
use std::path::Path;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Renders `bytes` as the source text of a Rust byte string literal (`b"..."`).
///
/// Printable ASCII (space through `~`) is emitted verbatim, except for the
/// backslash and the double quote; every other byte becomes a `\xNN` escape.
fn create_byte_string_literal(bytes: &[u8]) -> String {
    let mut literal = String::from("b\"");
    for &byte in bytes {
        match byte {
            // These two are printable but would terminate or corrupt the literal.
            b'\\' | b'"' => literal.push_str(&format!("\\x{:02x}", byte)),
            b' '..=b'~' => literal.push(byte as char),
            _ => literal.push_str(&format!("\\x{:02x}", byte)),
        }
    }
    literal.push('"');
    literal
}
|
||||
|
||||
fn read_json<T>(name: &str) -> T where for<'de> T: Deserialize<'de> {
|
||||
let patterns_path = Path::new("gen").join(format!("{}.json", name));
|
||||
let patterns_file = File::open(patterns_path).unwrap();
|
||||
serde_json::from_reader(patterns_file).unwrap()
|
||||
}
|
||||
|
||||
/// Writes `code` to `$OUT_DIR/gen_<name>.rs`, replacing any existing file.
///
/// Panics if `OUT_DIR` is not set or the file cannot be created or written.
fn write_rs(name: &str, code: String) -> () {
    let file_name = format!("gen_{}.rs", name);
    let target = Path::new(&env::var("OUT_DIR").unwrap()).join(file_name);
    File::create(&target)
        .and_then(|mut file| file.write_all(code.as_bytes()))
        .unwrap();
}
|
||||
|
||||
/// Splits `n` on single spaces into owned words.
///
/// Consecutive spaces yield empty words; an empty input yields one empty word.
fn name_words(n: &str) -> Vec<String> {
    let mut words = Vec::new();
    for word in n.split(' ') {
        words.push(String::from(word));
    }
    words
}
|
||||
|
||||
/// Joins the upper-cased words with underscores.
///
/// Despite the name, the result is upper-case (e.g. `["a", "b"]` -> `"A_B"`).
fn snake_case(n: &Vec<String>) -> String {
    let mut joined = String::new();
    for (position, word) in n.iter().enumerate() {
        if position > 0 {
            joined.push('_');
        }
        joined.push_str(&word.to_uppercase());
    }
    joined
}
|
||||
|
||||
/// Concatenates the words, upper-casing the first character of each when it
/// is ASCII (e.g. `["hello", "world"]` -> `"HelloWorld"`).
///
/// Empty words contribute nothing and a non-ASCII first character is kept
/// unchanged. The previous byte-indexing implementation panicked in both cases.
fn camel_case(n: &Vec<String>) -> String {
    n.iter()
        .map(|word| {
            let mut chars = word.chars();
            match chars.next() {
                // `chars.as_str()` is the remainder of the word after its first character.
                Some(first) => format!("{}{}", first.to_ascii_uppercase(), chars.as_str()),
                None => String::new(),
            }
        })
        .collect()
}
|
||||
|
||||
/// A table of `usize` cells stored row after row in one flat vector.
pub struct TwoDimensionalArray {
    data: Vec<usize>,
    cols: usize,
}

impl TwoDimensionalArray {
    /// Creates a zero-filled table with `rows * cols` cells.
    pub fn new(rows: usize, cols: usize) -> TwoDimensionalArray {
        Self::prebuilt(vec![0usize; rows * cols], cols)
    }

    /// Wraps already-computed cell data that has `cols` cells per row.
    pub fn prebuilt(data: Vec<usize>, cols: usize) -> TwoDimensionalArray {
        Self { data, cols }
    }

    /// Flat position of the cell at `(row, col)`.
    fn offset(&self, (row, col): TwoDimensionalArrayIndex) -> usize {
        row * self.cols + col
    }
}

/// `(row, col)` coordinate pair.
type TwoDimensionalArrayIndex = (usize, usize);

impl Index<TwoDimensionalArrayIndex> for TwoDimensionalArray {
    type Output = usize;

    fn index(&self, position: TwoDimensionalArrayIndex) -> &Self::Output {
        let at = self.offset(position);
        &self.data[at]
    }
}

impl IndexMut<TwoDimensionalArrayIndex> for TwoDimensionalArray {
    fn index_mut(&mut self, position: TwoDimensionalArrayIndex) -> &mut Self::Output {
        let at = self.offset(position);
        &mut self.data[at]
    }
}
|
||||
|
||||
fn build_pattern(pattern: String) -> String {
|
||||
assert!(pattern.is_ascii());
|
||||
let seq = pattern.as_bytes();
|
||||
let dfa = &mut TwoDimensionalArray::new(256, seq.len());
|
||||
|
||||
dfa[(seq[0] as usize, 0)] = 1;
|
||||
let mut x = 0;
|
||||
let mut j = 1;
|
||||
while j < seq.len() {
|
||||
for c in 0..256 {
|
||||
dfa[(c, j)] = dfa[(c, x)];
|
||||
};
|
||||
dfa[(seq[j] as usize, j)] = j + 1;
|
||||
x = dfa[(seq[j] as usize, x)];
|
||||
j += 1;
|
||||
};
|
||||
|
||||
format!(
|
||||
"crate::pattern::SinglePattern::prebuilt(&[{}], {})",
|
||||
dfa.data.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(", "),
|
||||
seq.len(),
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct TagAttr {
|
||||
boolean: bool,
|
||||
redundant_if_empty: bool,
|
||||
collapse_and_trim: bool,
|
||||
default_value: Option<String>,
|
||||
}
|
||||
|
||||
impl TagAttr {
|
||||
fn code(&self) -> String {
|
||||
format!(r"
|
||||
AttributeMinification {{
|
||||
boolean: {boolean},
|
||||
redundant_if_empty: {redundant_if_empty},
|
||||
collapse_and_trim: {collapse_and_trim},
|
||||
default_value: {default_value},
|
||||
}}
|
||||
",
|
||||
boolean = self.boolean,
|
||||
redundant_if_empty = self.redundant_if_empty,
|
||||
collapse_and_trim = self.collapse_and_trim,
|
||||
default_value = match &self.default_value {
|
||||
Some(val) => format!("Some({})", create_byte_string_literal(val.as_bytes())),
|
||||
None => "None".to_string(),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_attr_map() {
|
||||
let attrs: HashMap<String, HashMap<String, HashMap<String, TagAttr>>> = read_json("attrs");
|
||||
let mut code = String::new();
|
||||
for (attr_name, namespaces) in attrs.iter() {
|
||||
let mut by_namespace_code = String::new();
|
||||
by_namespace_code.push_str(format!("static {}_ATTR: ByNamespace = ByNamespace {{\n", attr_name.to_uppercase()).as_str());
|
||||
for namespace in ["html".to_string(), "svg".to_string()].iter() {
|
||||
by_namespace_code.push_str(format!("\t{}: ", namespace).as_str());
|
||||
match namespaces.get(namespace) {
|
||||
None => by_namespace_code.push_str("None"),
|
||||
Some(tags_map) => {
|
||||
if let Some(global_attr) = tags_map.get("*") {
|
||||
code.push_str(format!(
|
||||
"static {}_{}_ATTR: &AttrMapEntry = &AttrMapEntry::AllNamespaceElements({});\n\n",
|
||||
namespace.to_uppercase(),
|
||||
attr_name.to_uppercase(),
|
||||
global_attr.code(),
|
||||
).as_str());
|
||||
} else {
|
||||
code.push_str(format!(
|
||||
"static {}_{}_ATTR: &AttrMapEntry = &AttrMapEntry::SpecificNamespaceElements(phf::phf_map! {{\n{}\n}});\n\n",
|
||||
namespace.to_uppercase(),
|
||||
attr_name.to_uppercase(),
|
||||
tags_map
|
||||
.iter()
|
||||
.map(|(tag_name, tag_attr)| format!(
|
||||
"b\"{}\" => {}",
|
||||
tag_name,
|
||||
tag_attr.code(),
|
||||
))
|
||||
.collect::<Vec<String>>()
|
||||
.join(",\n"),
|
||||
).as_str());
|
||||
};
|
||||
by_namespace_code.push_str(format!("Some({}_{}_ATTR)", namespace.to_uppercase(), attr_name.to_uppercase()).as_str());
|
||||
}
|
||||
};
|
||||
by_namespace_code.push_str(",\n");
|
||||
};
|
||||
by_namespace_code.push_str("};\n\n");
|
||||
code.push_str(&by_namespace_code);
|
||||
};
|
||||
code.push_str("pub static ATTRS: AttrMap = AttrMap::new(phf::phf_map! {\n");
|
||||
for attr_name in attrs.keys() {
|
||||
code.push_str(format!("\tb\"{}\" => {}_ATTR,\n", attr_name, attr_name.to_uppercase()).as_str());
|
||||
};
|
||||
code.push_str("});\n\n");
|
||||
write_rs("attrs", code);
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct Entity {
|
||||
codepoints: Vec<u32>,
|
||||
characters: String,
|
||||
}
|
||||
|
||||
/// Build-time trie node: an optional value (already rendered as a Rust
/// expression) plus one child slot for each of the 256 byte values.
pub struct TrieBuilderNode {
    value: Option<String>,
    children: Vec<Option<TrieBuilderNode>>,
}

/// State shared while a trie is rendered to Rust source.
struct TrieBuilderGenerationContext<'t, 'v, 'o> {
    trie_name: &'t str,
    value_type: &'v str,
    next_id: usize,
    out: &'o mut String,
}

impl<'t, 'v, 'o> TrieBuilderGenerationContext<'t, 'v, 'o> {
    /// Hands out the next sequential node id.
    pub fn id(&mut self) -> usize {
        let allocated = self.next_id;
        self.next_id = allocated + 1;
        allocated
    }
}

impl TrieBuilderNode {
    /// Creates a node with no value and 256 empty child slots.
    pub fn new() -> TrieBuilderNode {
        TrieBuilderNode {
            value: None,
            children: (0..256).map(|_| None).collect(),
        }
    }

    /// Stores `value` at the node reached by following `seq`, creating nodes
    /// along the way and overwriting any value already stored there.
    pub fn add(&mut self, seq: &[u8], value: String) -> () {
        let mut cursor = self;
        for &byte in seq {
            let slot = &mut cursor.children[byte as usize];
            cursor = slot.get_or_insert_with(TrieBuilderNode::new);
        }
        cursor.value = Some(value);
    }

    fn _generated_node_var_name(&self, trie_name: &str, node_id: usize) -> String {
        format!("{trie_name}_NODE_{node_id}", trie_name = trie_name, node_id = node_id)
    }

    /// Appends the statics for this node's descendants and then for this node
    /// to `ctx.out`, returning the id given to this node.
    fn _generate(&self, ctx: &mut TrieBuilderGenerationContext) -> usize {
        // Children are emitted first, so their statics precede the parent's.
        let mut child_exprs: Vec<String> = Vec::with_capacity(self.children.len());
        for child in &self.children {
            let expr = match child {
                Some(node) => {
                    let child_id = node._generate(ctx);
                    format!("Some({})", self._generated_node_var_name(ctx.trie_name, child_id))
                }
                None => "None".to_string(),
            };
            child_exprs.push(expr);
        }
        let children = child_exprs.join(", ");

        let id = ctx.id();
        let value = match &self.value {
            Some(v) => format!("Some({})", v),
            None => "None".to_string(),
        };
        let code = format!(
            "static {var_name}: &'static crate::pattern::TrieNode<{value_type}> = &crate::pattern::TrieNode {{\n\tvalue: {value},\n\tchildren: [{children}],\n}};\n\n",
            var_name = self._generated_node_var_name(ctx.trie_name, id),
            value_type = ctx.value_type,
            value = value,
            children = children,
        );
        ctx.out.push_str(&code);
        id
    }

    /// Renders the trie rooted at this node as Rust source. The root becomes
    /// `pub static <trie_name>`; all other nodes are private
    /// `<trie_name>_NODE_<id>` statics.
    pub fn generate(&self, trie_name: &str, value_type: &str) -> String {
        let mut out = String::new();
        let mut ctx = TrieBuilderGenerationContext {
            trie_name,
            value_type,
            next_id: 0,
            out: &mut out,
        };
        let root_id = self._generate(&mut ctx);
        // Make root node public and use proper name.
        let private_decl = format!("static {}", self._generated_node_var_name(trie_name, root_id));
        let public_decl = format!("pub static {}", trie_name);
        ctx.out.replace(private_decl.as_str(), public_decl.as_str())
    }
}
|
||||
|
||||
fn generate_entities() {
|
||||
// Read named entities map from JSON file.
|
||||
let entities: HashMap<String, Entity> = read_json("entities");
|
||||
|
||||
// Add entities to trie builder.
|
||||
let mut trie_builder: TrieBuilderNode = TrieBuilderNode::new();
|
||||
for (rep, entity) in entities {
|
||||
let val = if rep.as_bytes().len() < entity.characters.as_bytes().len() {
|
||||
// Since we're minifying in place, we need to guarantee we'll never write something longer than source.
|
||||
println!("Entity {} is shorter than decoded UTF-8 bytes...", rep);
|
||||
// Include '&' in value.
|
||||
create_byte_string_literal(rep.as_bytes())
|
||||
} else {
|
||||
create_byte_string_literal(entity.characters.as_bytes())
|
||||
};
|
||||
trie_builder.add(&(rep.as_bytes())[1..], val);
|
||||
};
|
||||
// Write trie code to output Rust file.
|
||||
write_rs("entities", trie_builder.generate(
|
||||
"ENTITY_REFERENCES",
|
||||
"&'static [u8]",
|
||||
));
|
||||
}
|
||||
|
||||
fn generate_patterns() {
|
||||
let patterns: HashMap<String, String> = read_json("patterns");
|
||||
|
||||
for (name, pattern) in patterns {
|
||||
let mut code = String::new();
|
||||
code.push_str(format!("static {}: &crate::pattern::SinglePattern = &{};", name, build_pattern(pattern)).as_str());
|
||||
write_rs(format!("pattern_{}", name).as_str(), code);
|
||||
};
|
||||
}
|
||||
|
||||
fn main() {
|
||||
generate_attr_map();
|
||||
generate_entities();
|
||||
generate_patterns();
|
||||
}
|
|
@ -1,2 +1,2 @@
|
|||
node_modules/
|
||||
build/*.d.ts
|
||||
/package-lock.json
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
import { join } from "path";
|
||||
import {mkdirSync, writeFileSync} from 'fs';
|
||||
|
||||
/** Directory that receives the generated Rust sources (`<repo>/src/gen`). */
export const RUST_OUT_DIR = join(__dirname, '..', 'src', 'gen');

// `recursive: true` makes this a no-op when the directory already exists and
// also creates missing parents, so no EEXIST handling is needed.
mkdirSync(RUST_OUT_DIR, {recursive: true});

// Module index declaring the generated submodules.
writeFileSync(join(RUST_OUT_DIR, 'mod.rs'), `
pub mod attrs;
pub mod dfa;
pub mod entities;
pub mod patterns;
`);

/** Directory holding the hand-maintained input data for the generators. */
export const DATA_DIR = join(__dirname, 'data');
|
||||
|
||||
/**
 * Left-pads `str` with `'0'` characters up to a length of `n`.
 *
 * A string that is already `n` characters or longer is returned unchanged
 * (the previous `'0'.repeat(n - str.length)` threw a RangeError in that case).
 */
export const leftPad = (str: string, n: number): string => str.padStart(n, '0');
|
||||
|
||||
/** Serialises `v` as JSON indented with two spaces. */
export const prettyJson = (v: any) => {
  const indentWidth = 2;
  return JSON.stringify(v, null, indentWidth);
};
|
||||
|
||||
/**
 * Renders `bytes` as the source text of a Rust byte string literal (`b"..."`).
 *
 * Printable ASCII is emitted verbatim, except backslash and double quote;
 * everything else becomes a two-digit `\xNN` escape.
 *
 * @throws Error if any element is not an integer in the range 0..=255.
 */
export const byteStringLiteral = (bytes: number[]): string => {
  const body = bytes.map(c => {
    // Negative or fractional values would otherwise produce escapes such as `\x-1`.
    if (!Number.isInteger(c) || c < 0 || c > 255) throw new Error('Not a byte');
    // 0x20 == ' '.
    // 0x7E == '~'.
    // 0x5C == '\\'.
    // 0x22 == '"'.
    const printable = c >= 0x20 && c <= 0x7E && c !== 0x5C && c !== 0x22;
    return printable
      ? String.fromCharCode(c)
      : `\\x${c.toString(16).padStart(2, '0')}`;
  }).join('');
  return `b"${body}"`;
};
|
4431
gen/attrs.json
4431
gen/attrs.json
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,210 @@
|
|||
import {readFileSync, writeFileSync} from 'fs';
|
||||
import ts, {Node, SourceFile, SyntaxKind, Type} from 'typescript';
|
||||
import {join} from 'path';
|
||||
import {DATA_DIR, prettyJson, RUST_OUT_DIR} from './_common';
|
||||
|
||||
// Source text of the React type declarations, read from the shared data
// directory (the same DATA_DIR that attrs.json is loaded from).
const reactDeclarations = readFileSync(join(DATA_DIR, 'react.d.ts'), 'utf8');
|
||||
|
||||
// TODO Consider and check behaviour when value matches case insensitively, after trimming whitespace, numerically (for number values), etc.
// TODO This file is currently manually sourced and written. Try to get machine-readable spec and automate.

/** One default-value rule: the `namespace:tag` names it applies to and the default itself. */
type DefaultAttributeValueRule = {
  tags: string[];
  defaultValue: string;
  isPositiveInteger?: boolean;
};

/** Default-value rules keyed by attribute name, loaded from `attrs.json` in DATA_DIR. */
const defaultAttributeValues: {
  [attr: string]: DefaultAttributeValueRule[];
} = JSON.parse(readFileSync(join(DATA_DIR, 'attrs.json'), 'utf8'));
|
||||
|
||||
/** Lower-cased interface-name prefixes that differ from the real tag name. */
const tagNameNormalised: Record<string, string> = {
  'anchor': 'a',
};

/** Lower-cased React property names that differ from the real attribute name. */
const attrNameNormalised: Record<string, string> = {
  'classname': 'class',
};

/** React property names that have no HTML attribute counterpart. */
const reactSpecificAttributes: string[] = [
  'defaultChecked', 'defaultValue', 'suppressContentEditableWarning', 'suppressHydrationWarning',
];

/**
 * Attributes whose value may be whitespace-collapsed and trimmed, mapped to
 * the `namespace:tag` names (`*` = any tag) where that applies.
 * Typed as a string-keyed record so lookups by arbitrary attribute name type-check.
 */
const collapsibleAndTrimmable: Record<string, string[]> = {
  'class': ['html:*'],
  'd': ['svg:*'],
};
|
||||
|
||||
/**
 * Returns the declared name of a named AST node.
 *
 * `text` is the unescaped name and is also present on string-literal property
 * names; `escapedText` (identifiers only, leading `__` escaped) is kept as a
 * fallback for objects that do not expose `text`.
 */
const getNameOfNode = (n: any): string => {
  const {text, escapedText} = n.name;
  return text !== undefined ? text : escapedText;
};
|
||||
/**
 * Lower-cases `name` and, when `norms` has its own non-empty entry for the
 * lower-cased name, returns that replacement instead.
 *
 * Only own properties are consulted, so names such as `constructor` are not
 * replaced by members inherited from `Object.prototype`.
 */
const normaliseName = (name: string, norms: { [name: string]: string }): string => {
  const lowered = name.toLowerCase();
  if (!Object.prototype.hasOwnProperty.call(norms, lowered)) return lowered;
  return norms[lowered] || lowered;
};
|
||||
|
||||
/** Minification settings for one attribute on one tag (or on every tag of a namespace). */
type AttrConfig = {
  boolean: boolean;
  redundantIfEmpty: boolean;
  collapseAndTrim: boolean;
  defaultValue?: string;
};

/**
 * Renders the UTF-8 bytes of `value` as a Rust byte string literal. Bytes
 * outside printable ASCII, plus `"` and `\`, become `\xNN` escapes so the
 * literal is always valid Rust.
 */
const rsByteStringLiteral = (value: string): string => {
  const escaped = Array.from(Buffer.from(value, 'utf8'), byte => {
    const printable = byte >= 0x20 && byte <= 0x7e && byte !== 0x5c && byte !== 0x22;
    return printable
      ? String.fromCharCode(byte)
      : `\\x${byte.toString(16).padStart(2, '0')}`;
  }).join('');
  return `b"${escaped}"`;
};

/**
 * Returns Rust source for an `AttributeMinification` struct literal with the
 * given settings. A missing default value is emitted as `None`.
 */
const rsTagAttr = ({
  redundantIfEmpty,
  defaultValue,
  collapseAndTrim,
  boolean,
}: AttrConfig): string => `AttributeMinification {
boolean: ${boolean},
redundant_if_empty: ${redundantIfEmpty},
collapse_and_trim: ${collapseAndTrim},
default_value: ${defaultValue == null ? 'None' : `Some(${rsByteStringLiteral(defaultValue)})`},
}`;
|
||||
|
||||
/**
 * Derives per-attribute minification settings from the `*HTMLAttributes` /
 * `*SVGAttributes` interfaces of the parsed React declarations and returns the
 * complete source text of the generated Rust `attrs` module.
 *
 * @throws Error when the global HTML attributes interface is not the first one
 *   processed, when a tag-attribute pair has several default values or occurs
 *   twice, or when a tag-specific attribute disagrees with the global one.
 */
const processReactTypeDeclarations = (source: SourceFile): string => {
  const nodes: Node[] = [source];
  // Use index-based loop to keep iterating as nodes array grows.
  for (let i = 0; i < nodes.length; i++) {
    // forEachChild stops as soon as the callback returns a truthy value (such as
    // the new length returned by Array.prototype.push), hence the `void`.
    nodes[i].forEachChild(c => void nodes.push(c));
  }

  const attributeNodes: { namespace: 'html' | 'svg'; tag: string; node: ts.InterfaceDeclaration }[] = [];
  for (const node of nodes) {
    if (!ts.isInterfaceDeclaration(node)) continue;
    const matches = /^([A-Za-z]*)(HTML|SVG)Attributes/.exec(getNameOfNode(node));
    if (!matches) continue;
    // The pattern only admits HTML or SVG for the second group.
    const namespace = matches[2].toLowerCase() as 'html' | 'svg';
    const tag = normaliseName(matches[1], tagNameNormalised);
    if (namespace === 'html' && ['all', 'webview'].includes(tag)) continue;
    attributeNodes.push({namespace, tag, node});
  }
  attributeNodes.sort((a, b) => a.namespace.localeCompare(b.namespace) || a.tag.localeCompare(b.tag));

  // Process global HTML attributes first as they also appear on some specific HTML tags but we don't want to keep the specific ones if they're global.
  const first = attributeNodes[0];
  if (!first || first.namespace !== 'html' || first.tag !== '') {
    throw new Error(`Global HTML attributes is not first to be processed`);
  }

  // Map structure: attr => namespace => tag => config.
  const attributes = new Map<string, Map<'html' | 'svg', Map<string, AttrConfig>>>();

  for (const {namespace, tag, node} of attributeNodes) {
    const fullyQualifiedTagName = [namespace, tag || '*'].join(':');
    for (const member of node.members) {
      if (!ts.isPropertySignature(member)) continue;

      // The React-only list holds camelCase names, so it must be checked
      // before the name is lower-cased by normaliseName.
      const declaredName: string = getNameOfNode(member);
      if (reactSpecificAttributes.includes(declaredName)) continue;
      const attrName = normaliseName(declaredName, attrNameNormalised);

      const typeNode = member.type;
      const types: SyntaxKind[] = typeNode === undefined
        ? []
        : ts.isUnionTypeNode(typeNode)
          ? typeNode.types.map(t => t.kind)
          : [typeNode.kind];

      const boolean = types.includes(ts.SyntaxKind.BooleanKeyword);
      // If types includes boolean and string, make it a boolean attr to prevent it from being removed if empty value.
      const redundantIfEmpty = !boolean && types.some(t => t === ts.SyntaxKind.StringKeyword || t === ts.SyntaxKind.NumberKeyword);
      const defaultValues = (defaultAttributeValues[attrName] || [])
        .filter(a => a.tags.includes(fullyQualifiedTagName))
        .map(a => a.defaultValue);
      const collapseAndTrim = ((collapsibleAndTrimmable as { [attr: string]: string[] })[attrName] || [])
        .includes(fullyQualifiedTagName);
      if (defaultValues.length > 1) {
        throw new Error(`Tag-attribute combination <${fullyQualifiedTagName} ${attrName}> has multiple default values: ${defaultValues}`);
      }
      const attr: AttrConfig = {
        boolean,
        redundantIfEmpty,
        collapseAndTrim,
        defaultValue: defaultValues[0],
      };

      let namespacesForAttribute = attributes.get(attrName);
      if (!namespacesForAttribute) {
        namespacesForAttribute = new Map();
        attributes.set(attrName, namespacesForAttribute);
      }
      let tagsForNsAttribute = namespacesForAttribute.get(namespace);
      if (!tagsForNsAttribute) {
        tagsForNsAttribute = new Map();
        namespacesForAttribute.set(namespace, tagsForNsAttribute);
      }
      if (tagsForNsAttribute.has(tag)) throw new Error(`Duplicate tag-attribute combination: <${fullyQualifiedTagName} ${attrName}>`);

      const globalAttr = tagsForNsAttribute.get('*');
      if (globalAttr) {
        // A namespace-wide entry already covers this tag; it must agree exactly.
        if (globalAttr.boolean !== attr.boolean
          || globalAttr.redundantIfEmpty !== attr.redundantIfEmpty
          || globalAttr.collapseAndTrim !== attr.collapseAndTrim
          || globalAttr.defaultValue !== attr.defaultValue) {
          throw new Error(`Global and tag-specific attributes conflict: ${prettyJson(globalAttr)} ${prettyJson(attr)}`);
        }
      } else {
        tagsForNsAttribute.set(tag || '*', attr);
      }
    }
  }

  // Fixed prelude of the generated module: the runtime types the statics below refer to.
  let code = `
use crate::spec::tag::ns::Namespace;

pub struct AttributeMinification {
pub boolean: bool,
pub redundant_if_empty: bool,
pub collapse_and_trim: bool,
pub default_value: Option<&'static [u8]>,
}

pub enum AttrMapEntry {
AllNamespaceElements(AttributeMinification),
SpecificNamespaceElements(phf::Map<&'static [u8], AttributeMinification>),
}

#[derive(Clone, Copy)]
pub struct ByNamespace {
// Make pub so this struct can be statically created in gen/attrs.rs.
pub html: Option<&'static AttrMapEntry>,
pub svg: Option<&'static AttrMapEntry>,
}

impl ByNamespace {
fn get(&self, ns: Namespace) -> Option<&'static AttrMapEntry> {
match ns {
Namespace::Html => self.html,
Namespace::Svg => self.svg,
}
}
}

pub struct AttrMap(phf::Map<&'static [u8], ByNamespace>);

impl AttrMap {
pub const fn new(map: phf::Map<&'static [u8], ByNamespace>) -> AttrMap {
AttrMap(map)
}

pub fn get(&self, ns: Namespace, tag: &[u8], attr: &[u8]) -> Option<&AttributeMinification> {
self.0.get(attr).and_then(|namespaces| namespaces.get(ns)).and_then(|entry| match entry {
AttrMapEntry::AllNamespaceElements(min) => Some(min),
AttrMapEntry::SpecificNamespaceElements(map) => map.get(tag),
})
}
}

`;

  for (const [attrName, namespaces] of attributes) {
    const attrConst = attrName.toUpperCase();
    let byNsCode = `static ${attrConst}_ATTR: ByNamespace = ByNamespace {\n`;
    for (const ns of ['html', 'svg'] as const) {
      byNsCode += `\t${ns}: `;
      const tagsMap = namespaces.get(ns);
      if (!tagsMap) {
        byNsCode += 'None';
      } else {
        const entryConst = `${ns.toUpperCase()}_${attrConst}_ATTR`;
        const globalAttr = tagsMap.get('*');
        if (globalAttr) {
          code += `static ${entryConst}: &AttrMapEntry = &AttrMapEntry::AllNamespaceElements(${rsTagAttr(globalAttr)});\n\n`;
        } else {
          const entries = Array.from(
            tagsMap,
            ([tagName, tagAttr]) => `b"${tagName}" => ${rsTagAttr(tagAttr)}`,
          ).join(',\n');
          code += `static ${entryConst}: &AttrMapEntry = &AttrMapEntry::SpecificNamespaceElements(phf::phf_map! {\n${entries}\n});\n\n`;
        }
        byNsCode += `Some(${entryConst})`;
      }
      byNsCode += ',\n';
    }
    byNsCode += '};\n\n';
    code += byNsCode;
  }

  code += 'pub static ATTRS: AttrMap = AttrMap::new(phf::phf_map! {\n';
  for (const attrName of attributes.keys()) {
    code += `\tb"${attrName}" => ${attrName.toUpperCase()}_ATTR,\n`;
  }
  code += '});\n\n';
  return code;
};
|
||||
|
||||
// Parse the React declarations and write the generated Rust module to src/gen/attrs.rs.
const source = ts.createSourceFile('react.d.ts', reactDeclarations, ts.ScriptTarget.ES2020);
const attrsRsPath = join(RUST_OUT_DIR, 'attrs.rs');
writeFileSync(attrsRsPath, processReactTypeDeclarations(source));
|
|
@ -1,215 +0,0 @@
|
|||
const request = require('request-promise-native');
|
||||
const {promises: fs} = require('fs');
|
||||
const ts = require('typescript');
|
||||
const path = require('path');
|
||||
|
||||
// Orders [key, value] entries by key, using locale-aware string comparison.
const compareEntryNames = (a, b) => {
  const nameA = a[0];
  const nameB = b[0];
  return nameA.localeCompare(nameB);
};

// Converts a Map (and any Maps nested as its values) into plain objects whose
// keys are inserted in sorted order.
const deepObjectifyMap = map => {
  const entries = [];
  for (const [key, value] of map.entries()) {
    entries.push([key, value instanceof Map ? deepObjectifyMap(value) : value]);
  }
  entries.sort(compareEntryNames);
  return Object.fromEntries(entries);
};

// Splits a camelCase identifier before each capital letter and lower-cases the parts.
const fromCamelCase = camelCase => {
  const words = camelCase.split(/(?=^|[A-Z])/);
  return words.map(word => word.toLowerCase());
};

// Serialises a value as JSON indented with two spaces.
const prettyjson = v => {
  const indentWidth = 2;
  return JSON.stringify(v, null, indentWidth);
};
|
||||
|
||||
// Path built from this script's parent directory and `attrs.json`.
const ATTRS_PATH = path.join(__dirname, '..', 'attrs.json');

// Upstream location of the React typings and the local copy kept next to this script.
const REACT_TYPINGS_URL = 'https://raw.githubusercontent.com/DefinitelyTyped/DefinitelyTyped/master/types/react/index.d.ts';
const REACT_TYPINGS_FILE = path.join(__dirname, 'react.d.ts');

// Returns the React typings source, preferring the local copy. When the copy
// does not exist it is downloaded, saved for next time and returned. Any read
// error other than "file not found" is rethrown.
const fetchReactTypingsSource = async () => {
  let cached;
  try {
    cached = await fs.readFile(REACT_TYPINGS_FILE, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw err;
    }
  }
  if (cached !== undefined) {
    return cached;
  }
  const downloaded = await request(REACT_TYPINGS_URL);
  await fs.writeFile(REACT_TYPINGS_FILE, downloaded);
  return downloaded;
};
|
||||
|
||||
const tagNameNormalised = {
|
||||
'anchor': 'a',
|
||||
};
|
||||
|
||||
const attrNameNormalised = {
|
||||
'classname': 'class',
|
||||
};
|
||||
|
||||
const reactSpecificAttributes = [
|
||||
'defaultChecked', 'defaultValue', 'suppressContentEditableWarning', 'suppressHydrationWarning',
|
||||
];
|
||||
|
||||
// TODO Consider and check behaviour when value matches case insensitively, after trimming whitespace, numerically (for number values), etc.
|
||||
// TODO This is currently manually sourced and written. Try to get machine-readable spec and automate.
|
||||
// Default values of attributes, keyed by attribute name. Each entry lists the fully qualified
// tags (`namespace:tag`) the default applies to; `isPositiveInteger` marks numeric defaults.
const defaultAttributeValues = {
  align: [{tags: ['html:img'], defaultValue: 'bottom'}],
  decoding: [{tags: ['html:img'], defaultValue: 'auto'}],
  enctype: [{tags: ['html:form'], defaultValue: 'application/x-www-form-urlencoded'}],
  frameborder: [{tags: ['html:iframe'], defaultValue: '1', isPositiveInteger: true}],
  formenctype: [{tags: ['html:button', 'html:input'], defaultValue: 'application/x-www-form-urlencoded'}],
  height: [{tags: ['html:iframe'], defaultValue: '150', isPositiveInteger: true}],
  importance: [{tags: ['html:iframe'], defaultValue: 'auto'}],
  loading: [{tags: ['html:iframe', 'html:img'], defaultValue: 'eager'}],
  media: [{tags: ['html:style'], defaultValue: 'all'}],
  method: [{tags: ['html:form'], defaultValue: 'get'}],
  referrerpolicy: [{tags: ['html:iframe', 'html:img'], defaultValue: 'no-referrer-when-downgrade'}],
  rules: [{tags: ['html:table'], defaultValue: 'none'}],
  shape: [{tags: ['html:area'], defaultValue: 'rect'}],
  span: [{tags: ['html:col', 'html:colgroup'], defaultValue: '1', isPositiveInteger: true}],
  target: [{tags: ['html:a', 'html:form'], defaultValue: '_self'}],
  type: [
    {tags: ['html:button'], defaultValue: 'submit'},
    {tags: ['html:input'], defaultValue: 'text'},
    {tags: ['html:link', 'html:style'], defaultValue: 'text/css'},
  ],
  width: [{tags: ['html:iframe'], defaultValue: '300', isPositiveInteger: true}],
};
|
||||
|
||||
// Attribute name => fully qualified tags for which the attribute is flagged `collapse_and_trim`.
const collapsibleAndTrimmable = {
  class: ['html:*'],
  d: ['svg:*'],
};
|
||||
|
||||
// TODO Is escapedText the API for getting name?
// Reads the identifier text of a named AST node.
const getNameOfNode = (n) => {
  return n.name.escapedText;
};

// Lowercases `name` and substitutes the replacement listed in `norms`, if there is one.
const normaliseName = (name, norms) => {
  const lowered = name.toLowerCase();
  return norms[lowered] || lowered;
};
|
||||
|
||||
/**
 * Extracts attribute metadata from the parsed React typings and writes it to ATTRS_PATH as
 * pretty-printed JSON with sorted keys.
 *
 * Every interface whose name matches `<Tag>HTMLAttributes` or `<Tag>SVGAttributes` is visited;
 * an empty `<Tag>` denotes the namespace-wide attributes, which are stored under the `*` tag.
 * For each property signature the attribute's `boolean`, `redundant_if_empty`,
 * `collapse_and_trim` and `default_value` settings are derived. A tag-specific attribute that
 * also exists namespace-wide is not stored again, but must agree with the namespace-wide one.
 *
 * @param source Source file node created by `ts.createSourceFile`.
 * @throws {Error} If the global HTML attributes are not the first interface to be processed, if
 *   a tag-attribute combination has several default values or is duplicated, or if a
 *   tag-specific attribute conflicts with the global one.
 */
const processReactTypeDeclarations = async (source) => {
  const nodes = [source];
  // Use index-based loop to keep iterating as nodes array grows.
  for (let i = 0; i < nodes.length; i++) {
    // forEachChild doesn't work if return value is number (e.g. return value of Array.prototype.push).
    nodes[i].forEachChild(c => void nodes.push(c));
  }

  const attributeNodes = nodes
    .filter(n => n.kind === ts.SyntaxKind.InterfaceDeclaration)
    .map(n => [/^([A-Za-z]*)(HTML|SVG)Attributes/.exec(getNameOfNode(n)), n])
    .filter(([matches]) => matches)
    .map(([matches, node]) => [matches[2].toLowerCase(), normaliseName(matches[1], tagNameNormalised), node])
    .filter(([namespace, tagName]) => namespace !== 'html' || !['all', 'webview'].includes(tagName))
    .map(([namespace, tag, node]) => ({namespace, tag, node}))
    .sort((a, b) => a.namespace.localeCompare(b.namespace) || a.tag.localeCompare(b.tag));

  // Process global HTML attributes first as they also appear on some specific HTML tags but we don't want to keep the specific ones if they're global.
  // The explicit existence check turns "no matching interfaces at all" into the same clear error
  // instead of a TypeError on `undefined`.
  const firstAttributeNode = attributeNodes[0];
  if (!firstAttributeNode || firstAttributeNode.namespace !== 'html' || firstAttributeNode.tag !== '') {
    throw new Error(`Global HTML attributes is not first to be processed`);
  }

  // normaliseName always lowercases attribute names, so the React-specific names have to be
  // lowercased too; comparing against the camelCase originals would never match.
  const reactSpecificLowerCased = new Set(reactSpecificAttributes.map(a => a.toLowerCase()));

  // Map structure: attr => namespace => tag => config.
  const attributes = new Map();

  for (const {namespace, tag, node} of attributeNodes) {
    const fullyQualifiedTagName = [namespace, tag || '*'].join(':');
    for (const n of node.members.filter(n => n.kind === ts.SyntaxKind.PropertySignature)) {
      const attrName = normaliseName(getNameOfNode(n), attrNameNormalised);
      if (reactSpecificLowerCased.has(attrName)) continue;

      // Collect the syntax kinds of the declared type; a union contributes each of its members.
      const types = n.type.kind === ts.SyntaxKind.UnionType
        ? n.type.types.map(t => t.kind)
        : [n.type.kind];

      const boolean = types.includes(ts.SyntaxKind.BooleanKeyword);
      // If types includes boolean and string, make it a boolean attr to prevent it from being removed if empty value.
      const redundantIfEmpty = !boolean &&
        (types.includes(ts.SyntaxKind.StringKeyword) || types.includes(ts.SyntaxKind.NumberKeyword));
      const defaultValue = (defaultAttributeValues[attrName] || [])
        .filter(a => a.tags.includes(fullyQualifiedTagName))
        .map(a => a.defaultValue);
      const collapseAndTrim = (collapsibleAndTrimmable[attrName] || []).includes(fullyQualifiedTagName);
      if (defaultValue.length > 1) {
        throw new Error(`Tag-attribute combination <${fullyQualifiedTagName} ${attrName}> has multiple default values: ${defaultValue}`);
      }
      const attr = {
        boolean,
        redundant_if_empty: redundantIfEmpty,
        collapse_and_trim: collapseAndTrim,
        default_value: defaultValue[0],
      };

      if (!attributes.has(attrName)) attributes.set(attrName, new Map());
      const namespacesForAttribute = attributes.get(attrName);
      if (!namespacesForAttribute.has(namespace)) namespacesForAttribute.set(namespace, new Map());
      const tagsForNSAttribute = namespacesForAttribute.get(namespace);
      if (tagsForNSAttribute.has(tag)) throw new Error(`Duplicate tag-attribute combination: <${fullyQualifiedTagName} ${attrName}>`);

      const globalAttr = tagsForNSAttribute.get('*');
      if (globalAttr) {
        // The attribute already exists namespace-wide: keep only the global entry, but insist
        // that the tag-specific configuration is identical to it.
        if (globalAttr.boolean !== attr.boolean
          || globalAttr.redundant_if_empty !== attr.redundant_if_empty
          || globalAttr.collapse_and_trim !== attr.collapse_and_trim
          || globalAttr.default_value !== attr.default_value) {
          throw new Error(`Global and tag-specific attributes conflict: ${prettyjson(globalAttr)} ${prettyjson(attr)}`);
        }
      } else {
        tagsForNSAttribute.set(tag || '*', attr);
      }
    }
  }

  // Sort output JSON object by property so diffs are clearer.
  await fs.writeFile(ATTRS_PATH, prettyjson(deepObjectifyMap(attributes)));
};
|
||||
|
||||
// Entry point: load the React typings, parse them and generate the attribute metadata.
// A failure is reported and turned into a non-zero exit code so that callers of this script
// notice it, instead of being left as an unhandled promise rejection.
(async () => {
  const typings = await fetchReactTypingsSource();
  const source = ts.createSourceFile(`react.d.ts`, typings, ts.ScriptTarget.ES2019);
  await processReactTypeDeclarations(source);
})().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
|
@ -1,8 +0,0 @@
|
|||
{
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"request": "^2.88.0",
|
||||
"request-promise-native": "^1.0.8",
|
||||
"typescript": "^3.7.4"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,160 @@
|
|||
{
|
||||
"align": [
|
||||
{
|
||||
"tags": [
|
||||
"html:img"
|
||||
],
|
||||
"defaultValue": "bottom"
|
||||
}
|
||||
],
|
||||
"decoding": [
|
||||
{
|
||||
"tags": [
|
||||
"html:img"
|
||||
],
|
||||
"defaultValue": "auto"
|
||||
}
|
||||
],
|
||||
"enctype": [
|
||||
{
|
||||
"tags": [
|
||||
"html:form"
|
||||
],
|
||||
"defaultValue": "application/x-www-form-urlencoded"
|
||||
}
|
||||
],
|
||||
"frameborder": [
|
||||
{
|
||||
"tags": [
|
||||
"html:iframe"
|
||||
],
|
||||
"defaultValue": "1",
|
||||
"isPositiveInteger": true
|
||||
}
|
||||
],
|
||||
"formenctype": [
|
||||
{
|
||||
"tags": [
|
||||
"html:button",
|
||||
"html:input"
|
||||
],
|
||||
"defaultValue": "application/x-www-form-urlencoded"
|
||||
}
|
||||
],
|
||||
"height": [
|
||||
{
|
||||
"tags": [
|
||||
"html:iframe"
|
||||
],
|
||||
"defaultValue": "150",
|
||||
"isPositiveInteger": true
|
||||
}
|
||||
],
|
||||
"importance": [
|
||||
{
|
||||
"tags": [
|
||||
"html:iframe"
|
||||
],
|
||||
"defaultValue": "auto"
|
||||
}
|
||||
],
|
||||
"loading": [
|
||||
{
|
||||
"tags": [
|
||||
"html:iframe",
|
||||
"html:img"
|
||||
],
|
||||
"defaultValue": "eager"
|
||||
}
|
||||
],
|
||||
"media": [
|
||||
{
|
||||
"tags": [
|
||||
"html:style"
|
||||
],
|
||||
"defaultValue": "all"
|
||||
}
|
||||
],
|
||||
"method": [
|
||||
{
|
||||
"tags": [
|
||||
"html:form"
|
||||
],
|
||||
"defaultValue": "get"
|
||||
}
|
||||
],
|
||||
"referrerpolicy": [
|
||||
{
|
||||
"tags": [
|
||||
"html:iframe",
|
||||
"html:img"
|
||||
],
|
||||
"defaultValue": "no-referrer-when-downgrade"
|
||||
}
|
||||
],
|
||||
"rules": [
|
||||
{
|
||||
"tags": [
|
||||
"html:table"
|
||||
],
|
||||
"defaultValue": "none"
|
||||
}
|
||||
],
|
||||
"shape": [
|
||||
{
|
||||
"tags": [
|
||||
"html:area"
|
||||
],
|
||||
"defaultValue": "rect"
|
||||
}
|
||||
],
|
||||
"span": [
|
||||
{
|
||||
"tags": [
|
||||
"html:col",
|
||||
"html:colgroup"
|
||||
],
|
||||
"defaultValue": "1",
|
||||
"isPositiveInteger": true
|
||||
}
|
||||
],
|
||||
"target": [
|
||||
{
|
||||
"tags": [
|
||||
"html:a",
|
||||
"html:form"
|
||||
],
|
||||
"defaultValue": "_self"
|
||||
}
|
||||
],
|
||||
"type": [
|
||||
{
|
||||
"tags": [
|
||||
"html:button"
|
||||
],
|
||||
"defaultValue": "submit"
|
||||
},
|
||||
{
|
||||
"tags": [
|
||||
"html:input"
|
||||
],
|
||||
"defaultValue": "text"
|
||||
},
|
||||
{
|
||||
"tags": [
|
||||
"html:link",
|
||||
"html:style"
|
||||
],
|
||||
"defaultValue": "text/css"
|
||||
}
|
||||
],
|
||||
"width": [
|
||||
{
|
||||
"tags": [
|
||||
"html:iframe"
|
||||
],
|
||||
"defaultValue": "300",
|
||||
"isPositiveInteger": true
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
# Prefixes:
|
||||
# `_` means to accumulate the transition pattern in lowercase.
|
||||
# `<` means to accumulate transition pattern as part of current state.
|
||||
# `+` means to accumulate transition pattern as part of next state.
|
||||
# `?` means to look ahead without accumulating the transition pattern, allowing the next state to reconsume it.
|
||||
|
||||
Text:
|
||||
'\w': ?TextWhitespace
|
||||
'\<': +OpeningTagStart
|
||||
'\</': +ClosingTag
|
||||
'\<!--': +Comment
|
||||
'&': ?TextEntity
|
||||
'': Text
|
||||
|
||||
TextWhitespace:
|
||||
'\w': TextWhitespace
|
||||
'&': ?TextEntity
|
||||
'': ?Text
|
||||
|
||||
Comment:
|
||||
'-->': <Text
|
||||
'': Comment
|
||||
|
||||
ClosingTag:
|
||||
'<tagName>': _ClosingTag
|
||||
'>': <Text
|
||||
|
||||
OpeningTagStart:
|
||||
'\w': ?OpeningTagWhitespace
|
||||
'<tagName>': _OpeningTagStart
|
||||
|
||||
OpeningTagWhitespace:
|
||||
'\w': OpeningTagWhitespace
|
||||
'<attrName>': ?AttrName
|
||||
'>': <Text
|
||||
|
||||
AttrName:
|
||||
'[>=\w]': ?AttrAfterName
|
||||
'<attrName>': _AttrName
|
||||
|
||||
AttrAfterName:
|
||||
'\w': AttrAfterName
|
||||
'>': ?OpeningTagWhitespace
|
||||
'=': +AttrBeforeValue
|
||||
|
||||
AttrBeforeValue:
|
||||
'\w': AttrBeforeValue
|
||||
"'": +AttrSingleQuotedValue
|
||||
'"': +AttrDoubleQuotedValue
|
||||
'': ?AttrUnquotedValue
|
||||
|
||||
AttrSingleQuotedValue:
|
||||
"'": <OpeningTagWhitespace
|
||||
'&': ?AttrValueEntity
|
||||
'\w': ?AttrSingleQuotedValueWhitespace
|
||||
'': AttrSingleQuotedValue
|
||||
|
||||
AttrSingleQuotedValueWhitespace:
|
||||
'\w': AttrSingleQuotedValueWhitespace
|
||||
'&': ?AttrValueEntity
|
||||
'': ?AttrSingleQuotedValue
|
||||
|
||||
AttrDoubleQuotedValue:
|
||||
'"': <OpeningTagWhitespace
|
||||
'&': ?AttrValueEntity
|
||||
'\w': ?AttrDoubleQuotedValueWhitespace
|
||||
'': AttrDoubleQuotedValue
|
||||
|
||||
AttrDoubleQuotedValueWhitespace:
|
||||
'\w': AttrDoubleQuotedValueWhitespace
|
||||
'&': ?AttrValueEntity
|
||||
'': ?AttrDoubleQuotedValue
|
||||
|
||||
AttrUnquotedValue:
|
||||
'\w': ?OpeningTagWhitespace
|
||||
'&': ?AttrValueEntity
|
||||
'': AttrUnquotedValue
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,71 @@
|
|||
import yaml from 'yaml';
|
||||
import {DATA_DIR, RUST_OUT_DIR} from './_common';
|
||||
import {readFileSync, writeFileSync} from 'fs';
|
||||
import {join} from 'path';
|
||||
import {EOL} from 'os';
|
||||
import {parsePattern, TrieBuilder} from './trie';
|
||||
|
||||
// State machine description: state name => (transition pattern => prefixed target state).
const dfaYaml = readFileSync(join(DATA_DIR, 'dfa.yaml'), 'utf8');
const dfa: { [node: string]: { [transition: string]: string } } = yaml.parse(dfaYaml);

// These states must always exist; see lex/mod.rs for more details.
for (const requiredState of ['TextEntity', 'AttrValueEntity', 'Unknown', 'EOF']) {
  dfa[requiredState] = {};
}

// Sorted state names; a state's position in this list is its numeric value in the generated enum.
const nodes = Object.keys(dfa).sort();
|
||||
|
||||
/**
 * Renders the Rust `Transition` literal for a transition value from the DFA description.
 *
 * @param val Target state name, optionally preceded by one prefix character (`_`, `<`, `+` or
 *   `?`) that selects the consume mode; no prefix means plain accumulation.
 * @returns Rust source text of the `Transition` struct literal.
 */
const rsTransition = (val: string) => {
  const match = /^([_<+?]?)(.*)$/.exec(val)!;
  const flag = match[1];
  const next = match[2];

  let consumeMode: string | undefined;
  switch (flag) {
    case '_':
      consumeMode = 'AccumulateLowerCase';
      break;
    case '':
      consumeMode = 'Accumulate';
      break;
    case '<':
      consumeMode = 'Current';
      break;
    case '+':
      consumeMode = 'Next';
      break;
    case '?':
      consumeMode = 'Reconsume';
      break;
  }

  return `Transition {
to: State::${next},
consume: ConsumeMode::${consumeMode},
}`;
};
|
||||
|
||||
// Enum variants: every state is numbered by its position in `nodes`.
const stateVariants = nodes.map((n, i) => `${n} = ${i}`).join(`,${EOL} `);

// One generated trie per state, named after the upper-cased state name.
const stateTries = nodes.map(n => {
  const trieBuilder = new TrieBuilder(n.toUpperCase(), 'Transition');
  Object.entries(dfa[n])
    // The empty pattern is the fallback transition and is handled separately below.
    .filter(([pat]) => pat !== '')
    .forEach(([pat, val]) => trieBuilder.addPattern(parsePattern(pat), rsTransition(val)));
  const fallback = dfa[n][''];
  if (fallback !== undefined) {
    trieBuilder.fillRemaining(rsTransition(fallback));
  }
  return trieBuilder.generate();
}).join(EOL + EOL);

// Root tries listed in the same order as the enum variants.
const transitionRoots = nodes.map(n => n.toUpperCase()).join(', ');

const output = `
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum State {
${stateVariants}
}

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConsumeMode {
Current,
Next,
Reconsume,
Accumulate,
AccumulateLowerCase,
}

#[derive(Clone, Copy)]
pub struct Transition {
// Make pub to allow destructuring.
pub to: State,
pub consume: ConsumeMode,
}

${stateTries}

pub static TRANSITIONS: [&'static crate::pattern::TrieNode<Transition>; ${nodes.length}] = [${transitionRoots}];
`;

writeFileSync(join(RUST_OUT_DIR, 'dfa.rs'), output);
|
|
@ -0,0 +1,28 @@
|
|||
import {readFileSync, writeFileSync} from 'fs';
|
||||
import {join} from 'path';
|
||||
import {byteStringLiteral, DATA_DIR, RUST_OUT_DIR} from './_common';
|
||||
import {parsePattern, TrieBuilder} from './trie';
|
||||
|
||||
// Entity table: entity reference text => its code points and decoded characters.
const entitiesJson = readFileSync(join(DATA_DIR, 'entities.json'), 'utf8');
const entities: {[name: string]: {codepoints: number[]; characters: string;}} = JSON.parse(entitiesJson);

// Numeric references are recognised by prefix pattern; named ones are added one by one below.
const trieBuilder = new TrieBuilder('ENTITY', "EntityType")
  .addPattern(parsePattern("&#[0-9]"), 'EntityType::Dec')
  .addPattern(parsePattern("&#x[0-9a-fA-F]"), 'EntityType::Hex');

Object.entries(entities).forEach(([rep, {characters}]) => {
  const decoded = Buffer.from(characters, 'utf8');
  // Minification happens in place, so the value written must never be longer than the source:
  // keep the entity reference itself whenever it is shorter than its decoded UTF-8 bytes.
  const replacement = rep.length < decoded.length
    ? Array.from(rep, c => c.charCodeAt(0))
    : [...decoded];
  trieBuilder.add(rep, `EntityType::Named(${byteStringLiteral(replacement)})`);
});

const output = `
#[derive(Clone, Copy)]
pub enum EntityType {
Named(&'static [u8]),
Dec,
Hex,
}

${trieBuilder.generate()}
`;
writeFileSync(join(RUST_OUT_DIR, 'entities.rs'), output);
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@types/node": "^14.0.5",
|
||||
"ts-node": "^8.10.1",
|
||||
"typescript": "^3.7.4",
|
||||
"yaml": "^1.10.0"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
import {readFileSync, writeFileSync} from 'fs';
|
||||
import {DATA_DIR, RUST_OUT_DIR} from './_common';
|
||||
import {join} from 'path';
|
||||
import {EOL} from 'os';
|
||||
|
||||
// Pattern table: name of the generated static => byte sequence to search for.
const patternsJson = readFileSync(join(DATA_DIR, 'patterns.json'), 'utf8');
const patterns: {[name: string]: string} = JSON.parse(patternsJson);
|
||||
|
||||
/** Returns the UTF-16 code unit of `str` at index `char`. */
const chr = (str: string, char: number) => str.charCodeAt(char);

/**
 * Builds the Knuth-Morris-Pratt matching automaton for `seq` and renders it as a Rust
 * `SinglePattern::prebuilt` call.
 *
 * The table is indexed as `dfa[byte][state]` and holds the state reached after reading `byte`
 * in `state`. It is emitted flattened byte-major, i.e. `seq.length` entries per byte value.
 *
 * @param seq Sequence to search for; must be non-empty and consist of byte-sized characters.
 * @returns Rust source text of the `prebuilt(...)` expression.
 * @throws {Error} If `seq` is empty or contains a character above 255. Previously both cases
 *   failed with an opaque TypeError when indexing the table.
 */
const buildPattern = (seq: string): string => {
  if (seq.length === 0) {
    throw new Error('Pattern must not be empty');
  }
  for (let i = 0; i < seq.length; i++) {
    if (chr(seq, i) > 255) throw new Error('Not a byte');
  }

  const dfa: number[][] = Array.from({length: 256}, () => new Array<number>(seq.length).fill(0));

  dfa[chr(seq, 0)][0] = 1;
  // `x` is the restart state: the state the automaton would be in after reading seq[1..j].
  let x = 0;
  for (let j = 1; j < seq.length; j++) {
    // On a mismatch behave exactly like the restart state...
    for (let c = 0; c < 256; c++) {
      dfa[c][j] = dfa[c][x];
    }
    // ...on a match advance to the next state.
    dfa[chr(seq, j)][j] = j + 1;
    x = dfa[chr(seq, j)][x];
  }

  return `crate::pattern::SinglePattern::prebuilt(&[${dfa.flat().join(', ')}], ${seq.length})`;
};
|
||||
|
||||
// One Rust static per named pattern, in the order the patterns appear in the JSON file.
const declarations: string[] = [];
for (const [name, pattern] of Object.entries(patterns)) {
  declarations.push(`pub static ${name}: &crate::pattern::SinglePattern = &${buildPattern(pattern)};`);
}

writeFileSync(join(RUST_OUT_DIR, 'patterns.rs'), declarations.join(EOL));
|
|
@ -0,0 +1,171 @@
|
|||
import {EOL} from 'os';
|
||||
|
||||
// Named character classes usable in patterns as `<name>`; each value is a bracketed class.
const customCharClasses = {
  tagName: '[a-zA-Z-]',
  attrName: '[a-zA-Z-]',
};

// Characters matched by the `\w` pattern escape.
const whitespaceClass = [
  ' ',
  '\r',
  '\n',
  '\t',
  '\v',
  '\f',
];
|
||||
|
||||
/**
 * Lists every character from `from` to `to` inclusive, by the code of their first character.
 * The result is empty when `from` comes after `to`.
 */
const charRange = (from: string, to: string) => {
  const first = from.charCodeAt(0);
  const last = to.charCodeAt(0);
  const chars: string[] = [];
  let code = first;
  while (code <= last) {
    chars.push(String.fromCharCode(code));
    code += 1;
  }
  return chars;
};
|
||||
|
||||
/**
 * Resolves the escape whose character (the one following the backslash) is at `pat[at]`.
 *
 * @returns The characters the escape stands for: the character itself for `\\`, `\]` and `\<`,
 *   or the whitespace class for `\w`.
 * @throws {Error} For any other character, including a backslash at the very end of the pattern.
 */
const parsePatternEscape = (pat: string, at: number): string[] => {
  const escaped = pat[at];
  if (escaped === 'w') {
    return whitespaceClass;
  }
  if (escaped === '\\' || escaped === ']' || escaped === '<') {
    return [escaped];
  }
  throw new Error(`Unknown pattern escape: ${escaped}`);
};
|
||||
|
||||
/**
 * Parses the body of a bracketed character class.
 *
 * @param pat Whole pattern text.
 * @param from Index of the first character after the opening `[`.
 * @returns The characters in the class and the index of the closing `]`.
 * @throws {Error} If the pattern ends before a closing `]` is found.
 */
const parsePatternClass = (pat: string, from: number): [string[], number] => {
  const chars: string[] = [];
  for (let i = from; i < pat.length; i++) {
    switch (pat[i]) {
      case '\\':
        chars.push(...parsePatternEscape(pat, ++i));
        break;
      case ']':
        return [chars, i];
      default: {
        const rangeEnd = pat[i + 2];
        // `x-y` is a range only when something other than the closing bracket follows the
        // hyphen. In `[a-]` the hyphen is a literal; treating `]` as the range end would swallow
        // the terminator and report a bogus "Unexpected end of pattern".
        if (pat[i + 1] === '-' && rangeEnd !== undefined && rangeEnd !== ']') {
          chars.push(...charRange(pat[i], rangeEnd));
          i += 2;
        } else {
          chars.push(pat[i]);
        }
        break;
      }
    }
  }
  throw new Error(`Unexpected end of pattern: ${pat}`);
};
|
||||
|
||||
/**
 * Parses a custom character class reference of the form `<name>`.
 *
 * @param pat Whole pattern text.
 * @param from Index of the first character after the opening `<`.
 * @returns The characters of the named class and the index of the closing `>`.
 * @throws {Error} If there is no closing `>`, or if `name` is not a key of `customCharClasses`
 *   (previously this surfaced as a TypeError from parsing `undefined`).
 */
const parsePatternCustomClass = (pat: string, from: number): [string[], number] => {
  const endIdx = pat.indexOf('>', from);
  if (endIdx === -1) throw new Error(`Unexpected end of pattern: ${pat}`);

  const className = pat.slice(from, endIdx);
  const knownClasses: {[name: string]: string | undefined} = customCharClasses;
  // Own-property check so inherited names such as `constructor` are not mistaken for classes.
  const classPattern = Object.prototype.hasOwnProperty.call(knownClasses, className)
    ? knownClasses[className]
    : undefined;
  if (classPattern === undefined) {
    throw new Error(`Unknown custom character class: ${className}`);
  }

  // Skip the opening `[` of the stored class definition.
  return [parsePatternClass(classPattern, 1)[0], endIdx];
};
|
||||
|
||||
/**
 * Parses a pattern into a sequence of character classes, one per position to match.
 *
 * Supported syntax: `\x` escapes, bracketed classes `[...]`, custom classes `<name>`; any other
 * character matches itself.
 *
 * @returns For each position, the list of characters accepted there.
 */
export const parsePattern = (pat: string): string[][] => {
  const positions: string[][] = [];
  let i = 0;
  while (i < pat.length) {
    const c = pat[i];
    if (c === '\\') {
      // The escape consumes the backslash and the character after it.
      positions.push(parsePatternEscape(pat, i + 1));
      i += 2;
    } else if (c === '[') {
      const [chars, closingIdx] = parsePatternClass(pat, i + 1);
      positions.push(chars);
      i = closingIdx + 1;
    } else if (c === '<') {
      const [chars, closingIdx] = parsePatternCustomClass(pat, i + 1);
      positions.push(chars);
      i = closingIdx + 1;
    } else {
      positions.push([c]);
      i += 1;
    }
  }
  return positions;
};
|
||||
|
||||
/** Trie node: children are indexed by byte value, `value` is the Rust expression stored at the node. */
type Node = {
  children: Node[];
  value?: string;
};

/** Creates a childless node, optionally carrying `value`. */
const createNode = (value?: string): Node => ({value, children: []});

/**
 * Builds a byte-indexed trie in memory and renders it as Rust `static` declarations of
 * `crate::pattern::TrieNode<valueType>`.
 */
export class TrieBuilder {
  private readonly root: Node = createNode();

  // Declarations emitted by the current generate() run, children before their parents.
  private readonly variables: string[] = [];
  private nextId: number = 0;
  // Generated node code => name of the static that already holds it, so identical nodes are emitted once.
  private readonly codeCache: Map<string, string> = new Map();

  /**
   * @param name Name of the public root static; also the prefix of the internal node statics.
   * @param valueType Rust type of the values stored in the trie.
   */
  constructor (
    private readonly name: string,
    private readonly valueType: string,
  ) {
  }

  /** Gives every byte value (0-255) that has no child under the root a leaf child carrying `val`. */
  fillRemaining (val: string): this {
    const {children} = this.root;
    for (let i = 0; i < 256; i++) {
      children[i] = children[i] || createNode(val);
    }
    return this;
  }

  /**
   * Stores `val` at the node reached by the exact sequence `seq`.
   *
   * @throws {Error} If `seq` contains a character above 255.
   */
  add (seq: string, val: string): this {
    let cur: Node = this.root;
    for (let i = 0; i < seq.length; i++) {
      const c = seq.charCodeAt(i);
      if (c > 255) throw new Error('Not a byte');
      cur = cur.children[c] = cur.children[c] || createNode();
    }
    cur.value = val;
    return this;
  }

  /**
   * Stores `val` at every node reachable by picking one character from each class of `pattern`
   * in order.
   *
   * @param pattern One list of accepted single characters per position.
   * @throws {Error} If a class entry is not exactly one character, or is above 255.
   */
  addPattern (pattern: string[][], val: string): this {
    let cur: Node[] = [this.root];
    for (const cls of pattern) {
      const next: Node[] = [];
      for (let i = 0; i < cls.length; i++) {
        if (cls[i].length !== 1) throw new Error(`Not a byte`);
        const c = cls[i].charCodeAt(0);
        if (c > 255) throw new Error('Not a byte');
        // Extend every node reached so far with this character.
        next.push(...cur.map(n => n.children[c] = n.children[c] || createNode()));
      }
      cur = next;
    }
    cur.forEach(n => n.value = val);
    return this;
  }

  // Generate the code for a node's variable name and value, and return the name.
  private generateNode (node: Node): string {
    // Only generate elements up to the last non-undefined child to cut down on size of array
    // (the length of the sparse children array is its highest used index plus one).
    const children = Array.from(
      {length: node.children.length},
      (_, i) => node.children[i] ? `Some(${this.generateNode(node.children[i])})` : 'None',
    ).join(', ');

    const value = node.value === undefined ? 'None' : `Some(${node.value})`;
    const varValue = `&crate::pattern::TrieNode {
value: ${value},
children: &[${children}],
}`;
    const existingVarName = this.codeCache.get(varValue);
    if (existingVarName) {
      return existingVarName;
    }

    const name = `${this.name}_NODE_${this.nextId++}`;
    this.variables.push(`static ${name}: &'static crate::pattern::TrieNode<${this.valueType}> = ${varValue};`);
    this.codeCache.set(varValue, name);
    return name;
  }

  /**
   * Renders the whole trie as Rust source. Only the root is declared `pub`, under the builder's
   * name. May be called repeatedly; every call regenerates the complete output.
   */
  generate (): string {
    // Reset ALL per-run state. The cache must be cleared together with the declarations:
    // otherwise a second call finds every node cached, emits nothing and returns an empty string.
    this.variables.length = 0;
    this.nextId = 0;
    this.codeCache.clear();
    const rootName = this.generateNode(this.root);
    // Make root node public and use proper name. A replacer function keeps `$` sequences in the
    // name from being interpreted as replacement patterns.
    return this.variables.join(EOL + EOL).replace(`static ${rootName}`, () => `pub static ${this.name}`);
  }
}
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"include": [
|
||||
"*.ts"
|
||||
],
|
||||
"compilerOptions": {
|
||||
"allowJs": false,
|
||||
"alwaysStrict": true,
|
||||
"declaration": true,
|
||||
"esModuleInterop": true,
|
||||
"lib": [
|
||||
"es2020"
|
||||
],
|
||||
"module": "commonjs",
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"noImplicitAny": true,
|
||||
"noImplicitReturns": true,
|
||||
"noImplicitThis": true,
|
||||
"noUnusedParameters": true,
|
||||
"outDir": "dist",
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"strictFunctionTypes": true,
|
||||
"strictNullChecks": true,
|
||||
"strictPropertyInitialization": true,
|
||||
"suppressImplicitAnyIndexErrors": true,
|
||||
"target": "es6"
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue