Precompute generated files instead of building them every compile

This commit is contained in:
Wilson Lin 2020-06-19 17:16:23 +10:00
parent 2591f2879d
commit 99a13e3243
21 changed files with 3976 additions and 5030 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
/target
/src/gen/

59
Cargo.lock generated
View File

@ -23,11 +23,6 @@ name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cascade"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.10"
@ -47,11 +42,6 @@ dependencies = [
"vec_map 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fastrie"
version = "0.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "getrandom"
version = "0.1.14"
@ -82,19 +72,10 @@ dependencies = [
name = "hyperbuild"
version = "0.0.45"
dependencies = [
"cascade 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"fastrie 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "itoa"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -241,39 +222,6 @@ dependencies = [
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ryu"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde"
version = "1.0.110"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_derive"
version = "1.0.110"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_json"
version = "1.0.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "siphasher"
version = "0.3.3"
@ -387,14 +335,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
"checksum cascade 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "31c9ddf4a1a9dbf82e130117f81b0c292fb5416000cbaba11eb92a65face2613"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
"checksum fastrie 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "16a8e873087682100de15eaccd3f4671c44fe589bd8989a854c061c961884d16"
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
"checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"
"checksum itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
"checksum libc 0.2.69 (registry+https://github.com/rust-lang/crates.io-index)" = "99e85c08494b21a9054e7fe1374a732aeadaff3980b6990b94bfd3a70f690005"
"checksum phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
@ -412,10 +357,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
"checksum rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
"checksum ryu 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1"
"checksum serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)" = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c"
"checksum serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)" = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984"
"checksum serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)" = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2"
"checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
"checksum structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"

View File

@ -10,20 +10,14 @@ repository = "https://github.com/wilsonzlin/hyperbuild.git"
version = "0.0.45"
authors = ["Wilson Lin <code@wilsonl.in>"]
edition = "2018"
include = ["/gen/*.json", "/src/**/*", "/build.rs", "/Cargo.toml", "/LICENSE", "/README.md"]
include = ["/src/**/*", "/Cargo.toml", "/LICENSE", "/README.md"]
[badges]
maintenance = { status = "actively-developed" }
[dependencies]
cascade = "0.1.4"
fastrie = "0.0.6"
phf = { version = "0.8.0", features = ["macros"] }
structopt = "0.3.5"
[build-dependencies]
serde = { version = "1.0.104", features = ["derive"] }
serde_json = "1.0.44"
[profile.release]
panic = 'abort'

309
build.rs
View File

@ -1,309 +0,0 @@
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::Write;
use std::ops::{Index, IndexMut};
use std::path::Path;
use serde::{Deserialize, Serialize};
/// Renders `bytes` as the source text of a Rust byte-string literal (`b"..."`).
///
/// Printable ASCII (space through `~`) is emitted verbatim, except for `\` and `"`,
/// which, like every other byte, are written as a two-digit `\xNN` escape.
fn create_byte_string_literal(bytes: &[u8]) -> String {
    let mut literal = String::from("b\"");
    for &byte in bytes {
        match byte {
            // These two are printable but would terminate or escape inside the literal.
            b'\\' | b'"' => literal.push_str(&format!("\\x{:02x}", byte)),
            b' '..=b'~' => literal.push(byte as char),
            _ => literal.push_str(&format!("\\x{:02x}", byte)),
        }
    }
    literal.push('"');
    literal
}
/// Opens `gen/<name>.json` (relative to the current working directory) and
/// deserialises its contents into `T`.
///
/// Panics if the file cannot be opened or its contents do not deserialise into `T`.
fn read_json<T>(name: &str) -> T where for<'de> T: Deserialize<'de> {
    let mut json_path = Path::new("gen").to_path_buf();
    json_path.push(format!("{}.json", name));
    let reader = File::open(json_path).unwrap();
    serde_json::from_reader(reader).unwrap()
}
/// Writes `code` to `$OUT_DIR/gen_<name>.rs`, creating or truncating the file.
///
/// Panics if `OUT_DIR` is not set or the file cannot be created or written.
fn write_rs(name: &str, code: String) -> () {
    let file_name = format!("gen_{}.rs", name);
    let target = Path::new(&env::var("OUT_DIR").unwrap()).join(file_name);
    File::create(&target).unwrap().write_all(code.as_bytes()).unwrap();
}
/// Splits `n` on single space characters and returns the pieces as owned strings.
/// Consecutive spaces therefore yield empty words.
fn name_words(n: &str) -> Vec<String> {
    let mut words = Vec::new();
    for word in n.split(' ') {
        words.push(String::from(word));
    }
    words
}
/// Upper-cases every word and joins them with underscores (e.g. `FOO_BAR`).
fn snake_case(n: &Vec<String>) -> String {
    let mut joined = String::new();
    for (position, word) in n.iter().enumerate() {
        if position > 0 {
            joined.push('_');
        }
        joined.push_str(&word.to_uppercase());
    }
    joined
}
/// Concatenates the words, upper-casing the first character of each word when
/// that character is ASCII (e.g. `["foo", "bar"]` becomes `FooBar`).
///
/// Empty words contribute nothing, and words that start with a non-ASCII
/// character are appended unchanged; neither case panics.
fn camel_case(n: &Vec<String>) -> String {
    n.iter()
        .map(|word| {
            // Work on chars rather than raw bytes so a multi-byte first
            // character is never split in the middle of its encoding.
            let mut chars = word.chars();
            match chars.next() {
                Some(first) => format!("{}{}", first.to_ascii_uppercase(), chars.as_str()),
                None => String::new(),
            }
        })
        .collect()
}
/// A matrix of `usize` values kept in one flat vector, addressed as
/// `(row, col)` with `row * cols + col` as the flat offset.
pub struct TwoDimensionalArray {
    data: Vec<usize>,
    cols: usize,
}

impl TwoDimensionalArray {
    /// Creates a zero-filled array with `rows * cols` cells.
    pub fn new(rows: usize, cols: usize) -> TwoDimensionalArray {
        Self::prebuilt(vec![0; rows * cols], cols)
    }

    /// Wraps an existing flat vector; `cols` is the row width used for indexing.
    pub fn prebuilt(data: Vec<usize>, cols: usize) -> TwoDimensionalArray {
        Self { data, cols }
    }

    /// Flat offset of the cell at (`row`, `col`).
    fn offset(&self, row: usize, col: usize) -> usize {
        row * self.cols + col
    }
}

/// Index type accepted by `TwoDimensionalArray`: `(row, col)`.
type TwoDimensionalArrayIndex = (usize, usize);

impl Index<TwoDimensionalArrayIndex> for TwoDimensionalArray {
    type Output = usize;

    fn index(&self, (row, col): TwoDimensionalArrayIndex) -> &Self::Output {
        let at = self.offset(row, col);
        &self.data[at]
    }
}

impl IndexMut<TwoDimensionalArrayIndex> for TwoDimensionalArray {
    fn index_mut(&mut self, (row, col): TwoDimensionalArrayIndex) -> &mut Self::Output {
        let at = self.offset(row, col);
        &mut self.data[at]
    }
}
/// Builds the Rust expression text `crate::pattern::SinglePattern::prebuilt(&[...], len)`
/// for `pattern`, where the array is the flattened transition table computed below.
///
/// The table has 256 rows (one per byte value) and `pattern.len()` columns (one per
/// state); a cell holds the next state for that byte in that state.
///
/// Panics if `pattern` is not ASCII, or if it is empty (`seq[0]` is read unconditionally).
fn build_pattern(pattern: String) -> String {
assert!(pattern.is_ascii());
let seq = pattern.as_bytes();
let dfa = &mut TwoDimensionalArray::new(256, seq.len());
// From state 0, only the first pattern byte advances; every other cell stays 0.
dfa[(seq[0] as usize, 0)] = 1;
// `x` is the fallback state whose transitions are copied into column `j` for mismatches.
let mut x = 0;
let mut j = 1;
while j < seq.len() {
// Mismatch transitions: behave as the fallback state would.
for c in 0..256 {
dfa[(c, j)] = dfa[(c, x)];
};
// Match transition: the expected byte advances to the next state.
dfa[(seq[j] as usize, j)] = j + 1;
// Advance the fallback state using the byte just processed. This must happen after
// column `j` has been filled, because it reads column `x` (which is < `j`).
x = dfa[(seq[j] as usize, x)];
j += 1;
};
format!(
"crate::pattern::SinglePattern::prebuilt(&[{}], {})",
dfa.data.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(", "),
seq.len(),
)
}
/// Minification settings for one attribute on one tag, as deserialised from the
/// attribute JSON read by `generate_attr_map`.
#[derive(Serialize, Deserialize)]
struct TagAttr {
boolean: bool,
redundant_if_empty: bool,
collapse_and_trim: bool,
default_value: Option<String>,
}
impl TagAttr {
/// Returns the Rust source text of an `AttributeMinification { .. }` struct literal
/// carrying this attribute's settings. `default_value` is rendered as
/// `Some(<byte-string literal>)` or `None`.
fn code(&self) -> String {
format!(r"
AttributeMinification {{
boolean: {boolean},
redundant_if_empty: {redundant_if_empty},
collapse_and_trim: {collapse_and_trim},
default_value: {default_value},
}}
",
boolean = self.boolean,
redundant_if_empty = self.redundant_if_empty,
collapse_and_trim = self.collapse_and_trim,
default_value = match &self.default_value {
// Escape through the byte-string helper so quotes/backslashes cannot break the literal.
Some(val) => format!("Some({})", create_byte_string_literal(val.as_bytes())),
None => "None".to_string(),
},
)
}
}
/// Reads the `attrs` JSON (attribute name => namespace => tag => settings) and writes
/// the generated `attrs` Rust file containing:
///   * one `<NS>_<ATTR>_ATTR` static per attribute/namespace pair that has data,
///   * one `<ATTR>_ATTR: ByNamespace` static per attribute,
///   * the public `ATTRS` map from attribute name to its `ByNamespace` static.
///
/// NOTE(review): the maps are `HashMap`s, so the order of the emitted items presumably
/// varies between runs; confirm whether stable output matters.
fn generate_attr_map() {
let attrs: HashMap<String, HashMap<String, HashMap<String, TagAttr>>> = read_json("attrs");
let mut code = String::new();
for (attr_name, namespaces) in attrs.iter() {
// Built separately so the per-namespace statics it refers to are appended to `code` first.
let mut by_namespace_code = String::new();
by_namespace_code.push_str(format!("static {}_ATTR: ByNamespace = ByNamespace {{\n", attr_name.to_uppercase()).as_str());
for namespace in ["html".to_string(), "svg".to_string()].iter() {
by_namespace_code.push_str(format!("\t{}: ", namespace).as_str());
match namespaces.get(namespace) {
None => by_namespace_code.push_str("None"),
Some(tags_map) => {
// A "*" entry means the settings apply to every element in the namespace;
// otherwise emit a per-tag phf map.
if let Some(global_attr) = tags_map.get("*") {
code.push_str(format!(
"static {}_{}_ATTR: &AttrMapEntry = &AttrMapEntry::AllNamespaceElements({});\n\n",
namespace.to_uppercase(),
attr_name.to_uppercase(),
global_attr.code(),
).as_str());
} else {
code.push_str(format!(
"static {}_{}_ATTR: &AttrMapEntry = &AttrMapEntry::SpecificNamespaceElements(phf::phf_map! {{\n{}\n}});\n\n",
namespace.to_uppercase(),
attr_name.to_uppercase(),
tags_map
.iter()
.map(|(tag_name, tag_attr)| format!(
"b\"{}\" => {}",
tag_name,
tag_attr.code(),
))
.collect::<Vec<String>>()
.join(",\n"),
).as_str());
};
by_namespace_code.push_str(format!("Some({}_{}_ATTR)", namespace.to_uppercase(), attr_name.to_uppercase()).as_str());
}
};
by_namespace_code.push_str(",\n");
};
by_namespace_code.push_str("};\n\n");
code.push_str(&by_namespace_code);
};
// Top-level lookup table: attribute name bytes => the ByNamespace static emitted above.
code.push_str("pub static ATTRS: AttrMap = AttrMap::new(phf::phf_map! {\n");
for attr_name in attrs.keys() {
code.push_str(format!("\tb\"{}\" => {}_ATTR,\n", attr_name, attr_name.to_uppercase()).as_str());
};
code.push_str("});\n\n");
write_rs("attrs", code);
}
/// One entry of the named-entities JSON read by `generate_entities`.
#[derive(Serialize, Deserialize)]
struct Entity {
// Not read by the code in this file. Presumably the Unicode code points of
// `characters`; TODO confirm against the JSON source.
codepoints: Vec<u32>,
// Decoded text of the entity; its UTF-8 bytes become the trie value.
characters: String,
}
/// A node of the byte-keyed trie that is later rendered as Rust source.
pub struct TrieBuilderNode {
// Rust expression text emitted as this node's value; `None` if no key ends here.
value: Option<String>,
// One slot per possible byte value (256 entries), indexed by the next key byte.
children: Vec<Option<TrieBuilderNode>>,
}
/// State shared across one trie code-generation pass: the naming inputs, the
/// next unused node id, and the buffer the generated source is appended to.
struct TrieBuilderGenerationContext<'t, 'v, 'o> {
    trie_name: &'t str,
    value_type: &'v str,
    next_id: usize,
    out: &'o mut String,
}

impl<'t, 'v, 'o> TrieBuilderGenerationContext<'t, 'v, 'o> {
    /// Hands out the next sequential node id, starting from the initial `next_id`.
    pub fn id(&mut self) -> usize {
        let allocated = self.next_id;
        self.next_id = allocated + 1;
        allocated
    }
}
impl TrieBuilderNode {
/// Creates a node with no value and 256 empty child slots.
pub fn new() -> TrieBuilderNode {
let mut children = Vec::new();
for _ in 0..256 {
children.push(None);
};
TrieBuilderNode { value: None, children }
}
/// Inserts `value` under the key `seq`, creating intermediate nodes as needed.
/// An existing value for the same key is overwritten.
pub fn add(&mut self, seq: &[u8], value: String) -> () {
let mut current = self;
for c in seq.iter() {
current = current.children[*c as usize].get_or_insert_with(|| TrieBuilderNode::new());
};
current.value.replace(value);
}
/// Name of the generated static for the node with id `node_id`.
fn _generated_node_var_name(&self, trie_name: &str, node_id: usize) -> String {
format!("{trie_name}_NODE_{node_id}", trie_name = trie_name, node_id = node_id)
}
/// Appends the static definitions for this subtree to `ctx.out` and returns this
/// node's id. Children are generated first, so each node's static only refers to
/// statics that have already been emitted and ids are handed out in post-order.
fn _generate(&self, ctx: &mut TrieBuilderGenerationContext) -> usize {
let children = self.children.iter().map(|c| match c {
None => "None".to_string(),
Some(c) => {
let child_id = c._generate(ctx);
format!("Some({})", self._generated_node_var_name(ctx.trie_name, child_id))
}
}).collect::<Vec<String>>().join(", ");
// Allocate this node's id only after all descendants have taken theirs.
let id = ctx.id();
let code = format!(
"static {var_name}: &'static crate::pattern::TrieNode<{value_type}> = &crate::pattern::TrieNode {{\n\tvalue: {value},\n\tchildren: [{children}],\n}};\n\n",
var_name = self._generated_node_var_name(ctx.trie_name, id),
value_type = ctx.value_type,
value = self.value.as_ref().map_or("None".to_string(), |v| format!("Some({})", v)),
children = children,
);
ctx.out.push_str(code.as_str());
id
}
/// Returns the Rust source for the whole trie rooted at this node. Every node becomes
/// a private static named `<trie_name>_NODE_<id>`, except the root, which is renamed
/// to a public static called `trie_name`.
pub fn generate(&self, trie_name: &str, value_type: &str) -> String {
let mut out = String::new();
let mut ctx = TrieBuilderGenerationContext {
trie_name,
value_type,
next_id: 0,
out: &mut out,
};
let root_id = self._generate(&mut ctx);
// Make root node public and use proper name.
// The textual replace is applied to the whole output. The root receives the last
// (largest) id, so no other `static <trie_name>_NODE_<n>` text starts with its name.
ctx.out.replace(
format!("static {}", self._generated_node_var_name(trie_name, root_id)).as_str(),
format!("pub static {}", trie_name).as_str()
)
}
}
/// Reads the named-entities JSON, builds a trie keyed by each entity name without
/// its first byte, and writes the trie as the generated `entities` Rust file.
fn generate_entities() {
    let entities: HashMap<String, Entity> = read_json("entities");

    let mut trie_builder = TrieBuilderNode::new();
    for (rep, entity) in entities {
        let encoded = rep.as_bytes();
        let decoded = entity.characters.as_bytes();
        // Minification happens in place, so the replacement must never be longer than
        // the source text. When decoding would grow the output, keep the entity itself
        // (including its leading '&') as the value.
        let val = if encoded.len() >= decoded.len() {
            create_byte_string_literal(decoded)
        } else {
            println!("Entity {} is shorter than decoded UTF-8 bytes...", rep);
            create_byte_string_literal(encoded)
        };
        trie_builder.add(&encoded[1..], val);
    }

    let code = trie_builder.generate("ENTITY_REFERENCES", "&'static [u8]");
    write_rs("entities", code);
}
/// Reads the `patterns` JSON (name => pattern text) and writes one generated
/// `pattern_<name>` Rust file per entry, each declaring a static `SinglePattern`.
fn generate_patterns() {
    let patterns: HashMap<String, String> = read_json("patterns");
    patterns.into_iter().for_each(|(name, pattern)| {
        let code = format!(
            "static {}: &crate::pattern::SinglePattern = &{};",
            name,
            build_pattern(pattern),
        );
        let file_stem = format!("pattern_{}", name);
        write_rs(&file_stem, code);
    });
}
/// Build-script entry point: runs the three generators (attribute map, entity trie,
/// single patterns) in sequence.
fn main() {
generate_attr_map();
generate_entities();
generate_patterns();
}

2
gen/.gitignore vendored
View File

@ -1,2 +1,2 @@
node_modules/
build/*.d.ts
/package-lock.json

37
gen/_common.ts Normal file
View File

@ -0,0 +1,37 @@
import { join } from "path";
import {mkdirSync, writeFileSync} from 'fs';
/** Directory that receives the generated Rust sources: `../src/gen` relative to this script. */
export const RUST_OUT_DIR = join(__dirname, '..', 'src', 'gen');

// `recursive: true` makes this a no-op when the directory already exists, so no
// error-code sniffing on an untyped catch variable is needed. It still throws if
// the path exists but is not a directory.
mkdirSync(RUST_OUT_DIR, {recursive: true});

// Module index for the generated directory; each listed module is written by its own generator.
writeFileSync(join(RUST_OUT_DIR, 'mod.rs'), `
pub mod attrs;
pub mod dfa;
pub mod entities;
pub mod patterns;
`);

/** Directory holding the hand-maintained input data, next to this script. */
export const DATA_DIR = join(__dirname, 'data');
/**
 * Left-pads `str` with `'0'` characters until it is at least `n` characters long.
 * A string that is already `n` characters or longer is returned unchanged
 * (the previous `'0'.repeat(n - str.length)` threw a RangeError in that case).
 */
export const leftPad = (str: string, n: number): string => str.padStart(n, '0');
/** Serialises `v` as JSON indented with two spaces. */
export const prettyJson = (v: unknown): string => JSON.stringify(v, null, 2);
/**
 * Renders byte values as the source text of a Rust byte-string literal (`b"..."`).
 *
 * Printable ASCII (0x20 ' ' through 0x7E '~') is emitted verbatim, except for
 * 0x5C '\' and 0x22 '"', which, like every other byte, become a two-digit
 * `\xNN` escape.
 *
 * @param bytes values to encode; each must be an integer in the range 0..255
 * @returns the literal, including the `b"` prefix and closing quote
 * @throws Error ('Not a byte') if any value is negative, fractional, NaN or above 255
 */
export const byteStringLiteral = (bytes: number[]): string => {
  const escaped = bytes.map(c => {
    // Negative or fractional values would otherwise yield malformed escapes such as `\x-1`.
    if (!Number.isInteger(c) || c < 0 || c > 255) throw new Error('Not a byte');
    const printable = c >= 0x20 && c <= 0x7E && c !== 0x5C && c !== 0x22;
    return printable ? String.fromCharCode(c) : `\\x${c.toString(16).padStart(2, '0')}`;
  });
  return `b"${escaped.join('')}"`;
};

File diff suppressed because it is too large Load Diff

210
gen/attrs.ts Normal file
View File

@ -0,0 +1,210 @@
import {readFileSync, writeFileSync} from 'fs';
import ts, {Node, SourceFile, SyntaxKind, Type} from 'typescript';
import {join} from 'path';
import {DATA_DIR, prettyJson, RUST_OUT_DIR} from './_common';
// Source text of the React type declarations kept in the data directory.
// Uses the shared DATA_DIR constant, like the attrs.json read in this file, instead of rebuilding the path by hand.
const reactDeclarations = readFileSync(join(DATA_DIR, 'react.d.ts'), 'utf8');
// TODO Consider and check behaviour when value matches case insensitively, after trimming whitespace, numerically (for number values), etc.
// TODO This file is currently manually sourced and written. Try to get machine-readable spec and automate.
// Default values per attribute name. Each entry lists the fully qualified tags
// (`namespace:tag`) that share one default value.
// NOTE(review): the parsed JSON is trusted to match this shape; nothing validates it at runtime.
const defaultAttributeValues: {
[attr: string]: {
tags: string[];
defaultValue: string;
isPositiveInteger?: boolean;
}[];
} = JSON.parse(readFileSync(join(DATA_DIR, 'attrs.json'), 'utf8'));
// Lowercased tag-name prefixes of React interface names whose HTML tag name differs.
const tagNameNormalised = {
  'anchor': 'a',
};

// Lowercased React property names whose HTML attribute name differs.
const attrNameNormalised = {
  'classname': 'class',
};

// Properties that exist only in React and must not be emitted as attributes.
// They are written in React's camelCase spelling for readability and lowercased
// here, because they are compared against names that normaliseName() has already
// lowercased; without this the filter can never match.
const reactSpecificAttributes = [
  'defaultChecked', 'defaultValue', 'suppressContentEditableWarning', 'suppressHydrationWarning',
].map(name => name.toLowerCase());

// Attribute name => fully qualified tags (`namespace:tag`, `*` meaning every tag in
// the namespace) on which the attribute's value is collapsed and trimmed.
const collapsibleAndTrimmable: Record<string, string[]> = {
  'class': ['html:*'],
  'd': ['svg:*'],
};
// TODO Is escapedText the API for getting name?
// Reads the declared name of an AST node via `name.escapedText`.
const getNameOfNode = (n: any) => {
  const {escapedText} = n.name;
  return escapedText;
};
/**
 * Lowercases `name` and, if `norms` has its own entry for the lowercased name,
 * returns that replacement instead.
 *
 * Only own properties of `norms` are consulted, so names such as `constructor`
 * are not replaced by members inherited from `Object.prototype`.
 */
const normaliseName = (name: string, norms: { [name: string]: string }): string => {
  const lower = name.toLowerCase();
  const mapped = Object.prototype.hasOwnProperty.call(norms, lower) ? norms[lower] : undefined;
  // An empty replacement falls back to the lowercased name, as before.
  return mapped || lower;
};
/** Minification settings for one attribute on one tag (or on every tag of a namespace). */
type AttrConfig = {
  boolean: boolean;
  redundantIfEmpty: boolean;
  collapseAndTrim: boolean;
  defaultValue?: string;
};

/**
 * Renders an `AttrConfig` as the source text of a Rust `AttributeMinification`
 * struct literal.
 *
 * The default value is emitted as a Rust byte-string literal built from its UTF-8
 * bytes: printable ASCII is written verbatim except `\` and `"`, and every other
 * byte becomes a `\xNN` escape, so quotes, backslashes or non-ASCII characters in
 * a default value cannot produce invalid Rust.
 */
const rsTagAttr = ({
  redundantIfEmpty,
  defaultValue,
  collapseAndTrim,
  boolean,
}: AttrConfig): string => {
  let defaultValueCode = 'None';
  if (defaultValue != undefined) {
    const escaped = Array.from(new TextEncoder().encode(defaultValue), byte =>
      byte >= 0x20 && byte <= 0x7E && byte !== 0x5C && byte !== 0x22
        ? String.fromCharCode(byte)
        : `\\x${byte.toString(16).padStart(2, '0')}`
    ).join('');
    defaultValueCode = `Some(b"${escaped}")`;
  }
  return `AttributeMinification {
boolean: ${boolean},
redundant_if_empty: ${redundantIfEmpty},
collapse_and_trim: ${collapseAndTrim},
default_value: ${defaultValueCode},
}`;
};
/**
 * Walks the parsed React declaration file, collects every property of the
 * `*HTMLAttributes` / `*SVGAttributes` interfaces, derives minification settings
 * for each attribute, and returns the complete Rust source of the generated
 * attribute module (type definitions, per-attribute statics and the `ATTRS` map).
 *
 * Throws if the global HTML attributes interface does not sort first, if a
 * tag/attribute pair has more than one default value or occurs twice, or if a
 * tag-specific attribute disagrees with the global one of the same name.
 */
const processReactTypeDeclarations = (source: SourceFile) => {
const nodes: Node[] = [source];
// Use index-based loop to keep iterating as nodes array grows.
for (let i = 0; i < nodes.length; i++) {
// forEachChild doesn't work if return value is number (e.g. return value of Array.prototype.push).
nodes[i].forEachChild(c => void nodes.push(c));
}
// Interface name => { namespace, tag, node }. The part before HTML/SVG is the tag
// name; an empty tag means the interface holds the namespace's global attributes.
const attributeNodes = nodes
.filter(n => n.kind === ts.SyntaxKind.InterfaceDeclaration)
.map(n => [/^([A-Za-z]*)(HTML|SVG)Attributes/.exec(getNameOfNode(n)), n])
.filter(([matches]) => !!matches)
.map(([matches, node]) => [matches![2].toLowerCase(), normaliseName(matches![1], tagNameNormalised), node])
.filter(([namespace, tagName]) => namespace !== 'html' || !['all', 'webview'].includes(tagName))
.map(([namespace, tag, node]) => ({namespace, tag, node}))
.sort((a, b) => a.namespace.localeCompare(b.namespace) || a.tag.localeCompare(b.tag));
// Process global HTML attributes first as they also appear on some specific HTML tags but we don't want to keep the specific ones if they're global.
if (attributeNodes[0].namespace !== 'html' || attributeNodes[0].tag !== '') {
throw new Error(`Global HTML attributes is not first to be processed`);
}
// Map structure: attr => namespace => tag => config.
const attributes = new Map<string, Map<'html' | 'svg', Map<string, AttrConfig>>>();
for (const {namespace, tag, node} of attributeNodes) {
const fullyQualifiedTagName = [namespace, tag || '*'].join(':');
for (const n of node.members.filter((n: Node) => n.kind === ts.SyntaxKind.PropertySignature)) {
const attrName = normaliseName(getNameOfNode(n), attrNameNormalised);
// NOTE(review): attrName is lowercased at this point, so entries of
// reactSpecificAttributes only match if they are lowercase as well.
if (reactSpecificAttributes.includes(attrName)) continue;
// Syntax kinds of the property's declared type; union members are flattened one level.
const types: SyntaxKind[] = n.type.kind === ts.SyntaxKind.UnionType
? n.type.types.map((t: Node) => t.kind)
: [n.type.kind];
const boolean = types.includes(ts.SyntaxKind.BooleanKeyword);
// If types includes boolean and string, make it a boolean attr to prevent it from being removed if empty value.
const redundantIfEmpty = !boolean && types.some(t => t === ts.SyntaxKind.StringKeyword || t === ts.SyntaxKind.NumberKeyword);
const defaultValues = (defaultAttributeValues[attrName] || [])
.filter(a => a.tags.includes(fullyQualifiedTagName))
.map(a => a.defaultValue);
const collapseAndTrim = (collapsibleAndTrimmable[attrName] || []).includes(fullyQualifiedTagName);
if (defaultValues.length > 1) {
throw new Error(`Tag-attribute combination <${fullyQualifiedTagName} ${attrName}> has multiple default values: ${defaultValues}`);
}
const attr: AttrConfig = {
boolean,
redundantIfEmpty,
collapseAndTrim,
defaultValue: defaultValues[0],
};
if (!attributes.has(attrName)) attributes.set(attrName, new Map());
const namespacesForAttribute = attributes.get(attrName)!;
if (!namespacesForAttribute.has(namespace)) namespacesForAttribute.set(namespace, new Map());
const tagsForNsAttribute = namespacesForAttribute.get(namespace)!;
if (tagsForNsAttribute.has(tag)) throw new Error(`Duplicate tag-attribute combination: <${fullyQualifiedTagName} ${attrName}>`);
// A global ('*') entry wins: tag-specific duplicates are dropped, but only after
// checking that they agree with the global settings.
const globalAttr = tagsForNsAttribute.get('*');
if (globalAttr) {
if (globalAttr.boolean !== attr.boolean
|| globalAttr.redundantIfEmpty !== attr.redundantIfEmpty
|| globalAttr.collapseAndTrim !== attr.collapseAndTrim
|| globalAttr.defaultValue !== attr.defaultValue) {
throw new Error(`Global and tag-specific attributes conflict: ${prettyJson(globalAttr)} ${prettyJson(attr)}`);
}
} else {
tagsForNsAttribute.set(tag || '*', attr);
}
}
}
// Fixed prelude of the generated Rust module: the types the statics below are built from.
let code = `
use crate::spec::tag::ns::Namespace;
pub struct AttributeMinification {
pub boolean: bool,
pub redundant_if_empty: bool,
pub collapse_and_trim: bool,
pub default_value: Option<&'static [u8]>,
}
pub enum AttrMapEntry {
AllNamespaceElements(AttributeMinification),
SpecificNamespaceElements(phf::Map<&'static [u8], AttributeMinification>),
}
#[derive(Clone, Copy)]
pub struct ByNamespace {
// Make pub so this struct can be statically created in gen/attrs.rs.
pub html: Option<&'static AttrMapEntry>,
pub svg: Option<&'static AttrMapEntry>,
}
impl ByNamespace {
fn get(&self, ns: Namespace) -> Option<&'static AttrMapEntry> {
match ns {
Namespace::Html => self.html,
Namespace::Svg => self.svg,
}
}
}
pub struct AttrMap(phf::Map<&'static [u8], ByNamespace>);
impl AttrMap {
pub const fn new(map: phf::Map<&'static [u8], ByNamespace>) -> AttrMap {
AttrMap(map)
}
pub fn get(&self, ns: Namespace, tag: &[u8], attr: &[u8]) -> Option<&AttributeMinification> {
self.0.get(attr).and_then(|namespaces| namespaces.get(ns)).and_then(|entry| match entry {
AttrMapEntry::AllNamespaceElements(min) => Some(min),
AttrMapEntry::SpecificNamespaceElements(map) => map.get(tag),
})
}
}
`;
// Per attribute: one static per namespace that has data, followed by the ByNamespace
// static that points at them (built separately so it is appended after them).
for (const [attrName, namespaces] of attributes) {
let byNsCode = '';
byNsCode += `static ${attrName.toUpperCase()}_ATTR: ByNamespace = ByNamespace {\n`;
for (const ns of ['html', 'svg'] as const) {
byNsCode += `\t${ns}: `;
const tagsMap = namespaces.get(ns);
if (!tagsMap) {
byNsCode += 'None';
} else {
const globalAttr = tagsMap.get('*');
if (globalAttr) {
code += `static ${ns.toUpperCase()}_${attrName.toUpperCase()}_ATTR: &AttrMapEntry = &AttrMapEntry::AllNamespaceElements(${rsTagAttr(globalAttr)});\n\n`;
} else {
code += `static ${ns.toUpperCase()}_${attrName.toUpperCase()}_ATTR: &AttrMapEntry = &AttrMapEntry::SpecificNamespaceElements(phf::phf_map! {\n${
[...tagsMap].map(([tagName, tagAttr]) => `b\"${tagName}\" => ${rsTagAttr(tagAttr)}`).join(',\n')
}\n});\n\n`;
}
byNsCode += `Some(${ns.toUpperCase()}_${attrName.toUpperCase()}_ATTR)`;
}
byNsCode += ',\n';
}
byNsCode += '};\n\n';
code += byNsCode;
}
// Top-level lookup: attribute name bytes => the ByNamespace static emitted above.
code += 'pub static ATTRS: AttrMap = AttrMap::new(phf::phf_map! {\n';
for (const attr_name of attributes.keys()) {
code += `\tb\"${attr_name}\" => ${attr_name.toUpperCase()}_ATTR,\n`;
}
code += '});\n\n';
return code;
};
// Script entry: parse the React declaration text, generate the Rust attribute
// module from it, and write the result to `attrs.rs` in the Rust output directory.
const source = ts.createSourceFile(`react.d.ts`, reactDeclarations, ts.ScriptTarget.ES2020);
const generatedAttrsCode = processReactTypeDeclarations(source);
writeFileSync(join(RUST_OUT_DIR, 'attrs.rs'), generatedAttrsCode);

View File

@ -1,215 +0,0 @@
const request = require('request-promise-native');
const {promises: fs} = require('fs');
const ts = require('typescript');
const path = require('path');
// Comparator for [name, value] entries: orders them by name using localeCompare.
const compareEntryNames = (a, b) => {
  const leftName = a[0];
  const rightName = b[0];
  return leftName.localeCompare(rightName);
};
// Converts a Map (and any Maps nested as values) into plain objects whose
// properties are inserted in name order.
const deepObjectifyMap = map => {
  const entries = [];
  for (const [key, value] of map.entries()) {
    entries.push([key, value instanceof Map ? deepObjectifyMap(value) : value]);
  }
  entries.sort(compareEntryNames);
  return Object.fromEntries(entries);
};
// Splits a camelCase identifier before each uppercase letter and lowercases the pieces.
const fromCamelCase = camelCase => {
  const words = [];
  for (const word of camelCase.split(/(?=^|[A-Z])/)) {
    words.push(word.toLowerCase());
  }
  return words;
};
// Serialises a value as JSON indented with two spaces.
const prettyjson = v => {
  const indentWidth = 2;
  return JSON.stringify(v, null, indentWidth);
};
// Where the generated attribute map is written: attrs.json in the parent directory of this script.
const ATTRS_PATH = path.join(__dirname, '..', 'attrs.json');
// Upstream location of React's type declarations; downloaded when no local copy exists.
const REACT_TYPINGS_URL = 'https://raw.githubusercontent.com/DefinitelyTyped/DefinitelyTyped/master/types/react/index.d.ts';
// Local copy of the typings, stored next to this script.
const REACT_TYPINGS_FILE = path.join(__dirname, 'react.d.ts');
// Returns the text of the React typings. The local copy is used when it exists;
// if it is missing (ENOENT) the typings are downloaded, saved locally and returned.
// Any other read error is rethrown.
const fetchReactTypingsSource = async () => {
  let cached;
  try {
    cached = await fs.readFile(REACT_TYPINGS_FILE, 'utf8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      const downloaded = await request(REACT_TYPINGS_URL);
      await fs.writeFile(REACT_TYPINGS_FILE, downloaded);
      return downloaded;
    }
    throw err;
  }
  return cached;
};
// Lowercased tag-name prefixes of React interface names whose HTML tag name differs.
const tagNameNormalised = {
  'anchor': 'a',
};

// Lowercased React property names whose HTML attribute name differs.
const attrNameNormalised = {
  'classname': 'class',
};

// Properties that exist only in React and must not be emitted as attributes.
// They are written in React's camelCase spelling for readability and lowercased
// here, because they are compared against names that normaliseName() has already
// lowercased; without this the filter can never match.
const reactSpecificAttributes = [
  'defaultChecked', 'defaultValue', 'suppressContentEditableWarning', 'suppressHydrationWarning',
].map(name => name.toLowerCase());
// TODO Consider and check behaviour when value matches case insensitively, after trimming whitespace, numerically (for number values), etc.
// TODO This is currently manually sourced and written. Try to get machine-readable spec and automate.
const defaultAttributeValues = {
'align': [{
tags: ['html:img'],
defaultValue: 'bottom',
}],
'decoding': [{
tags: ['html:img'],
defaultValue: 'auto',
}],
'enctype': [{
tags: ['html:form'],
defaultValue: 'application/x-www-form-urlencoded',
}],
'frameborder': [{
tags: ['html:iframe'],
defaultValue: '1',
isPositiveInteger: true,
}],
'formenctype': [{
tags: ['html:button', 'html:input'],
defaultValue: 'application/x-www-form-urlencoded',
}],
'height': [{
tags: ['html:iframe'],
defaultValue: '150',
isPositiveInteger: true,
}],
'importance': [{
tags: ['html:iframe'],
defaultValue: 'auto',
}],
'loading': [{
tags: ['html:iframe', 'html:img'],
defaultValue: 'eager',
}],
'media': [{
tags: ['html:style'],
defaultValue: 'all',
}],
'method': [{
tags: ['html:form'],
defaultValue: 'get',
}],
'referrerpolicy': [{
tags: ['html:iframe', 'html:img'],
defaultValue: 'no-referrer-when-downgrade',
}],
'rules': [{
tags: ['html:table'],
defaultValue: 'none',
}],
'shape': [{
tags: ['html:area'],
defaultValue: 'rect',
}],
'span': [{
tags: ['html:col', 'html:colgroup'],
defaultValue: '1',
isPositiveInteger: true,
}],
'target': [{
tags: ['html:a', 'html:form'],
defaultValue: '_self',
}],
'type': [{
tags: ['html:button'],
defaultValue: 'submit',
}, {
tags: ['html:input'],
defaultValue: 'text',
}, {
tags: ['html:link', 'html:style'],
defaultValue: 'text/css',
}],
'width': [{
tags: ['html:iframe'],
defaultValue: '300',
isPositiveInteger: true,
}]
};
// Attribute name => fully qualified tags (`namespace:tag`, `*` meaning every tag in
// the namespace) for which the generated config sets collapse_and_trim.
const collapsibleAndTrimmable = {
'class': ['html:*'],
'd': ['svg:*'],
};
// TODO Is escapedText the API for getting name?
// Reads the declared name of an AST node via `name.escapedText`.
const getNameOfNode = n => {
  const {escapedText} = n.name;
  return escapedText;
};
// Lowercases `name` and, if `norms` has its own entry for the lowercased name,
// returns that replacement instead. Only own properties are consulted, so names
// such as `constructor` are not replaced by members inherited from Object.prototype.
const normaliseName = (name, norms) => {
  const lower = name.toLowerCase();
  const mapped = Object.prototype.hasOwnProperty.call(norms, lower) ? norms[lower] : undefined;
  // An empty replacement falls back to the lowercased name, as before.
  return mapped || lower;
};
// Walks the parsed React declaration file, collects every property of the
// `*HTMLAttributes` / `*SVGAttributes` interfaces, derives minification settings
// for each attribute, and writes the result (attr => namespace => tag => config)
// as sorted, pretty-printed JSON to ATTRS_PATH.
//
// Throws if the global HTML attributes interface does not sort first, if a
// tag/attribute pair has more than one default value or occurs twice, or if a
// tag-specific attribute disagrees with the global one of the same name.
const processReactTypeDeclarations = async (source) => {
const nodes = [source];
// Use index-based loop to keep iterating as nodes array grows.
for (let i = 0; i < nodes.length; i++) {
// forEachChild doesn't work if return value is number (e.g. return value of Array.prototype.push).
nodes[i].forEachChild(c => void nodes.push(c));
}
// Interface name => { namespace, tag, node }. The part before HTML/SVG is the tag
// name; an empty tag means the interface holds the namespace's global attributes.
const attributeNodes = nodes
.filter(n => n.kind === ts.SyntaxKind.InterfaceDeclaration)
.map(n => [/^([A-Za-z]*)(HTML|SVG)Attributes/.exec(getNameOfNode(n)), n])
.filter(([matches]) => matches)
.map(([matches, node]) => [matches[2].toLowerCase(), normaliseName(matches[1], tagNameNormalised), node])
.filter(([namespace, tagName]) => namespace !== 'html' || !['all', 'webview'].includes(tagName))
.map(([namespace, tag, node]) => ({namespace, tag, node}))
.sort((a, b) => a.namespace.localeCompare(b.namespace) || a.tag.localeCompare(b.tag));
// Process global HTML attributes first as they also appear on some specific HTML tags but we don't want to keep the specific ones if they're global.
if (attributeNodes[0].namespace !== 'html' || attributeNodes[0].tag !== '') {
throw new Error(`Global HTML attributes is not first to be processed`);
}
// Map structure: attr => namespace => tag => config.
const attributes = new Map();
for (const {namespace, tag, node} of attributeNodes) {
const fullyQualifiedTagName = [namespace, tag || '*'].join(':');
for (const n of node.members.filter(n => n.kind === ts.SyntaxKind.PropertySignature)) {
const attrName = normaliseName(getNameOfNode(n), attrNameNormalised);
// NOTE(review): attrName is lowercased at this point, so entries of
// reactSpecificAttributes only match if they are lowercase as well.
if (reactSpecificAttributes.includes(attrName)) continue;
// Syntax kinds of the property's declared type; union members are flattened one level.
const types = n.type.kind === ts.SyntaxKind.UnionType
? n.type.types.map(t => t.kind)
: [n.type.kind];
const boolean = types.includes(ts.SyntaxKind.BooleanKeyword);
// If types includes boolean and string, make it a boolean attr to prevent it from being removed if empty value.
const redundantIfEmpty = !boolean &&
(types.includes(ts.SyntaxKind.StringKeyword) || types.includes(ts.SyntaxKind.NumberKeyword));
const defaultValue = (defaultAttributeValues[attrName] || [])
.filter(a => a.tags.includes(fullyQualifiedTagName))
.map(a => a.defaultValue);
const collapseAndTrim = (collapsibleAndTrimmable[attrName] || []).includes(fullyQualifiedTagName);
if (defaultValue.length > 1) {
throw new Error(`Tag-attribute combination <${fullyQualifiedTagName} ${attrName}> has multiple default values: ${defaultValue}`);
}
const attr = {
boolean,
redundant_if_empty: redundantIfEmpty,
collapse_and_trim: collapseAndTrim,
default_value: defaultValue[0],
};
if (!attributes.has(attrName)) attributes.set(attrName, new Map());
const namespacesForAttribute = attributes.get(attrName);
if (!namespacesForAttribute.has(namespace)) namespacesForAttribute.set(namespace, new Map());
const tagsForNSAttribute = namespacesForAttribute.get(namespace);
if (tagsForNSAttribute.has(tag)) throw new Error(`Duplicate tag-attribute combination: <${fullyQualifiedTagName} ${attrName}>`);
// A global ('*') entry wins: tag-specific duplicates are dropped, but only after
// checking that they agree with the global settings.
const globalAttr = tagsForNSAttribute.get('*');
if (globalAttr) {
if (globalAttr.boolean !== attr.boolean
|| globalAttr.redundant_if_empty !== attr.redundant_if_empty
|| globalAttr.collapse_and_trim !== attr.collapse_and_trim
|| globalAttr.default_value !== attr.default_value) {
throw new Error(`Global and tag-specific attributes conflict: ${prettyjson(globalAttr)} ${prettyjson(attr)}`);
}
} else {
tagsForNSAttribute.set(tag || '*', attr);
}
}
}
// Sort output JSON object by property so diffs are clearer.
await fs.writeFile(ATTRS_PATH, prettyjson(deepObjectifyMap(attributes)));
};
// Script entry: load the React typings, parse them and regenerate the attribute map.
// A failure is reported and turned into a non-zero exit code instead of being left
// as an unhandled promise rejection.
(async () => {
  const source = ts.createSourceFile(`react.d.ts`, await fetchReactTypingsSource(), ts.ScriptTarget.ES2019);
  await processReactTypeDeclarations(source);
})().catch(err => {
  console.error(err);
  process.exitCode = 1;
});

View File

@ -1,8 +0,0 @@
{
"private": true,
"dependencies": {
"request": "^2.88.0",
"request-promise-native": "^1.0.8",
"typescript": "^3.7.4"
}
}

160
gen/data/attrs.json Normal file
View File

@ -0,0 +1,160 @@
{
"align": [
{
"tags": [
"html:img"
],
"defaultValue": "bottom"
}
],
"decoding": [
{
"tags": [
"html:img"
],
"defaultValue": "auto"
}
],
"enctype": [
{
"tags": [
"html:form"
],
"defaultValue": "application/x-www-form-urlencoded"
}
],
"frameborder": [
{
"tags": [
"html:iframe"
],
"defaultValue": "1",
"isPositiveInteger": true
}
],
"formenctype": [
{
"tags": [
"html:button",
"html:input"
],
"defaultValue": "application/x-www-form-urlencoded"
}
],
"height": [
{
"tags": [
"html:iframe"
],
"defaultValue": "150",
"isPositiveInteger": true
}
],
"importance": [
{
"tags": [
"html:iframe"
],
"defaultValue": "auto"
}
],
"loading": [
{
"tags": [
"html:iframe",
"html:img"
],
"defaultValue": "eager"
}
],
"media": [
{
"tags": [
"html:style"
],
"defaultValue": "all"
}
],
"method": [
{
"tags": [
"html:form"
],
"defaultValue": "get"
}
],
"referrerpolicy": [
{
"tags": [
"html:iframe",
"html:img"
],
"defaultValue": "no-referrer-when-downgrade"
}
],
"rules": [
{
"tags": [
"html:table"
],
"defaultValue": "none"
}
],
"shape": [
{
"tags": [
"html:area"
],
"defaultValue": "rect"
}
],
"span": [
{
"tags": [
"html:col",
"html:colgroup"
],
"defaultValue": "1",
"isPositiveInteger": true
}
],
"target": [
{
"tags": [
"html:a",
"html:form"
],
"defaultValue": "_self"
}
],
"type": [
{
"tags": [
"html:button"
],
"defaultValue": "submit"
},
{
"tags": [
"html:input"
],
"defaultValue": "text"
},
{
"tags": [
"html:link",
"html:style"
],
"defaultValue": "text/css"
}
],
"width": [
{
"tags": [
"html:iframe"
],
"defaultValue": "300",
"isPositiveInteger": true
}
]
}

77
gen/data/dfa.yaml Normal file
View File

@ -0,0 +1,77 @@
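# Prefixes (placed before the name of the next state):
# `_` means to accumulate the transition pattern in lowercase.
# `<` means to accumulate the transition pattern as part of the current state.
# `+` means to accumulate the transition pattern as part of the next state.
# `?` means to look ahead without accumulating the transition pattern, allowing the next state to reconsume it.
# No prefix means to accumulate the transition pattern (without lowercasing).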
Text:
'\w': ?TextWhitespace
'\<': +OpeningTagStart
'\</': +ClosingTag
'\<!--': +Comment
'&': ?TextEntity
'': Text
TextWhitespace:
'\w': TextWhitespace
'&': ?TextEntity
'': ?Text
Comment:
'-->': <Text
'': Comment
ClosingTag:
'<tagName>': _ClosingTag
'>': <Text
OpeningTagStart:
'\w': ?OpeningTagWhitespace
'<tagName>': _OpeningTagStart
OpeningTagWhitespace:
'\w': OpeningTagWhitespace
'<attrName>': ?AttrName
'>': <Text
AttrName:
'[>=\w]': ?AttrAfterName
'<attrName>': _AttrName
AttrAfterName:
'\w': AttrAfterName
'>': ?OpeningTagWhitespace
'=': +AttrBeforeValue
AttrBeforeValue:
'\w': AttrBeforeValue
"'": +AttrSingleQuotedValue
'"': +AttrDoubleQuotedValue
'': ?AttrUnquotedValue
AttrSingleQuotedValue:
"'": <OpeningTagWhitespace
'&': ?AttrValueEntity
'\w': ?AttrSingleQuotedValueWhitespace
'': AttrSingleQuotedValue
AttrSingleQuotedValueWhitespace:
'\w': AttrSingleQuotedValueWhitespace
'&': ?AttrValueEntity
'': ?AttrSingleQuotedValue
AttrDoubleQuotedValue:
'"': <OpeningTagWhitespace
'&': ?AttrValueEntity
'\w': ?AttrDoubleQuotedValueWhitespace
'': AttrDoubleQuotedValue
AttrDoubleQuotedValueWhitespace:
'\w': AttrDoubleQuotedValueWhitespace
'&': ?AttrValueEntity
'': ?AttrDoubleQuotedValue
AttrUnquotedValue:
'\w': ?OpeningTagWhitespace
'&': ?AttrValueEntity
'': AttrUnquotedValue

3150
gen/data/react.d.ts vendored Normal file

File diff suppressed because it is too large Load Diff

71
gen/dfa.ts Normal file
View File

@ -0,0 +1,71 @@
import yaml from 'yaml';
import {DATA_DIR, RUST_OUT_DIR} from './_common';
import {readFileSync, writeFileSync} from 'fs';
import {join} from 'path';
import {EOL} from 'os';
import {parsePattern, TrieBuilder} from './trie';
// State name -> (transition pattern -> prefixed name of the next state), as declared in dfa.yaml.
const dfa: { [node: string]: { [transition: string]: string } } = yaml.parse(
  readFileSync(join(DATA_DIR, 'dfa.yaml'), 'utf8'),
);

// These states must always exist; see lex/mod.rs for more details.
for (const requiredState of ['TextEntity', 'AttrValueEntity', 'Unknown', 'EOF']) {
  dfa[requiredState] = {};
}

// Sorted so that the generated output is stable across runs.
const nodes = Object.keys(dfa).sort();
/**
 * Renders one DFA transition target as a Rust `Transition` struct literal.
 *
 * @param val name of the next state, optionally preceded by one of the flags `_`, `<`, `+`, `?`
 * @returns Rust source text for the transition
 */
const rsTransition = (val: string) => {
  const parsed = /^([_<+?]?)(.*)$/.exec(val)!;
  const flag = parsed[1];
  const next = parsed[2];

  // Translate the optional flag character into the matching ConsumeMode variant.
  let consumeMode: string | undefined;
  switch (flag) {
    case '_':
      consumeMode = 'AccumulateLowerCase';
      break;
    case '':
      consumeMode = 'Accumulate';
      break;
    case '<':
      consumeMode = 'Current';
      break;
    case '+':
      consumeMode = 'Next';
      break;
    case '?':
      consumeMode = 'Reconsume';
      break;
  }

  return [
    'Transition {',
    `to: State::${next},`,
    `consume: ConsumeMode::${consumeMode},`,
    '}',
  ].join('\n');
};
// Rust source written to dfa.rs: the State enum (one variant per entry of `nodes`, numbered by its
// index), the ConsumeMode enum, the Transition struct, one trie per state (named after the
// upper-cased state name) and the TRANSITIONS array listing those tries in `nodes` order.
const output = `
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum State {
${nodes.map((n, i) => `${n} = ${i}`).join(`,${EOL} `)}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConsumeMode {
Current,
Next,
Reconsume,
Accumulate,
AccumulateLowerCase,
}
#[derive(Clone, Copy)]
pub struct Transition {
// Make pub to allow destructuring.
pub to: State,
pub consume: ConsumeMode,
}
${nodes.map(n => {
// Build a trie for this state that maps each transition pattern to its Transition literal.
const trieBuilder = new TrieBuilder(n.toUpperCase(), 'Transition');
for (const [pat, val] of Object.entries(dfa[n])) {
// The empty pattern is the state's fallback transition; it is handled after the explicit patterns.
if (pat == '') {
continue;
}
trieBuilder.addPattern(parsePattern(pat), rsTransition(val));
}
// Give every first byte not already claimed by an explicit pattern the fallback transition.
if (dfa[n][''] !== undefined) {
trieBuilder.fillRemaining(rsTransition(dfa[n]['']));
}
return trieBuilder.generate();
}).join(EOL + EOL)}
pub static TRANSITIONS: [&'static crate::pattern::TrieNode<Transition>; ${nodes.length}] = [${nodes.map(n => n.toUpperCase()).join(', ')}];
`;
// Write the generated Rust module into the Rust output directory.
writeFileSync(join(RUST_OUT_DIR, 'dfa.rs'), output);

28
gen/entities.ts Normal file
View File

@ -0,0 +1,28 @@
import {readFileSync, writeFileSync} from 'fs';
import {join} from 'path';
import {byteStringLiteral, DATA_DIR, RUST_OUT_DIR} from './_common';
import {parsePattern, TrieBuilder} from './trie';
// Named character references keyed by their textual form, read from entities.json.
const entities: {[name: string]: {codepoints: number[]; characters: string;}} = JSON.parse(
  readFileSync(join(DATA_DIR, 'entities.json'), 'utf8'),
);

const trieBuilder = new TrieBuilder('ENTITY', 'EntityType');
// Numeric references are matched by their prefix followed by a single digit.
trieBuilder.addPattern(parsePattern('&#[0-9]'), 'EntityType::Dec');
trieBuilder.addPattern(parsePattern('&#x[0-9a-fA-F]'), 'EntityType::Hex');

Object.keys(entities).forEach(rep => {
  const decoded = Buffer.from(entities[rep].characters, 'utf8');
  // Since we're minifying in place, we need to guarantee we'll never write something longer than
  // source: when the decoded bytes are longer than the reference, keep the reference's own text.
  const replacement = rep.length < decoded.length
    ? Array.from(rep, c => c.charCodeAt(0))
    : Array.from(decoded);
  trieBuilder.add(rep, `EntityType::Named(${byteStringLiteral(replacement)})`);
});

// Rust source for entities.rs: the EntityType enum followed by the generated ENTITY trie.
const output = `
#[derive(Clone, Copy)]
pub enum EntityType {
Named(&'static [u8]),
Dec,
Hex,
}
${trieBuilder.generate()}
`;
writeFileSync(join(RUST_OUT_DIR, 'entities.rs'), output);

9
gen/package.json Normal file
View File

@ -0,0 +1,9 @@
{
"private": true,
"dependencies": {
"@types/node": "^14.0.5",
"ts-node": "^8.10.1",
"typescript": "^3.7.4",
"yaml": "^1.10.0"
}
}

31
gen/patterns.ts Normal file
View File

@ -0,0 +1,31 @@
import {readFileSync, writeFileSync} from 'fs';
import {DATA_DIR, RUST_OUT_DIR} from './_common';
import {join} from 'path';
import {EOL} from 'os';
// Maps the name of each generated Rust static to the character sequence it should match, as
// declared in patterns.json.
const patterns: {[name: string]: string} = JSON.parse(readFileSync(join(DATA_DIR, 'patterns.json'), 'utf8'));
// Char code of `str` at index `char`.
const chr = (str: string, char: number) => str.charCodeAt(char);

/**
 * Precomputes the Knuth-Morris-Pratt state table for `seq` and renders it as a Rust
 * `SinglePattern::prebuilt(...)` expression.
 *
 * The table has one row per byte value (256 rows) and one column per position in `seq`; each
 * cell holds the next state. It is emitted row by row as a flat list.
 *
 * @param seq sequence to match; every character must have a code below 256
 * @returns Rust source text for the prebuilt pattern
 */
const buildPattern = (seq: string): string => {
  const len = seq.length;
  const table: number[][] = [];
  for (let c = 0; c < 256; c++) {
    table.push(new Array<number>(len).fill(0));
  }
  table[chr(seq, 0)][0] = 1;

  // `restart` is the state the matcher would be in after consuming seq[1..j).
  for (let j = 1, restart = 0; j < len; j++) {
    // On a mismatch behave exactly like the restart state would.
    for (const row of table) {
      row[j] = row[restart];
    }
    const code = chr(seq, j);
    // On a match advance to the next position.
    table[code][j] = j + 1;
    restart = table[code][restart];
  }

  const cells: number[] = [];
  for (const row of table) {
    cells.push(...row);
  }
  return `crate::pattern::SinglePattern::prebuilt(&[${cells.join(', ')}], ${len})`;
};
// One Rust static per entry of `patterns`, each holding its prebuilt matcher.
const output: string[] = [];
for (const [name, pattern] of Object.entries(patterns)) {
  output.push(`pub static ${name}: &crate::pattern::SinglePattern = &${buildPattern(pattern)};`);
}
writeFileSync(join(RUST_OUT_DIR, 'patterns.rs'), output.join(EOL));

171
gen/trie.ts Normal file
View File

@ -0,0 +1,171 @@
import {EOL} from 'os';
// Named character classes that a pattern can reference as `<name>`. Each value is written in the
// bracket syntax parsed by parsePatternClass.
const customCharClasses = {
tagName: '[a-zA-Z-]',
attrName: '[a-zA-Z-]',
};
// Characters produced by the `\w` pattern escape.
const whitespaceClass = [' ', '\r', '\n', '\t', '\v', '\f'];
/**
 * Lists every character whose code lies between the first characters of `from` and `to`,
 * inclusive, in ascending order. Returns an empty array when `from` sorts after `to`.
 */
const charRange = (from: string, to: string) => {
  const first = from.charCodeAt(0);
  const last = to.charCodeAt(0);
  // A non-positive (or NaN) length yields an empty array.
  return Array.from({length: last - first + 1}, (_, offset) => String.fromCharCode(first + offset));
};
/**
 * Resolves the escape whose character (the one following the backslash) sits at index `at`.
 *
 * @returns the characters the escape stands for: the character itself for `\\`, `\]` and `\<`,
 *          or the whitespace class for `\w`
 * @throws Error when the character is not a supported escape (including running off the end)
 */
const parsePatternEscape = (pat: string, at: number): string[] => {
  const escaped = pat[at];
  if (escaped === 'w') {
    return whitespaceClass;
  }
  if (escaped === '\\' || escaped === ']' || escaped === '<') {
    return [escaped];
  }
  throw new Error(`Unknown pattern escape: ${pat[at]}`);
};
/**
 * Parses the inside of a bracketed character class.
 *
 * @param pat  the whole pattern
 * @param from index of the first character after the opening `[`
 * @returns the characters in the class and the index of the closing `]`
 * @throws Error when the pattern ends before a closing `]` is found
 */
const parsePatternClass = (pat: string, from: number): [string[], number] => {
  const chars: string[] = [];
  for (let i = from; i < pat.length; i++) {
    switch (pat[i]) {
      case '\\':
        chars.push(...parsePatternEscape(pat, ++i));
        break;
      case ']':
        return [chars, i];
      default: {
        const rangeEnd = pat[i + 2];
        // `x-y` is a range only when `y` is a real character. A `-` directly before the closing
        // `]` is a literal hyphen; treating `]` as the range end would swallow the terminator.
        if (pat[i + 1] === '-' && rangeEnd !== undefined && rangeEnd !== ']') {
          chars.push(...charRange(pat[i], rangeEnd));
          i += 2;
        } else {
          chars.push(pat[i]);
        }
        break;
      }
    }
  }
  throw new Error(`Unexpected end of pattern: ${pat}`);
};
/**
 * Parses a `<name>` reference to one of the entries of customCharClasses.
 *
 * @param pat  the whole pattern
 * @param from index of the first character after the opening `<`
 * @returns the characters of the referenced class and the index of the closing `>`
 * @throws Error when there is no closing `>` or when the name is not a known custom class
 */
const parsePatternCustomClass = (pat: string, from: number): [string[], number] => {
  const endIdx = pat.indexOf('>', from);
  if (endIdx == -1) throw new Error(`Unexpected end of pattern: ${pat}`);
  const className = pat.slice(from, endIdx);
  // Fail with a descriptive message instead of passing `undefined` on to parsePatternClass.
  if (!Object.prototype.hasOwnProperty.call(customCharClasses, className)) {
    throw new Error(`Unknown custom character class <${className}> in pattern: ${pat}`);
  }
  const classSource = (customCharClasses as {[name: string]: string})[className];
  // Custom classes are written as `[...]`, so start parsing after the opening bracket.
  return [parsePatternClass(classSource, 1)[0], endIdx];
};
/**
 * Splits a pattern into its positions, returning for each position the list of characters that
 * may appear there.
 *
 * Supported syntax: backslash escapes, bracketed classes (`[...]`), named custom classes
 * (`<name>`); any other character stands for itself.
 */
export const parsePattern = (pat: string): string[][] => {
  const res: string[][] = [];
  let i = 0;
  while (i < pat.length) {
    const c = pat[i];
    if (c === '\\') {
      // Skip the backslash and the escaped character.
      res.push(parsePatternEscape(pat, i + 1));
      i += 2;
    } else if (c === '[') {
      const [chars, closingIdx] = parsePatternClass(pat, i + 1);
      res.push(chars);
      i = closingIdx + 1;
    } else if (c === '<') {
      const [chars, closingIdx] = parsePatternCustomClass(pat, i + 1);
      res.push(chars);
      i = closingIdx + 1;
    } else {
      res.push([c]);
      i += 1;
    }
  }
  return res;
};
// A trie node. `children` is a sparse array indexed by byte value; `value` is the Rust expression
// emitted for the node, if it has one.
type Node = {
  children: Node[];
  value?: string;
};

const createNode = (value?: string): Node => ({value, children: []});

/**
 * Builds a byte-indexed trie and renders it as Rust `static` declarations of
 * `crate::pattern::TrieNode<valueType>`, with the root exposed as `pub static <name>`.
 */
export class TrieBuilder {
  private readonly root: Node = createNode();
  // Declarations produced by the current generate() call, in emission order.
  private readonly variables: string[] = [];
  private nextId: number = 0;
  // Rendered node code -> name of the variable already holding it, so identical subtrees are
  // emitted only once.
  private readonly codeCache: Map<string, string> = new Map();

  constructor (
    private readonly name: string,
    private readonly valueType: string,
  ) {
  }

  /** Gives every byte that has no child of the root yet a leaf child holding `val`. */
  fillRemaining (val: string): this {
    const {children} = this.root;
    for (let i = 0; i < 256; i++) {
      children[i] = children[i] || createNode(val);
    }
    return this;
  }

  /**
   * Maps the exact sequence `seq` to `val`.
   *
   * @throws Error when a character of `seq` has a code above 255
   */
  add (seq: string, val: string): this {
    let cur: Node = this.root;
    for (let i = 0; i < seq.length; i++) {
      const c = seq.charCodeAt(i);
      if (c > 255) throw new Error('Not a byte');
      cur = cur.children[c] = cur.children[c] || createNode();
    }
    cur.value = val;
    return this;
  }

  /**
   * Maps every sequence matched by `pattern` to `val`; `pattern` lists, per position, the
   * characters allowed at that position (the shape returned by parsePattern).
   *
   * @throws Error when an entry is not exactly one character with a code of at most 255
   */
  addPattern (pattern: string[][], val: string): this {
    let cur: Node[] = [this.root];
    for (const cls of pattern) {
      const next: Node[] = [];
      for (let i = 0; i < cls.length; i++) {
        if (cls[i].length !== 1) throw new Error(`Not a byte`);
        const c = cls[i].charCodeAt(0);
        if (c > 255) throw new Error('Not a byte');
        next.push(...cur.map(n => n.children[c] = n.children[c] || createNode()));
      }
      cur = next;
    }
    cur.forEach(n => n.value = val);
    return this;
  }

  // Generate the code for a node's variable name and value, and return the name.
  private generateNode (node: Node): string {
    // Only generate elements up to the last non-undefined child to cut down on size of array.
    const children = Array.from(
      {length: node.children.length},
      (_, i) => node.children[i] ? `Some(${this.generateNode(node.children[i])})` : 'None',
    ).join(', ');
    const value = node.value === undefined ? 'None' : `Some(${node.value})`;
    const varValue = [
      '&crate::pattern::TrieNode {',
      `value: ${value},`,
      `children: &[${children}],`,
      '}',
    ].join('\n');
    const existingVarName = this.codeCache.get(varValue);
    if (existingVarName) {
      return existingVarName;
    }
    const name = `${this.name}_NODE_${this.nextId++}`;
    this.variables.push(`static ${name}: &'static crate::pattern::TrieNode<${this.valueType}> = ${varValue};`);
    this.codeCache.set(varValue, name);
    return name;
  }

  /**
   * Renders the whole trie as Rust source. Can be called repeatedly; each call starts from a
   * clean slate and reflects the current contents of the trie.
   */
  generate (): string {
    this.variables.splice(0, this.variables.length);
    this.nextId = 0;
    // The cache refers to variables declared in `variables`, so it must be reset together with
    // them; otherwise a repeated call would reuse names whose declarations were just discarded.
    this.codeCache.clear();
    const rootName = this.generateNode(this.root);
    // Make root node public and use proper name.
    return this.variables.join(EOL + EOL).replace(`static ${rootName}`, `pub static ${this.name}`);
  }
}

29
gen/tsconfig.json Normal file
View File

@ -0,0 +1,29 @@
{
"include": [
"*.ts"
],
"compilerOptions": {
"allowJs": false,
"alwaysStrict": true,
"declaration": true,
"esModuleInterop": true,
"lib": [
"es2020"
],
"module": "commonjs",
"noFallthroughCasesInSwitch": true,
"noImplicitAny": true,
"noImplicitReturns": true,
"noImplicitThis": true,
"noUnusedParameters": true,
"outDir": "dist",
"skipLibCheck": true,
"strict": true,
"strictFunctionTypes": true,
"strictNullChecks": true,
"strictPropertyInitialization": true,
"suppressImplicitAnyIndexErrors": true,
"target": "es6"
}
}