import { EOL } from "os";
/**
 * Named character classes that a pattern can reference as `<name>`.
 *
 * Each value is written in the same bracketed class syntax that
 * `parsePatternClass` understands (a leading `[`, members and `a-z` style
 * ranges, and a closing `]`).
 */
const customCharClasses = {
  // Characters accepted in a tag name.
  tagName: "[a-zA-Z-]",
  // Characters accepted in an attribute name.
  attrName: "[a-zA-Z-]",
};
// Characters produced by the `\w` pattern escape (see `parsePatternEscape`).
const whitespaceClass = [
  " ", // space
  "\r", // carriage return
  "\n", // line feed
  "\t", // horizontal tab
  "\v", // vertical tab
  "\f", // form feed
];
/**
 * Lists every character between two endpoints, inclusive, ordered by UTF-16
 * code unit.
 *
 * Only the first code unit of each argument is considered.
 *
 * @param from - String whose first code unit is the lower bound.
 * @param to - String whose first code unit is the upper bound.
 * @returns One single-character string per code unit in `[from, to]`; empty
 * when `from` sorts after `to` or when either argument is the empty string.
 */
const charRange = (from: string, to: string): string[] => {
  const first = from.charCodeAt(0);
  const last = to.charCodeAt(0);
  const res: string[] = [];
  // If either bound is NaN (empty input) the comparison is false and the
  // loop body never runs.
  for (let code = first; code <= last; code++) {
    res.push(String.fromCharCode(code));
  }
  return res;
};
/**
 * Resolves the character that follows a backslash in a pattern.
 *
 * Supported escapes:
 * - `\\`, `\]` and `\<` stand for the literal character;
 * - `\w` stands for every character in `whitespaceClass`.
 *
 * @param pat - The full pattern source.
 * @param at - Index of the character immediately after the backslash.
 * @returns The characters the escape matches.
 * @throws Error when the character at `at` is not a supported escape. This
 * includes `at` being past the end of `pat`, in which case the message
 * reports `undefined`.
 */
const parsePatternEscape = (pat: string, at: number): string[] => {
  const escaped = pat[at];
  switch (escaped) {
    case "\\":
    case "]":
    case "<":
      return [escaped];
    case "w":
      // Return a copy so callers that keep or mutate the result cannot alter
      // the shared module-level whitespace list.
      return [...whitespaceClass];
    default:
      throw new Error(`Unknown pattern escape: ${pat[at]}`);
  }
};
/**
 * Parses the inside of a bracketed character class.
 *
 * Scanning starts at `from` and stops at the first unescaped `]`. Members may
 * be single characters, backslash escapes (delegated to `parsePatternEscape`),
 * or `x-y` ranges (expanded with `charRange`).
 *
 * @param pat - The full pattern source.
 * @param from - Index of the first character after the opening `[`.
 * @returns A tuple of the characters in the class and the index of the
 * closing `]`.
 * @throws Error when `pat` ends before a closing `]` is found, or when an
 * escape inside the class is not recognised.
 */
const parsePatternClass = (pat: string, from: number): [string[], number] => {
  const chars: string[] = [];
  for (let i = from; i < pat.length; i++) {
    switch (pat[i]) {
      case "\\":
        chars.push(...parsePatternEscape(pat, ++i));
        break;
      case "]":
        return [chars, i];
      default: {
        const rangeEnd = pat[i + 2];
        // A hyphen only forms a range when it is followed by a real end
        // character. A hyphen directly before the closing `]` (e.g. `[a-]`)
        // is a literal; treating `]` as the range end would consume the
        // terminator and run past the end of the class.
        if (pat[i + 1] === "-" && rangeEnd !== undefined && rangeEnd !== "]") {
          chars.push(...charRange(pat[i], rangeEnd));
          i += 2;
        } else {
          chars.push(pat[i]);
        }
        break;
      }
    }
  }
  throw new Error(`Unexpected end of pattern: ${pat}`);
};
/**
 * Parses a `<name>` reference to one of the entries in `customCharClasses`.
 *
 * @param pat - The full pattern source.
 * @param from - Index of the first character after the opening `<`.
 * @returns A tuple of the characters in the named class and the index of the
 * closing `>`.
 * @throws Error when there is no closing `>` at or after `from`, or when the
 * name is not an own key of `customCharClasses`.
 */
const parsePatternCustomClass = (
  pat: string,
  from: number
): [string[], number] => {
  const endIdx = pat.indexOf(">", from);
  if (endIdx === -1) throw new Error(`Unexpected end of pattern: ${pat}`);

  const className = pat.slice(from, endIdx);
  const classes: Record<string, string> = customCharClasses;
  // Own-property lookup only: an unknown name must not resolve to `undefined`
  // or to something inherited from Object.prototype (e.g. `constructor`).
  const classSource = Object.prototype.hasOwnProperty.call(classes, className)
    ? classes[className]
    : undefined;
  if (classSource === undefined) {
    throw new Error(`Unknown custom character class: ${className}`);
  }

  // Class definitions start with `[`, so parsing begins at index 1.
  return [parsePatternClass(classSource, 1)[0], endIdx];
};
/**
 * Parses a pattern string into a sequence of character classes.
 *
 * Syntax handled:
 * - `\x` is a backslash escape (see `parsePatternEscape`);
 * - `[...]` is a bracketed character class (see `parsePatternClass`);
 * - `<name>` is a named class (see `parsePatternCustomClass`);
 * - any other character matches itself.
 *
 * @param pat - The pattern source.
 * @returns One entry per pattern position; each entry lists the characters
 * accepted at that position.
 * @throws Error when an escape, class or named class is malformed.
 */
export const parsePattern = (pat: string): string[][] => {
  const res: string[][] = [];
  for (let i = 0; i < pat.length; i++) {
    switch (pat[i]) {
      case "\\":
        res.push(parsePatternEscape(pat, ++i));
        break;
      case "[": {
        const [chars, closeIdx] = parsePatternClass(pat, i + 1);
        res.push(chars);
        // Resume after the closing `]`; the loop increment steps past it.
        i = closeIdx;
        break;
      }
      case "<": {
        const [chars, closeIdx] = parsePatternCustomClass(pat, i + 1);
        res.push(chars);
        // Resume after the closing `>`; the loop increment steps past it.
        i = closeIdx;
        break;
      }
      default:
        res.push([pat[i]]);
        break;
    }
  }
  return res;
};
/** A single node of the trie under construction. */
type Node = {
  // Child nodes indexed by character code; slots without a child are unset.
  children: Node[];
  // Value attached to the sequence ending at this node, if any.
  value?: string;
};
// Builds a fresh trie node with no children, optionally carrying `value`.
const createNode = (value?: string) => {
  return { value, children: [] };
};
/**
 * Builds a byte-indexed trie in memory and renders it as source text made of
 * `static ...: &crate::pattern::TrieNode<...>` declarations.
 *
 * Values are kept as strings and interpolated verbatim into the generated
 * code (as `Some(<value>)`), so callers pass source-code expressions.
 */
export class TrieBuilder {
  private readonly root: Node = createNode();

  // Generated declarations, in emission order (children before parents).
  private readonly variables: string[] = [];
  private nextId = 0;
  // Maps generated node source text to the variable already emitted for it,
  // so structurally identical nodes share one declaration.
  private readonly codeCache: Map<string, string> = new Map();

  /**
   * @param name - Prefix for generated variable names; also the name given to
   * the public root declaration.
   * @param valueType - Type argument written into `TrieNode<...>`.
   */
  constructor(
    private readonly name: string,
    private readonly valueType: string
  ) {}

  /**
   * Gives every root child slot in `0..255` that has no node yet a new node
   * carrying `val`.
   *
   * @param val - Value expression for the newly created nodes.
   * @returns This builder, for chaining.
   */
  fillRemaining(val: string): this {
    const { children } = this.root;
    for (let i = 0; i < 256; i++) {
      children[i] = children[i] || createNode(val);
    }
    return this;
  }

  /**
   * Inserts a literal sequence, creating intermediate nodes as needed, and
   * sets the value of the final node.
   *
   * @param seq - Sequence to insert; every code unit must be at most 255.
   * @param val - Value expression stored at the end of the sequence.
   * @returns This builder, for chaining.
   * @throws Error ("Not a byte") when a code unit is greater than 255.
   */
  add(seq: string, val: string): this {
    let cur: Node = this.root;
    for (let i = 0; i < seq.length; i++) {
      const c = seq.charCodeAt(i);
      if (c > 255) throw new Error("Not a byte");
      cur = cur.children[c] = cur.children[c] || createNode();
    }
    cur.value = val;
    return this;
  }

  /**
   * Inserts every sequence matched by a parsed pattern and sets the value of
   * each final node.
   *
   * @param pattern - One character list per position, as returned by
   * `parsePattern`.
   * @param val - Value expression stored at the end of every matched sequence.
   * @returns This builder, for chaining.
   * @throws Error ("Not a byte") when an entry is not exactly one code unit
   * or its code unit is greater than 255.
   */
  addPattern(pattern: string[][], val: string): this {
    let cur: Node[] = [this.root];
    for (const cls of pattern) {
      // A Set keeps each reachable node once even when the class repeats a
      // character, and avoids spreading a large array into `push`.
      const next = new Set<Node>();
      for (const ch of cls) {
        if (ch.length !== 1) throw new Error(`Not a byte`);
        const c = ch.charCodeAt(0);
        if (c > 255) throw new Error("Not a byte");
        for (const n of cur) {
          next.add((n.children[c] = n.children[c] || createNode()));
        }
      }
      cur = [...next];
    }
    cur.forEach((n) => (n.value = val));
    return this;
  }

  // Generate the code for a node's variable name and value, and return the name.
  private generateNode(node: Node): string {
    // Only generate defined children to cut down on size of array, which would otherwise
    // bog down compile time and binary size for large trees with lots of nodes.
    // If array is empty, just use zero.
    const firstIdx =
      node.children.length === 0
        ? 0
        : node.children.findIndex((child) => child !== undefined);
    const children = Array.from(
      { length: node.children.length - firstIdx },
      (_, i) =>
        node.children[i + firstIdx]
          ? `Some(${this.generateNode(node.children[i + firstIdx])})`
          : "None"
    ).join(", ");

    const value = node.value === undefined ? "None" : `Some(${node.value})`;
    const varValue = [
      "&crate::pattern::TrieNode {",
      `offset: ${firstIdx},`,
      `value: ${value},`,
      `children: &[${children}],`,
      "}",
    ].join("\n");

    // Reuse the declaration already emitted for an identical node.
    const existingVarName = this.codeCache.get(varValue);
    if (existingVarName) {
      return existingVarName;
    }

    const name = `${this.name}_NODE_${this.nextId++}`;
    this.variables.push(
      `static ${name}: &crate::pattern::TrieNode<${this.valueType}> = ${varValue};`
    );
    this.codeCache.set(varValue, name);
    return name;
  }

  /**
   * Renders the whole trie as source text.
   *
   * All per-run state is reset first, so the method can be called repeatedly
   * and after further insertions.
   *
   * @returns The generated declarations separated by blank lines, with the
   * root declaration made `pub` and named after this builder's `name`.
   */
  generate(): string {
    this.variables.splice(0, this.variables.length);
    this.nextId = 0;
    // The cache maps node source to names from the previous run. Leaving it
    // populated would make every lookup hit without re-emitting declarations,
    // so a second call would return an empty string.
    this.codeCache.clear();
    const rootName = this.generateNode(this.root);
    // Make root node public and use proper name.
    return this.variables
      .join(EOL + EOL)
      .replace(`static ${rootName}`, `pub static ${this.name}`);
  }
}