minify-html/gen/trie.ts

172 lines
4.7 KiB
TypeScript

import {EOL} from 'os';
const customCharClasses = {
tagName: '[a-zA-Z-]',
attrName: '[a-zA-Z-]',
};
const whitespaceClass = [' ', '\r', '\n', '\t', '\v', '\f'];
const charRange = (from: string, to: string) => {
const res = [];
for (let i = from.charCodeAt(0); i <= to.charCodeAt(0); i++) {
res.push(String.fromCharCode(i));
}
return res;
};
const parsePatternEscape = (pat: string, at: number): string[] => {
switch (pat[at]) {
case '\\':
return ['\\'];
case ']':
return [']'];
case '<':
return ['<'];
case 'w':
return whitespaceClass;
default:
throw new Error(`Unknown pattern escape: ${pat[at]}`);
}
};
const parsePatternClass = (pat: string, from: number): [string[], number] => {
const chars: string[] = [];
for (let i = from; i < pat.length; i++) {
switch (pat[i]) {
case '\\':
chars.push(...parsePatternEscape(pat, ++i));
break;
case ']':
return [chars, i];
default:
if (pat[i + 1] === '-' && pat[i + 2] !== undefined) {
chars.push(...charRange(pat[i], pat[i + 2]));
i += 2;
} else {
chars.push(pat[i]);
}
break;
}
}
throw new Error(`Unexpected end of pattern: ${pat}`);
};
const parsePatternCustomClass = (pat: string, from: number): [string[], number] => {
const endIdx = pat.indexOf('>', from);
if (endIdx == -1) throw new Error(`Unexpected end of pattern: ${pat}`);
return [parsePatternClass(customCharClasses[pat.slice(from, endIdx)], 1)[0], endIdx];
};
export const parsePattern = (pat: string): string[][] => {
const res: string[][] = [];
for (let i = 0; i < pat.length; i++) {
switch (pat[i]) {
case '\\':
res.push(parsePatternEscape(pat, ++i));
break;
case '[':
const sg = parsePatternClass(pat, i + 1);
res.push(sg[0]);
i = sg[1];
break;
case '<':
const cc = parsePatternCustomClass(pat, i + 1);
res.push(cc[0]);
i = cc[1];
break;
default:
res.push([pat[i]]);
}
}
return res;
};
type Node = {
children: Node[];
value?: string;
};
const createNode = (value?: string) => ({value, children: []});
export class TrieBuilder {
private readonly root: Node = createNode();
private readonly variables: string[] = [];
private nextId: number = 0;
private readonly codeCache: Map<string, string> = new Map();
constructor (
private readonly name: string,
private readonly valueType: string,
) {
}
fillRemaining (val: string): this {
const {children} = this.root;
for (let i = 0; i < 256; i++) {
children[i] = children[i] || createNode(val);
}
return this;
}
add (seq: string, val: string): this {
let cur: Node = this.root;
for (let i = 0; i < seq.length; i++) {
const c = seq.charCodeAt(i);
if (c > 255) throw new Error('Not a byte');
cur = cur.children[c] = cur.children[c] || createNode();
}
cur.value = val;
return this;
}
addPattern (pattern: string[][], val: string): this {
let cur: Node[] = [this.root];
for (const cls of pattern) {
const next: Node[] = [];
for (let i = 0; i < cls.length; i++) {
if (cls[i].length !== 1) throw new Error(`Not a byte`);
const c = cls[i].charCodeAt(0);
if (c > 255) throw new Error('Not a byte');
next.push(...cur.map(n => n.children[c] = n.children[c] || createNode()));
}
cur = next;
}
cur.forEach(n => n.value = val);
return this;
}
// Generate the code for a node's variable name and value, and return the name.
private generateNode (node: Node): string {
// Only generate elements up to the last non-undefined child to cut down on size of array.
const children = Array.from(
{length: node.children.length},
(_, i) => node.children[i] ? `Some(${this.generateNode(node.children[i])})` : 'None',
).join(', ');
const value = node.value === undefined ? 'None' : `Some(${node.value})`;
const varValue = `&crate::pattern::TrieNode {
value: ${value},
children: &[${children}],
}`;
const existingVarName = this.codeCache.get(varValue);
if (existingVarName) {
return existingVarName;
}
const name = `${this.name}_NODE_${this.nextId++}`;
this.variables.push(`static ${name}: &'static crate::pattern::TrieNode<${this.valueType}> = ${varValue};`);
this.codeCache.set(varValue, name);
return name;
}
generate (): string {
this.variables.splice(0, this.variables.length);
this.nextId = 0;
const rootName = this.generateNode(this.root);
// Make root node public and use proper name.
return this.variables.join(EOL + EOL).replace(`static ${rootName}`, `pub static ${this.name}`);
}
}