Use regex crate for substring searches
This commit is contained in:
parent
267e007807
commit
a519394c3a
10 changed files with 40 additions and 84 deletions
|
|
@ -13,7 +13,6 @@ try {
|
|||
writeFileSync(join(RUST_OUT_DIR, 'mod.rs'), `
|
||||
pub mod attrs;
|
||||
pub mod entities;
|
||||
pub mod patterns;
|
||||
`);
|
||||
|
||||
export const DATA_DIR = join(__dirname, 'data');
|
||||
|
|
|
|||
|
|
@ -1,6 +0,0 @@
|
|||
{
|
||||
"COMMENT_END": "-->",
|
||||
"STYLE_END": "</style",
|
||||
"SCRIPT_END": "</script",
|
||||
"INSTRUCTION_END": "?>"
|
||||
}
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
import {readFileSync, writeFileSync} from 'fs';
|
||||
import {DATA_DIR, RUST_OUT_DIR} from './_common';
|
||||
import {join} from 'path';
|
||||
import {EOL} from 'os';
|
||||
|
||||
// Pattern name -> character sequence, as parsed from patterns.json in the data directory.
const patternsPath = join(DATA_DIR, 'patterns.json');
const patterns: Record<string, string> = JSON.parse(readFileSync(patternsPath, 'utf8'));
|
||||
|
||||
/**
 * Returns the UTF-16 code unit of `str` at index `char`.
 * Yields `NaN` when the index is out of range (standard `charCodeAt` behaviour).
 */
const chr = (str: string, char: number): number => {
  return str.charCodeAt(char);
};
|
||||
|
||||
/**
 * Builds the Rust source expression for a prebuilt single-pattern matcher.
 *
 * Constructs a Knuth-Morris-Pratt style DFA for `seq`: a table with 256 rows
 * (one per possible character code) and `seq.length` columns (one per match
 * state), where each cell holds the next state. The table is flattened row by
 * row and embedded in a `crate::pattern::SinglePattern::prebuilt(...)` call,
 * followed by the pattern length.
 *
 * @param seq - Pattern to match. Must be non-empty and every UTF-16 code unit
 *   must be below 256, because the table only has 256 rows.
 * @returns The Rust expression text.
 * @throws RangeError if `seq` is empty or contains a code unit of 256 or above.
 */
const buildPattern = (seq: string): string => {
  const ALPHABET_SIZE = 256;

  if (seq.length === 0) {
    throw new RangeError('Cannot build a pattern from an empty sequence');
  }

  // Work on code units (not code points) so indices line up with seq.length.
  const codes = Array.from({length: seq.length}, (_, i) => seq.charCodeAt(i));
  const badIndex = codes.findIndex(code => code >= ALPHABET_SIZE);
  if (badIndex !== -1) {
    throw new RangeError(
      `Pattern ${JSON.stringify(seq)} has code unit ${codes[badIndex]} at index ${badIndex}; only values below ${ALPHABET_SIZE} are supported`,
    );
  }

  const dfa: number[][] = Array.from({length: ALPHABET_SIZE}, () => new Array<number>(seq.length).fill(0));

  // Matching the first character moves from state 0 to state 1.
  dfa[codes[0]][0] = 1;

  // `restart` is the state the automaton would be in after reading the pattern
  // from its second character up to the current position (the KMP fallback state).
  let restart = 0;
  for (let state = 1; state < codes.length; state++) {
    // On a mismatch, behave as the fallback state would.
    for (let c = 0; c < ALPHABET_SIZE; c++) {
      dfa[c][state] = dfa[c][restart];
    }
    // On a match, advance to the next state.
    dfa[codes[state]][state] = state + 1;
    restart = dfa[codes[state]][restart];
  }

  return `crate::pattern::SinglePattern::prebuilt(&[${dfa.flat().join(', ')}], ${seq.length})`;
};
|
||||
|
||||
// One Rust `pub static` declaration per entry in `patterns`, in object-entry order.
const output: string[] = [];
for (const [name, pattern] of Object.entries(patterns)) {
  output.push(`pub static ${name}: &crate::pattern::SinglePattern = &${buildPattern(pattern)};`);
}

// Write all declarations, separated by the platform line ending, to patterns.rs.
writeFileSync(join(RUST_OUT_DIR, 'patterns.rs'), output.join(EOL));
|
||||
Reference in a new issue