36 lines
1.2 KiB
TypeScript
36 lines
1.2 KiB
TypeScript
import { readFileSync, writeFileSync } from "fs";
|
|
import { join } from "path";
|
|
import { byteStringLiteral, DATA_DIR, RUST_OUT_DIR } from "./_common";
|
|
import { parsePattern, TrieBuilder } from "./trie";
|
|
|
|
/** One record of `entities.json`: the code points and the characters they spell. */
type EntityRecord = { codepoints: number[]; characters: string };

// Read the entity table from the data directory and parse it into a
// name -> record map. The JSON is trusted build input and is not validated.
const entitiesJson = readFileSync(join(DATA_DIR, "entities.json"), "utf8");
const entities: Record<string, EntityRecord> = JSON.parse(entitiesJson);
|
|
|
|
// Builder that accumulates every entity match; its generated code is embedded
// in the Rust output written at the end of this script.
const trieBuilder = new TrieBuilder("ENTITY", "EntityType");

// Numeric character references are registered as patterns (decimal first,
// then hexadecimal) rather than as literal keys.
const numericPatterns: [string, string][] = [
  ["&#[0-9]", "EntityType::Dec"],
  ["&#x[0-9a-fA-F]", "EntityType::Hex"],
];
for (const [pattern, value] of numericPatterns) {
  trieBuilder.addPattern(parsePattern(pattern), value);
}

// Every entry of the entity table becomes a literal key whose value carries
// the bytes to substitute for it.
for (const [name, { characters }] of Object.entries(entities)) {
  const rawBytes = Buffer.from(name, "utf8");
  const expandedBytes = Buffer.from(characters, "utf8");
  // Decoding must never grow the text: use the decoded characters only when
  // they are no longer than the entity itself, otherwise keep the entity as is.
  const replacement =
    expandedBytes.length <= rawBytes.length ? expandedBytes : rawBytes;
  const literal = byteStringLiteral([...replacement]);
  trieBuilder.add(name, `EntityType::Named(${literal})`);
}
|
|
|
|
// Render the trie up front so the template below is a plain substitution.
const trieSource = trieBuilder.generate();

// Rust source to emit: the `EntityType` enum followed by the generated trie.
// Text inside the template is written out verbatim.
const rustSource = `
#[derive(Clone, Copy)]
pub enum EntityType {
Named(&'static [u8]),
Dec,
Hex,
}

${trieSource}
`;

// Write the generated source into the Rust output directory.
writeFileSync(join(RUST_OUT_DIR, "entities.rs"), rustSource);
|