30 lines
1.1 KiB
TypeScript
30 lines
1.1 KiB
TypeScript
import {readFileSync, writeFileSync} from 'fs';
|
|
import {join} from 'path';
|
|
import {byteStringLiteral, DATA_DIR, RUST_OUT_DIR} from './_common';
|
|
import {parsePattern, TrieBuilder} from './trie';
|
|
|
|
// Entity table parsed from `entities.json` in DATA_DIR: each key is an entity
// name and each value holds that entity's code points and decoded characters.
// NOTE(review): the JSON is trusted as-is; its shape is not validated at runtime.
const entitiesJsonPath = join(DATA_DIR, 'entities.json');
const entities: Record<string, {codepoints: number[]; characters: string}> = JSON.parse(
  readFileSync(entitiesJsonPath, 'utf8')
);
|
|
|
|
// Builder for the entity trie.
// NOTE(review): the two constructor arguments are presumably the generated
// trie's name and its Rust value type — confirm against ./trie.
const trieBuilder = new TrieBuilder('ENTITY', 'EntityType');

// Numeric character references are registered as patterns rather than literal
// strings: decimal (`&#` + digit) first, then hexadecimal (`&#x` + hex digit).
for (const [pattern, value] of [
  ['&#[0-9]', 'EntityType::Dec'],
  ['&#x[0-9a-fA-F]', 'EntityType::Hex'],
] as const) {
  trieBuilder.addPattern(parsePattern(pattern), value);
}
|
|
// Register every named entity in the trie, keyed by its encoded form.
for (const [name, {characters}] of Object.entries(entities)) {
  const rawBytes = Buffer.from(name, 'utf8');
  const expandedBytes = Buffer.from(characters, 'utf8');

  // Decoding must never make the text longer: when the encoded form is
  // strictly shorter than the decoded characters, store the encoded bytes
  // themselves; otherwise store the decoded bytes.
  let chosenBytes = expandedBytes;
  if (rawBytes.length < expandedBytes.length) {
    chosenBytes = rawBytes;
  }

  const literal = byteStringLiteral([...chosenBytes]);
  trieBuilder.add(name, `EntityType::Named(${literal})`);
}
|
|
|
|
// Rust source for the generated module: the `EntityType` enum declaration
// followed by whatever `trieBuilder.generate()` emits. Everything inside the
// template literal is written verbatim to the output file, so it must contain
// only valid Rust.
const output = `
#[derive(Clone, Copy)]
pub enum EntityType {
  Named(&'static [u8]),
  Dec,
  Hex,
}

${trieBuilder.generate()}
`;
|
|
// Write the generated source to `entities.rs` under RUST_OUT_DIR.
const entitiesRsPath = join(RUST_OUT_DIR, 'entities.rs');
writeFileSync(entitiesRsPath, output);
|