// minify-html/gen/entities.ts
import { readFileSync, writeFileSync } from "fs";
import { join } from "path";
import { byteStringLiteral, DATA_DIR, RUST_OUT_DIR } from "./_common";
import { parsePattern, TrieBuilder } from "./trie";
/**
 * Entity table parsed from `entities.json` in `DATA_DIR`.
 *
 * Each key is the encoded form of a named entity and `characters` is the text
 * it decodes to. `codepoints` is part of the declared shape but is not read by
 * this script.
 *
 * NOTE: the result of `JSON.parse` is trusted to match this shape; nothing is
 * validated at runtime.
 */
const entities: Record<string, { codepoints: number[]; characters: string }> =
  JSON.parse(readFileSync(join(DATA_DIR, "entities.json"), "utf8"));
// Builder for the generated entity trie. Values are Rust expressions of type
// `EntityType` (see the enum emitted further down).
// NOTE(review): the constructor arguments look like the generated trie's name
// and its value type; confirm against ./trie.
const trieBuilder = new TrieBuilder("ENTITY", "EntityType");
// Numeric references: "&#" followed by a decimal digit...
trieBuilder.addPattern(parsePattern("&#[0-9]"), "EntityType::Dec");
// ...or "&#x" followed by a hexadecimal digit.
trieBuilder.addPattern(parsePattern("&#x[0-9a-fA-F]"), "EntityType::Hex");
// Accumulates [encoded literal, decoded literal] pairs for entities whose
// encoded form takes fewer UTF-8 bytes than the text it decodes to.
const shorterEncodedEntities = [];
// Register every named entity in the trie and record the ones that are
// shorter (in UTF-8 bytes) when left encoded than when decoded.
for (const [encoded, entity] of Object.entries(entities)) {
  const encodedBytes = Buffer.from(encoded, "utf8");
  const decodedBytes = Buffer.from(entity.characters, "utf8");
  // Literal for the decoded bytes, as produced by `byteStringLiteral`; it is
  // spliced into the Rust `EntityType::Named(...)` expression below.
  const val = byteStringLiteral([...decodedBytes]);
  trieBuilder.add(encoded, `EntityType::Named(${val})`);
  // We should encode if encoded is shorter than decoded.
  if (encodedBytes.byteLength < decodedBytes.byteLength) {
    shorterEncodedEntities.push([
      byteStringLiteral([...encodedBytes]),
      val,
    ] as const);
  }
}
// Rust source text for the generated module. It contains:
//  - two parallel slices built from the same `shorterEncodedEntities` list in
//    the same order, so index i in ENCODED corresponds to index i in DECODED;
//  - the `EntityType` enum used as the trie's value type;
//  - whatever `trieBuilder.generate()` returns, appended verbatim.
// Everything between the backticks is emitted as-is, so the template body is
// deliberately kept flush left.
const output = `
pub static SHORTER_ENCODED_ENTITIES_ENCODED: &[&[u8]] = &[
${shorterEncodedEntities.map(([encoded]) => encoded).join(",\n ")}
];
pub static SHORTER_ENCODED_ENTITIES_DECODED: &[&[u8]] = &[
${shorterEncodedEntities.map(([, decoded]) => decoded).join(",\n ")}
];
#[derive(Clone, Copy)]
pub enum EntityType {
Named(&'static [u8]),
Dec,
Hex,
}
${trieBuilder.generate()}
`;
// Write the generated Rust module to `entities.rs` inside RUST_OUT_DIR.
writeFileSync(join(RUST_OUT_DIR, "entities.rs"), output);