Fix entities decoding to a sequence longer than the source
This commit is contained in:
parent
ec3838c228
commit
886db3ea42
18
build.rs
18
build.rs
|
@ -57,6 +57,7 @@ struct TrieStats {
|
|||
total_clusters: usize,
|
||||
maximum_clusters_single_node: usize,
|
||||
maximum_cluster_length: usize,
|
||||
maximum_cluster_gaps: usize,
|
||||
total_nodes: usize,
|
||||
}
|
||||
|
||||
|
@ -108,14 +109,6 @@ impl TrieBuilderNode {
|
|||
format!("{}TrieNode{}", camel_case(trie_name), node_id)
|
||||
}
|
||||
|
||||
fn _dummy_node_type_name(trie_name: &Vec<String>) -> String {
|
||||
format!("{}DummyTrieNode", camel_case(trie_name))
|
||||
}
|
||||
|
||||
fn _dummy_node_var_name(trie_name: &Vec<String>) -> String {
|
||||
format!("{}_DUMMY_TRIE_NODE", snake_case(trie_name))
|
||||
}
|
||||
|
||||
fn _build(&self, ai: &mut AutoIncrement, stats: &mut TrieStats, name: &Vec<String>, value_type: &str, out: &mut String) -> usize {
|
||||
let id = ai.next();
|
||||
let node_type_name = TrieBuilderNode::_node_type_name(name, id);
|
||||
|
@ -146,6 +139,7 @@ impl TrieBuilderNode {
|
|||
stats.total_clusters += child_char_clusters.len();
|
||||
stats.maximum_clusters_single_node = max(stats.maximum_clusters_single_node, child_char_clusters.len());
|
||||
stats.maximum_cluster_length = max(stats.maximum_cluster_length, child_char_clusters.iter().map(|c| c.len()).max().unwrap_or(0));
|
||||
stats.maximum_cluster_gaps = max(stats.maximum_cluster_gaps, child_char_clusters.iter().map(|c| c.iter().filter(|c| c.is_none()).count()).max().unwrap_or(0));
|
||||
stats.total_nodes += 1;
|
||||
|
||||
out.push_str(format!("struct {} {{\n", node_type_name).as_str());
|
||||
|
@ -205,6 +199,7 @@ impl TrieBuilderNode {
|
|||
total_clusters: 0,
|
||||
maximum_clusters_single_node: 0,
|
||||
maximum_cluster_length: 0,
|
||||
maximum_cluster_gaps: 0,
|
||||
total_nodes: 0,
|
||||
};
|
||||
let root_id = self._build(&mut AutoIncrement::new(), &mut stats, &name_words, value_type, &mut code);
|
||||
|
@ -257,7 +252,12 @@ fn generate_entities() {
|
|||
// Add entities to trie builder.
|
||||
let mut trie_builder = TrieBuilderNode::new();
|
||||
for (rep, entity) in entities {
|
||||
trie_builder.add(&rep[1..], create_byte_string_literal(entity.characters.as_bytes()));
|
||||
if rep.as_bytes().len() < entity.characters.as_bytes().len() {
|
||||
// Since we're minifying in place, we need to guarantee we'll never write something longer than source.
|
||||
println!("Entity {} is shorter than decoded UTF-8 bytes, skipping...", rep);
|
||||
} else {
|
||||
trie_builder.add(&rep[1..], create_byte_string_literal(entity.characters.as_bytes()));
|
||||
};
|
||||
};
|
||||
// Generate trie code from builder.
|
||||
let trie_code = trie_builder.build("entity references", "&'static [u8]");
|
||||
|
|
|
@ -439,7 +439,9 @@ impl<'d> Processor<'d> {
|
|||
pub fn accept(&mut self) -> ProcessingResult<u8> {
|
||||
if !self.at_end() {
|
||||
let c = self._read_offset(0);
|
||||
self._shift(1);
|
||||
self.code[self.write_next] = c;
|
||||
self.read_next += 1;
|
||||
self.write_next += 1;
|
||||
Ok(c)
|
||||
} else {
|
||||
Err(ErrorType::UnexpectedEnd)
|
||||
|
@ -448,7 +450,9 @@ impl<'d> Processor<'d> {
|
|||
pub fn accept_expect(&mut self) -> u8 {
|
||||
debug_assert!(!self.at_end());
|
||||
let c = self._read_offset(0);
|
||||
self._shift(1);
|
||||
self.code[self.write_next] = c;
|
||||
self.read_next += 1;
|
||||
self.write_next += 1;
|
||||
c
|
||||
}
|
||||
pub fn accept_amount_expect(&mut self, count: usize) -> () {
|
||||
|
|
|
@ -33,7 +33,7 @@ impl ContentType {
|
|||
|
||||
fn peek(proc: &mut Processor) -> ContentType {
|
||||
// Manually write out matching for fast performance as this is hot spot; don't use generated trie.
|
||||
match proc.peek_eof() {
|
||||
match proc.peek_offset_eof(0) {
|
||||
None => ContentType::End,
|
||||
Some(b'<') => match proc.peek_offset_eof(1) {
|
||||
Some(b'/') => ContentType::End,
|
||||
|
|
Loading…
Reference in New Issue