Fix hex numeric entity parsing
This commit is contained in:
parent
e306047067
commit
a415045ae6
|
@ -14,9 +14,9 @@ const WHITESPACE = [0x09, 0x0a, 0x0c, 0x0d, 0x20];
|
|||
const C0_CONTROL = rangeInclusive(0, 0x1f);
|
||||
const CONTROL = [...C0_CONTROL, ...rangeInclusive(0x7f, 0x9f)];
|
||||
const DIGIT = rangeInclusive(c('0'), c('9'));
|
||||
const UPPER_HEX_DIGIT = [...DIGIT, ...rangeInclusive(c('A'), c('F'))];
|
||||
const LOWER_HEX_DIGIT = [...DIGIT, ...rangeInclusive(c('a'), c('f'))];
|
||||
const HEX_DIGIT = [...UPPER_HEX_DIGIT, ...LOWER_HEX_DIGIT];
|
||||
const UPPER_HEX_ALPHA = [...rangeInclusive(c('A'), c('F'))];
|
||||
const LOWER_HEX_ALPHA = [...rangeInclusive(c('a'), c('f'))];
|
||||
const HEX_DIGIT = [...DIGIT, ...UPPER_HEX_ALPHA, ...LOWER_HEX_ALPHA];
|
||||
const UPPER_ALPHA = rangeInclusive(c('A'), c('Z'));
|
||||
const LOWER_ALPHA = rangeInclusive(c('a'), c('z'));
|
||||
const ALPHA = [...UPPER_ALPHA, ...LOWER_ALPHA];
|
||||
|
@ -61,8 +61,8 @@ impl std::ops::Index<u8> for Lookup {
|
|||
` + Object.entries({
|
||||
WHITESPACE,
|
||||
DIGIT,
|
||||
UPPER_HEX_DIGIT,
|
||||
LOWER_HEX_DIGIT,
|
||||
UPPER_HEX_ALPHA,
|
||||
LOWER_HEX_ALPHA,
|
||||
HEX_DIGIT,
|
||||
|
||||
ATTR_NAME_CHAR,
|
||||
|
|
|
@ -17,7 +17,7 @@ use crate::gen::entities::{ENTITY, EntityType};
|
|||
use crate::pattern::TrieNodeMatch;
|
||||
use std::char::from_u32;
|
||||
use crate::proc::Processor;
|
||||
use crate::gen::codepoints::{DIGIT, HEX_DIGIT, LOWER_HEX_DIGIT, UPPER_HEX_DIGIT, Lookup};
|
||||
use crate::gen::codepoints::{DIGIT, HEX_DIGIT, LOWER_HEX_ALPHA, UPPER_HEX_ALPHA, Lookup};
|
||||
|
||||
enum Parsed {
|
||||
// This includes numeric entities that were invalid and decoded to 0xFFFD.
|
||||
|
@ -93,8 +93,8 @@ fn parse_entity(code: &mut [u8], read_pos: usize, write_pos: usize) -> Parsed {
|
|||
HEX_DIGIT,
|
||||
|value, c| value.wrapping_mul(16).wrapping_add(match c {
|
||||
c if DIGIT[c] => (c - b'0') as u32,
|
||||
c if LOWER_HEX_DIGIT[c] => (c - b'a') as u32,
|
||||
c if UPPER_HEX_DIGIT[c] => (c - b'A') as u32,
|
||||
c if LOWER_HEX_ALPHA[c] => 10 + (c - b'a') as u32,
|
||||
c if UPPER_HEX_ALPHA[c] => 10 + (c - b'A') as u32,
|
||||
_ => unreachable!(),
|
||||
}),
|
||||
6,
|
||||
|
|
|
@ -260,6 +260,10 @@ fn test_attr_value_backtick() {
|
|||
|
||||
#[test]
|
||||
fn test_hexadecimal_entity_decoding() {
|
||||
eval(b"&#x2E;", b".");
|
||||
eval(b"&#x2F;", b"/");
|
||||
eval(b"&#x2f;", b"/");
|
||||
eval(b"&#x00;", b"\0");
|
||||
eval(b"&#x30;", b"0");
|
||||
eval(b"0", b"0");
|
||||
eval(b"0", b"0");
|
||||
|
|
Loading…
Reference in New Issue