Make bare ampersands a separate error
This commit is contained in:
parent
6ca4363936
commit
6db5e5c0a7
12
README.md
12
README.md
|
@ -42,13 +42,21 @@ Entities must be of one of the following forms:
|
|||
- `&#nnnn;`, where *nnnn* is a Unicode code point in base 10
|
||||
- `&#xhhhh;`, where *hhhh* is a Unicode code point in base 16
|
||||
|
||||
A malformed entity is an ampersand not followed by a sequence of characters that matches one of the above forms. This includes when the semicolon is missing, and bare ampersands (i.e. followed by whitespace or as the last character).
|
||||
A malformed entity is an ampersand not followed by a sequence of characters that matches one of the above forms. This includes when the semicolon is missing.
|
||||
|
||||
Note that this is different from `HBE_PARSE_INVALID_ENTITY`, which is when a well-formed entity references a non-existent entity name or Unicode code point.
|
||||
|
||||
While an ampersand by itself (i.e. followed by whitespace or as the last character) is a malformed entity, it is covered by `HBE_PARSE_BARE_AMPERSAND`.
|
||||
|
||||
#### `HBE_PARSE_BARE_AMPERSAND` ⌫
|
||||
|
||||
It's an error to have an ampersand followed by whitespace or as the last character.
|
||||
|
||||
This is intentionally a different error from `HBE_PARSE_MALFORMED_ENTITY` due to the ubiquity of bare ampersands.
|
||||
|
||||
An ampersand by itself is not *necessarily* an invalid entity. However, HTML parsers and browsers may have different interpretations of bare ampersands, so it's a good idea to always use the encoded form (`&amp;`).
|
||||
|
||||
When this error is suppressed, malformed entities are written to the output untouched.
|
||||
When this error is suppressed, bare ampersands are written to the output untouched.
|
||||
|
||||
#### `HBE_PARSE_INVALID_ENTITY` ⌫
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@ typedef enum hbe_errcode {
|
|||
HBE_MEM_ALLOC_FAIL,
|
||||
|
||||
HBE_PARSE_MALFORMED_ENTITY = 65,
|
||||
HBE_PARSE_BARE_AMPERSAND,
|
||||
HBE_PARSE_INVALID_ENTITY,
|
||||
HBE_PARSE_NONSTANDARD_TAG,
|
||||
HBE_PARSE_UCASE_TAG,
|
||||
|
|
|
@ -92,6 +92,8 @@ static void _parse_and_add_errors_to_suppress(nh_set_int32_t suppressed_errors,
|
|||
|
||||
if (hbu_buffer_compare_lit(part, "MALFORMED_ENTITY") == 0) {
|
||||
nh_set_int32_add(suppressed_errors, HBE_PARSE_MALFORMED_ENTITY);
|
||||
} else if (hbu_buffer_compare_lit(part, "BARE_AMPERSAND") == 0) {
|
||||
nh_set_int32_add(suppressed_errors, HBE_PARSE_BARE_AMPERSAND);
|
||||
} else if (hbu_buffer_compare_lit(part, "INVALID_ENTITY") == 0) {
|
||||
nh_set_int32_add(suppressed_errors, HBE_PARSE_INVALID_ENTITY);
|
||||
} else if (hbu_buffer_compare_lit(part, "NONSTANDARD_TAG") == 0) {
|
||||
|
|
|
@ -54,6 +54,14 @@ static void _hbs_entity_handle_error(hbs_options_t so, hbu_pipe_t pipe, int type
|
|||
void hbs_entity(hbs_options_t so, hbu_pipe_t pipe) {
|
||||
hbu_pipe_require_skip(pipe, '&');
|
||||
|
||||
// Quickly check and short circuit if BARE_AMPERSAND is suppressed
|
||||
// and next character is whitespace
|
||||
if (hbs_options_supressed_error(so, HBE_PARSE_BARE_AMPERSAND) &&
|
||||
hbr_whitespace_check(hbu_pipe_peek_eoi(pipe))) {
|
||||
hbu_pipe_write(pipe, '&');
|
||||
return;
|
||||
}
|
||||
|
||||
hb_char_t c = hbu_pipe_peek(pipe);
|
||||
|
||||
// _hbs_entity_handle_error will free this in case of error
|
||||
|
|
Loading…
Reference in New Issue