Make bare ampersands a separate error

This commit is contained in:
Wilson Lin 2018-08-08 11:09:06 +12:00
parent 6ca4363936
commit 6db5e5c0a7
4 changed files with 21 additions and 2 deletions

View File

@ -42,13 +42,21 @@ Entities must be of one of the following forms:
- `&#nnnn;`, where *nnnn* is a Unicode code point in base 10
- `&#xhhhh;`, where *hhhh* is a Unicode code point in base 16
A malformed entity is an ampersand not followed by a sequence of characters that matches one of the above forms. This includes when the semicolon is missing, and bare ampersands (i.e. followed by whitespace or as the last character).
A malformed entity is an ampersand not followed by a sequence of characters that matches one of the above forms. This includes when the semicolon is missing.
Note that this is different from `HBE_PARSE_INVALID_ENTITY`, which is when a well-formed entity references a non-existent entity name or Unicode code point.
While an ampersand by itself (i.e. followed by whitespace or as the last character) is a malformed entity, it is covered by `HBE_PARSE_BARE_AMPERSAND`.
#### `HBE_PARSE_BARE_AMPERSAND`
It's an error to have an ampersand followed by whitespace or as the last character.
This is intentionally a different error to `HBE_PARSE_MALFORMED_ENTITY` due to the ubiquity of bare ampersands.
An ampersand by itself is not *necessarily* an invalid entity. However, HTML parsers and browsers may have different interpretations of bare ampersands, so it's a good idea to always use the encoded form (`&amp;`).
When this error is suppressed, malformed entities are outputted untouched.
When this error is suppressed, bare ampersands are outputted untouched.
#### `HBE_PARSE_INVALID_ENTITY`

View File

@ -36,6 +36,7 @@ typedef enum hbe_errcode {
HBE_MEM_ALLOC_FAIL,
HBE_PARSE_MALFORMED_ENTITY = 65,
HBE_PARSE_BARE_AMPERSAND,
HBE_PARSE_INVALID_ENTITY,
HBE_PARSE_NONSTANDARD_TAG,
HBE_PARSE_UCASE_TAG,

View File

@ -92,6 +92,8 @@ static void _parse_and_add_errors_to_suppress(nh_set_int32_t suppressed_errors,
if (hbu_buffer_compare_lit(part, "MALFORMED_ENTITY") == 0) {
nh_set_int32_add(suppressed_errors, HBE_PARSE_MALFORMED_ENTITY);
} else if (hbu_buffer_compare_lit(part, "BARE_AMPERSAND") == 0) {
nh_set_int32_add(suppressed_errors, HBE_PARSE_BARE_AMPERSAND);
} else if (hbu_buffer_compare_lit(part, "INVALID_ENTITY") == 0) {
nh_set_int32_add(suppressed_errors, HBE_PARSE_INVALID_ENTITY);
} else if (hbu_buffer_compare_lit(part, "NONSTANDARD_TAG") == 0) {

View File

@ -54,6 +54,14 @@ static void _hbs_entity_handle_error(hbs_options_t so, hbu_pipe_t pipe, int type
void hbs_entity(hbs_options_t so, hbu_pipe_t pipe) {
hbu_pipe_require_skip(pipe, '&');
// Quickly check and short circuit if BARE_AMPERSAND is suppressed
// and next character is whitespace
if (hbs_options_supressed_error(so, HBE_PARSE_BARE_AMPERSAND) &&
hbr_whitespace_check(hbu_pipe_peek_eoi(pipe))) {
hbu_pipe_write(pipe, '&');
return;
}
hb_char_t c = hbu_pipe_peek(pipe);
// _hbs_entity_handle_error will free this in case of error