Suppressable errors config

This commit is contained in:
Wilson Lin 2018-08-03 23:02:10 +12:00
parent cdad4f95e6
commit dee2e03b21
4 changed files with 68 additions and 8 deletions

View File

@ -30,12 +30,34 @@ Current limitations:
### Errors
Errors marked with a `⌫` can be suppressed using the [`--suppress`](#--suppress) option.
Use the error name without the `HBE_PARSE_` prefix.
#### `HBE_PARSE_MALFORMED_ENTITY`
It's an error if the sequence of characters following an ampersand (`&`) does not form a valid entity.
Technically, entities must be of one of the following forms:
- `&name;`, where *name* is a reference to a valid HTML entity
- `&#nnnn;`, where *nnnn* is a Unicode code point in base 10
- `&#xhhhh;`, where *hhhh* is a Unicode code point in base 16
A malformed entity is an ampersand not followed by a sequence of characters that matches one of the above forms. This includes when the semicolon is missing, and bare ampersands (i.e. followed by whitespace or as the last character).
Note that this is different from `HBE_PARSE_INVALID_ENTITY`, which is when a well-formed entity references a non-existent entity or Unicode code point.
An ampersand by itself is not *necessarily* an invalid entity. However, HTML parsers and browsers may have different interpretations of bare ampersands, so it's a good idea to always use the encoded form (`&amp;`).
When this error is suppressed, malformed entities are outputted untouched.
#### `HBE_PARSE_INVALID_ENTITY`
It's an error if an invalid HTML entity is detected.
If suppressed, invalid entities are simply interpreted literally.
If suppressed, invalid entities are outputted untouched.
See [entityrefs.c](src/main/c/rule/entity/entityrefs.c) for the list of entity references considered valid by hyperbuild.
Valid entities that reference a Unicode code point must be between 0x0 and 0x10FFFF (inclusive).
#### `HBE_PARSE_NONSTANDARD_TAG`
@ -59,7 +81,7 @@ This means that `` ` `` and `'` are not valid quote marks.
#### `HBE_PARSE_ILLEGAL_CHILD`
It's an error if a tag is declared where it can't be a child of its parent tag.
This is a very simple check, and does not cover the comprehensive HTML rules, as they involve backtracking, tree traversal, and lots of conditionals.
This is a very simple check, and does not cover the comprehensive HTML rules, which involve backtracking, tree traversal, and lots of conditionals.
This rule is enforced in four parts:
[whitelistparents.c](src/main/c/rule/relation/whitelistparents.c),
@ -110,7 +132,7 @@ This applies even when the output is `stdout`.
Suppress errors specified by this option. hyperbuild will quietly ignore the provided errors and continue processing when one of them would otherwise occur.
Separate the error names with commas. Suppressible errors are marked with a `⌫` in the [Errors](#errors) section.
Omit the `HBE_PARSE_` prefix. Separate the error names with commas. Suppressible errors are marked with a `⌫` in the [Errors](#errors) section.
## Processing

View File

@ -25,13 +25,15 @@ typedef enum hbe_errcode {
HBE_CLI_TOO_MANY_OPTIONS = 17,
HBE_CLI_INVALID_TAG_SET,
HBE_CLI_INVALID_TAG,
HBE_CLI_INVALID_SUPPRESSABLE_ERROR,
HBE_IO_FOPEN_FAIL = 33,
HBE_IO_FCLOSE_FAIL,
HBE_IO_FREAD_FAIL,
HBE_IO_FWRITE_FAIL,
HBE_PARSE_INVALID_ENTITY = 65,
HBE_PARSE_MALFORMED_ENTITY = 65,
HBE_PARSE_INVALID_ENTITY,
HBE_PARSE_NONSTANDARD_TAG,
HBE_PARSE_UCASE_TAG,
HBE_PARSE_UCASE_ATTR,

View File

@ -8,6 +8,7 @@
#include "rule/init.c"
#include "ext/nicehash/set/str.h"
#include "ext/nicehash/set/int32.h"
#include "datastructure/list/buffer.h"
#include "util/fstreamin.c"
@ -77,6 +78,33 @@ static nh_set_str_t _parse_list_of_tags(char *argv) {
return set;
}
/**
 * Parse a comma-separated list of suppressible error names and add the
 * corresponding error codes to a set.
 *
 * Names are matched without the `HBE_PARSE_` prefix. The names recognised
 * here are MALFORMED_ENTITY, INVALID_ENTITY, NONSTANDARD_TAG, UCASE_TAG and
 * UNQUOTED_ATTR. Any other name is reported through hbe_fatal() with
 * HBE_CLI_INVALID_SUPPRESSABLE_ERROR.
 *
 * @param suppressed_errors set that receives the matched HBE_PARSE_* codes
 * @param argv comma-separated error names; NULL is accepted and is a no-op
 */
static void _parse_and_add_errors_to_suppress(nh_set_int32_t suppressed_errors, char *argv) {
  if (argv == NULL) {
    return;
  }

  // NOTE(review): `list` is not released in this function; confirm whether
  // the buffer list needs an explicit destroy call once parsing is done.
  hb_bufferlist_t list = hb_bufferlist_create_from_split((hb_char_t *) argv, ',');

  for (size_t i = 0; i < list->length; i++) {
    hbu_buffer_t part = hb_bufferlist_get(list, i);
    hb_char_t *part_c = hbu_buffer_underlying(part);

    // NOTE(review): these branches assume hbu_buffer_compare_lit() returns
    // non-zero on a match; confirm it does not follow the strcmp() convention.
    if (hbu_buffer_compare_lit(part, "MALFORMED_ENTITY")) {
      nh_set_int32_add(suppressed_errors, HBE_PARSE_MALFORMED_ENTITY);
    } else if (hbu_buffer_compare_lit(part, "INVALID_ENTITY")) {
      nh_set_int32_add(suppressed_errors, HBE_PARSE_INVALID_ENTITY);
    } else if (hbu_buffer_compare_lit(part, "NONSTANDARD_TAG")) {
      nh_set_int32_add(suppressed_errors, HBE_PARSE_NONSTANDARD_TAG);
    } else if (hbu_buffer_compare_lit(part, "UCASE_TAG")) {
      nh_set_int32_add(suppressed_errors, HBE_PARSE_UCASE_TAG);
    } else if (hbu_buffer_compare_lit(part, "UNQUOTED_ATTR")) {
      nh_set_int32_add(suppressed_errors, HBE_PARSE_UNQUOTED_ATTR);
    } else {
      // `%s` needs a character pointer, so pass the underlying characters
      // rather than the buffer handle itself.
      // NOTE(review): presumes the underlying storage is NUL-terminated; verify.
      hbe_fatal(HBE_CLI_INVALID_SUPPRESSABLE_ERROR, "Unrecognised suppressable error `%s`", (const char *) part_c);
    }
  }
}
int main(int argc, char **argv) {
// Set up rules
hbr_init();
@ -96,6 +124,7 @@ int main(int argc, char **argv) {
{"verbose", no_argument, NULL, 'v'},
{"input", required_argument, NULL, 'i'},
{"output", required_argument, NULL, 'o'},
{"suppress", optional_argument, NULL, 's'},
{"MXcollapseWhitespace", optional_argument, NULL, 1},
{"MXdestroyWholeWhitespace", optional_argument, NULL, 2},
@ -113,7 +142,7 @@ int main(int argc, char **argv) {
};
int option_index = 0;
int c = getopt_long(argc, argv, "kbvi:o:", long_options, &option_index);
int c = getopt_long(argc, argv, "kbvi:o:s:", long_options, &option_index);
if (c == -1) {
if (optind != argc) {
@ -143,6 +172,10 @@ int main(int argc, char **argv) {
hbe_info_toggle(1);
break;
case 's':
_parse_and_add_errors_to_suppress(&(config_stream->suppressed_errors), optarg);
break;
case 1:
config_stream->ex_collapse_whitespace = _parse_list_of_tags(optarg);
break;

View File

@ -4,11 +4,13 @@
#include "../error/error.c"
#include "../util/hbchar.h"
#include "../ext/nicehash/set/str.h"
#include "../ext/nicehash/set/int32.h"
typedef struct hbs_options_s {
nh_set_str_t ex_collapse_whitespace; // Could be NULL to represent the universal set (i.e. don't enable)
nh_set_str_t ex_destroy_whole_whitespace; // Could be NULL to represent the universal set (i.e. don't enable)
nh_set_str_t ex_trim_whitespace; // Could be NULL to represent the universal set (i.e. don't enable)
nh_set_str_t ex_collapse_whitespace; // Could be NULL to represent the universal set (i.e. don't minify)
nh_set_str_t ex_destroy_whole_whitespace; // Could be NULL to represent the universal set (i.e. don't minify)
nh_set_str_t ex_trim_whitespace; // Could be NULL to represent the universal set (i.e. don't minify)
nh_set_int32_t suppressed_errors;
int trim_class_attr;
int decode_entities;
int min_conditional_comments;
@ -45,6 +47,7 @@ hbs_options_t hbs_options_create(void) {
opt->ex_collapse_whitespace = _hbs_options_default_ex_collapse_whitespace();
opt->ex_destroy_whole_whitespace = _hbs_options_default_ex_destroy_whole_whitespace();
opt->ex_trim_whitespace = _hbs_options_default_ex_trim_whitespace();
opt->suppressed_errors = nh_set_int32_create();
opt->trim_class_attr = 1;
opt->decode_entities = 1;
opt->min_conditional_comments = 1;