Stream entities

This commit is contained in:
Wilson Lin 2018-08-04 00:15:44 +12:00
parent d051650527
commit caf8dcac22
6 changed files with 177 additions and 9 deletions

View File

@ -21,6 +21,7 @@
typedef enum hbe_errcode {
HBE_INTERR_PEEK_OFFSET_GEQ_ZERO = 1,
HBE_INTERR_UNKNOWN_ENTITY_TYPE,
HBE_CLI_TOO_MANY_OPTIONS = 17,
HBE_CLI_INVALID_TAG_SET,

View File

@ -219,7 +219,7 @@ int main(int argc, char **argv) {
hbe_fatal_set_autodelete(output_path);
}
hbs_content(pipe);
hbs_content(config_stream, pipe);
if (config_buffer) {
output = hbu_fstreamout_create(output_path);

View File

@ -4,6 +4,7 @@
#include "../util/hbchar.h"
#include "../util/pipe.c"
#include "./streamoptions.c"
void hbs_comment(hbu_pipe_t pipe)
{
hbu_pipe_require(pipe, '<');

View File

@ -5,13 +5,15 @@
#include "../util/pipe.c"
// Declare first before tag.c, as tag.c depends on it
void hbs_content(hbu_pipe_t pipe);
void hbs_content(hbs_options_t so, hbu_pipe_t pipe);
#include "./streamoptions.c"
#include "./tag.c"
#include "./bang.c"
#include "./comment.c"
#include "./entity.c"
void hbs_content(hbu_pipe_t pipe) {
void hbs_content(hbs_options_t so, hbu_pipe_t pipe) {
while (1) {
hb_eod_char_t c = hbu_pipe_peek_eoi(pipe);
@ -32,13 +34,12 @@ void hbs_content(hbu_pipe_t pipe) {
hbs_bang(pipe);
} else {
hbs_tag(pipe);
hbs_tag(so, pipe);
}
break;
case '&':
// TODO
hbu_pipe_accept(pipe);
hbs_entity(so, pipe);
break;
default:

160
src/main/c/stream/entity.c Normal file
View File

@ -0,0 +1,160 @@
#ifndef _HDR_HYPERBUILD_STREAM_ENTITY
#define _HDR_HYPERBUILD_STREAM_ENTITY
#include <errno.h>
#include <inttypes.h>
#include "../ext/nicehash/set/int32.h"
#include "../util/hbchar.h"
#include "../util/buffer.c"
#include "../util/pipe.c"
#include "../rule/entity/entityrefs.c"
#include "../rule/char/lcalpha.c"
#include "../rule/char/ucalpha.c"
#include "../rule/char/digit.c"
#include "../rule/char/hex.c"
#define HBS_ENTITY_TYPE_NAME 1
#define HBS_ENTITY_TYPE_DECIMAL 2
#define HBS_ENTITY_TYPE_HEXADECIMAL 3
#define HBS_ENTITY_MAX_ENTITY_LENGTH 10 // Don't try to parse malformed entities forever
// Reports an internal error (HBE_INTERR_UNKNOWN_ENTITY_TYPE) through hbe_fatal.
// Used as the `default` branch of every switch over the entity type in this
// file, i.e. it is only reached when `type` is not one of the HBS_ENTITY_TYPE_*
// values handled by that switch.
// NOTE(review): no type value is passed along, so the "$type" placeholder in the
// message looks like it is emitted verbatim - confirm whether hbe_fatal
// performs any substitution.
// NOTE(review): callers do not handle the case where this returns; presumably
// hbe_fatal terminates the process - confirm.
static void _hbs_entity_interr_unknown_entity(void) {
hbe_fatal(HBE_INTERR_UNKNOWN_ENTITY_TYPE, "INTERR $type is not a known entity type");
}
// Writes an entity back to the output exactly as it was consumed from the
// input: the '&', the type-specific prefix ("#" for decimal, "#x" for
// hexadecimal, nothing for named or untyped), the raw body collected so far
// and, if it was consumed, the terminating ';'.
//
// type               -1 (no entity type determined yet) or HBS_ENTITY_TYPE_*
// entity_raw         characters consumed after the prefix
// consumed_semicolon non-zero if the terminating ';' was consumed
static void _hbs_entity_write_raw(hbu_pipe_t pipe, int type, hbu_buffer_t entity_raw, int consumed_semicolon) {
    switch (type) {
    case -1:
    case HBS_ENTITY_TYPE_NAME:
        hbu_pipe_write(pipe, '&');
        break;

    case HBS_ENTITY_TYPE_DECIMAL:
        hbu_pipe_write(pipe, '&');
        hbu_pipe_write(pipe, '#');
        break;

    case HBS_ENTITY_TYPE_HEXADECIMAL:
        // hbs_entity only recognises a lowercase 'x', so that is what was skipped
        hbu_pipe_write(pipe, '&');
        hbu_pipe_write(pipe, '#');
        hbu_pipe_write(pipe, 'x');
        break;

    default:
        _hbs_entity_interr_unknown_entity();
    }

    hbu_pipe_write_buffer(pipe, entity_raw);

    if (consumed_semicolon) {
        hbu_pipe_write(pipe, ';');
    }
}

// Handles a malformed or invalid entity.
//
// If `errcode` is in so->suppressed_errors, the consumed source text is
// written back to the output unchanged and parsing continues. Otherwise the
// error is reported through hbu_pipe_error with `reason`.
static void _hbs_entity_handle_error(hbs_options_t so, hbu_pipe_t pipe, int type, hbu_buffer_t entity_raw, int consumed_semicolon, hbe_errcode_t errcode, const char *reason) {
    if (nh_set_int32_has(so->suppressed_errors, errcode)) {
        _hbs_entity_write_raw(pipe, type, entity_raw, consumed_semicolon);
        return;
    }

    hbu_pipe_error(pipe, errcode, reason);
}

// Processes one character reference starting at the '&' at the current pipe
// position.
//
// Recognised forms are named (&name;), decimal (&#123;) and hexadecimal
// (&#x1F;) entities. When so->decode_entities is set, a valid entity is
// replaced by what it decodes to; otherwise it is written through unchanged.
// Malformed entities (unexpected character, or no ';' within
// HBS_ENTITY_MAX_ENTITY_LENGTH characters) raise HBE_PARSE_MALFORMED_ENTITY;
// well-formed but unknown/out-of-range ones raise HBE_PARSE_INVALID_ENTITY.
// Both go through _hbs_entity_handle_error and can therefore be suppressed.
//
// NOTE(review): entity_raw is never released in this function; no buffer
// destructor is visible from here - confirm who owns it.
void hbs_entity(hbs_options_t so, hbu_pipe_t pipe) {
    hbu_pipe_require_skip(pipe, '&');

    hb_char_t c = hbu_pipe_peek(pipe);
    hbu_buffer_t entity_raw = hbu_buffer_create_size(HBS_ENTITY_MAX_ENTITY_LENGTH + 1);
    int type = -1;

    if (hbr_lcalpha_check(c) || hbr_ucalpha_check(c)) {
        // Name-based entity; the first letter is consumed by the loop below
        type = HBS_ENTITY_TYPE_NAME;
    } else if (c == '#' && hbu_pipe_peek_offset(pipe, 2) == 'x') {
        // Hexadecimal-based entity
        // NOTE: Check before decimal-based
        hbu_pipe_skip_amount(pipe, 2);
        type = HBS_ENTITY_TYPE_HEXADECIMAL;
    } else if (c == '#') {
        // Decimal-based entity
        hbu_pipe_skip(pipe);
        type = HBS_ENTITY_TYPE_DECIMAL;
    } else {
        // Malformed entity; `c` has only been peeked, so it stays in the input
        _hbs_entity_handle_error(so, pipe, type, entity_raw, 0, HBE_PARSE_MALFORMED_ENTITY, "Invalid character after ampersand");
        return;
    }

    int terminated = 0;
    for (int i = 0; i < HBS_ENTITY_MAX_ENTITY_LENGTH; i++) {
        // Peek rather than skip: a character that does not belong to the
        // entity must stay in the input so it is not lost when the error is
        // suppressed.
        hb_char_t e = hbu_pipe_peek(pipe);

        if (e == ';') {
            hbu_pipe_skip(pipe);
            terminated = 1;
            break;
        }

        int well_formed = 0;
        switch (type) {
        case HBS_ENTITY_TYPE_NAME:
            well_formed = hbr_lcalpha_check(e) || hbr_ucalpha_check(e);
            break;

        case HBS_ENTITY_TYPE_DECIMAL:
            well_formed = hbr_digit_check(e);
            break;

        case HBS_ENTITY_TYPE_HEXADECIMAL:
            well_formed = hbr_hex_check(e);
            break;

        default:
            _hbs_entity_interr_unknown_entity();
        }

        if (!well_formed) {
            _hbs_entity_handle_error(so, pipe, type, entity_raw, 0, HBE_PARSE_MALFORMED_ENTITY, "Characters after ampersand don't form entity");
            return;
        }

        hbu_pipe_skip(pipe);
        hbu_buffer_append(entity_raw, e);
    }

    if (!terminated) {
        // Malformed entity (too long)
        _hbs_entity_handle_error(so, pipe, type, entity_raw, 0, HBE_PARSE_MALFORMED_ENTITY, "Malformed entity");
        return;
    }

    // NOTE(review): the lookups below treat the underlying storage as a
    // NUL-terminated string - presumably the buffer guarantees this; confirm.
    hb_char_t *entity_raw_u = hbu_buffer_underlying(entity_raw);
    int valid = 1;
    int decoded = 0;

    switch (type) {
    case HBS_ENTITY_TYPE_NAME:
        valid = hbr_entityrefs_check(entity_raw_u);
        if (valid && so->decode_entities) {
            // NOTE(review): written with hbu_pipe_write, not
            // hbu_pipe_write_unicode - confirm the mapped value always fits
            // in a single hb_char_t.
            hbu_pipe_write(pipe, hbr_entityrefs_get(entity_raw_u));
            decoded = 1;
        }
        break;

    case HBS_ENTITY_TYPE_DECIMAL:
    case HBS_ENTITY_TYPE_HEXADECIMAL: {
        char *digits = (char *) entity_raw_u;
        char *end = NULL;

        // strtoumax only sets errno on failure, so clear any stale value first
        errno = 0;
        uintmax_t code_point = strtoumax(digits, &end, (type == HBS_ENTITY_TYPE_DECIMAL) ? 10 : 16);

        // `end == digits` means no digits at all (e.g. "&#;"), which must not
        // be accepted as code point 0
        valid = end != digits && errno == 0 && code_point <= 0x10FFFF;
        if (valid && so->decode_entities) {
            valid = hbu_pipe_write_unicode(pipe, code_point);
            decoded = valid;
        }
        break;
    }

    default:
        _hbs_entity_interr_unknown_entity();
    }

    if (!valid) {
        _hbs_entity_handle_error(so, pipe, type, entity_raw, 1, HBE_PARSE_INVALID_ENTITY, "Invalid entity");
        return;
    }

    if (!decoded) {
        // Valid entity but decoding is disabled: everything was skipped from
        // the input, so put the original text back on the output.
        _hbs_entity_write_raw(pipe, type, entity_raw, 1);
    }
}
#endif // _HDR_HYPERBUILD_STREAM_ENTITY

View File

@ -12,13 +12,15 @@
#include "./helper/tagname.c"
#include "./helper/attr.c"
#include "./helper/script.c"
#include "./helper/style.c"
// Declare first before content.c, as content.c depends on it
void hbs_tag(hbu_pipe_t pipe);
void hbs_tag(hbs_options_t so, hbu_pipe_t pipe);
#include "./streamoptions.c"
#include "./content.c"
void hbs_tag(hbu_pipe_t pipe) {
void hbs_tag(hbs_options_t so, hbu_pipe_t pipe) {
int self_closing = 0;
hbu_pipe_require(pipe, '<');
@ -50,9 +52,12 @@ void hbs_tag(hbu_pipe_t pipe) {
if (hbu_buffer_compare_lit(opening_name, "script") == 0) {
// Script tag
hbsh_script(pipe);
} else if (hbu_buffer_compare_lit(opening_name, "style") == 0) {
// Style tag
hbsh_style(pipe);
} else {
// Content
hbs_content(pipe);
hbs_content(so, pipe);
}
// Closing tag for non-void