2020-07-03 03:32:09 -04:00
|
|
|
use lazy_static::lazy_static;
|
2021-08-06 02:19:36 -04:00
|
|
|
use std::collections::{HashMap, HashSet};
|
2020-01-06 07:36:05 -05:00
|
|
|
|
|
|
|
// Rules sourced from https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission.
|
|
|
|
// TODO Opening tags
|
|
|
|
|
2020-07-30 00:38:40 -04:00
|
|
|
/// Whether an element's closing tag may be omitted when the element is the
/// last node of its parent's children.
enum ClosingTagOmissionRuleIfLast {
    /// Closing tag can always be omitted if it's the last node of its parent's children.
    Always,
    /// Closing tag can never be omitted if it's the last node of its parent's children.
    Never,
    /// Closing tag can be omitted if it's the last node of its parent's children and the
    /// parent tag name is not one of these.
    ParentIsNot(HashSet<&'static [u8]>),
}
|
|
|
|
|
2021-08-05 22:07:27 -04:00
|
|
|
// What this means in effect while parsing:
|
|
|
|
// - Given we are processing the content of some element B, which itself is inside A (e.g. <A><B>):
|
|
|
|
// - If we see `</C` and B != C:
|
|
|
|
// - If C == A and C is compatible with is_last, B is closed implicitly.
|
|
|
|
// - If we see `<C` and maybe B == C:
|
|
|
|
// - If C is in followed_by, B is closed implicitly.
|
2020-07-30 00:38:40 -04:00
|
|
|
struct ClosingTagOmissionRule {
|
2020-01-06 07:36:05 -05:00
|
|
|
// Closing tag can be omitted if immediately followed by an element node with one of these tag names.
|
2020-07-03 03:32:09 -04:00
|
|
|
followed_by: HashSet<&'static [u8]>,
|
2020-01-06 07:36:05 -05:00
|
|
|
// Closing tag can be omitted if it's the last node of its parent's children.
|
|
|
|
is_last: ClosingTagOmissionRuleIfLast,
|
|
|
|
}
|
|
|
|
|
2020-07-24 05:05:29 -04:00
|
|
|
lazy_static! {
    /// Rule for `html`: no following sibling allows omission; omission is always allowed
    /// when it is the last node of its parent.
    static ref HTML_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
        followed_by: HashSet::new(),
        is_last: ClosingTagOmissionRuleIfLast::Always,
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `head`: closing tag may be omitted when followed by `body`, or when it is
    /// the last node of its parent.
    static ref HEAD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"body"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `body`: no following sibling allows omission; omission is always allowed
    /// when it is the last node of its parent.
    static ref BODY_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
        followed_by: HashSet::new(),
        is_last: ClosingTagOmissionRuleIfLast::Always,
    };
}
|
|
|
|
|
2020-07-03 03:32:09 -04:00
|
|
|
lazy_static! {
    /// Rule for `li`: closing tag may be omitted when followed by another `li`, or when it
    /// is the last node of its parent.
    static ref LI_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"li"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `dt`: closing tag may be omitted when followed by `dt` or `dd`, but never
    /// merely because it is the last node of its parent.
    static ref DT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"dt", b"dd"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Never,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `dd`: closing tag may be omitted when followed by `dd` or `dt`, or when it
    /// is the last node of its parent.
    static ref DD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"dd", b"dt"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `p`: closing tag may be omitted when followed by any of the listed
    /// elements, or when it is the last node of its parent and the parent is not one of
    /// the listed excluded parents.
    static ref P_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        // Following elements that implicitly close an open `p`.
        let next_tags: &[&'static [u8]] = &[
            b"address",
            b"article",
            b"aside",
            b"blockquote",
            b"details",
            b"div",
            b"dl",
            b"fieldset",
            b"figcaption",
            b"figure",
            b"footer",
            b"form",
            b"h1",
            b"h2",
            b"h3",
            b"h4",
            b"h5",
            b"h6",
            b"header",
            b"hgroup",
            b"hr",
            b"main",
            b"menu",
            b"nav",
            b"ol",
            b"p",
            b"pre",
            b"section",
            b"table",
            b"ul",
        ];

        // Parents inside which a trailing `</p>` must NOT be omitted.
        let excluded_parents: &[&'static [u8]] = &[
            b"a",
            b"audio",
            b"del",
            b"ins",
            b"map",
            b"noscript",
            b"video",
        ];

        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::ParentIsNot(
                excluded_parents.iter().copied().collect(),
            ),
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `rt`: closing tag may be omitted when followed by `rt` or `rp`, or when it
    /// is the last node of its parent.
    static ref RT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"rt", b"rp"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `rp`: closing tag may be omitted when followed by `rt` or `rp`, or when it
    /// is the last node of its parent.
    static ref RP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"rt", b"rp"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `optgroup`: closing tag may be omitted when followed by another
    /// `optgroup`, or when it is the last node of its parent.
    static ref OPTGROUP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"optgroup"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `option`: closing tag may be omitted when followed by `option` or
    /// `optgroup`, or when it is the last node of its parent.
    static ref OPTION_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"option", b"optgroup"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `thead`: closing tag may be omitted when followed by `tbody` or `tfoot`,
    /// but never merely because it is the last node of its parent.
    static ref THEAD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"tbody", b"tfoot"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Never,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `tbody`: closing tag may be omitted when followed by `tbody` or `tfoot`,
    /// or when it is the last node of its parent.
    static ref TBODY_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"tbody", b"tfoot"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `tfoot`: no following sibling allows omission; omission is always allowed
    /// when it is the last node of its parent.
    static ref TFOOT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
        followed_by: HashSet::new(),
        is_last: ClosingTagOmissionRuleIfLast::Always,
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `tr`: closing tag may be omitted when followed by another `tr`, or when it
    /// is the last node of its parent.
    static ref TR_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"tr"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `td`: closing tag may be omitted when followed by `td` or `th`, or when it
    /// is the last node of its parent.
    static ref TD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"td", b"th"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Rule for `th`: closing tag may be omitted when followed by `td` or `th`, or when it
    /// is the last node of its parent.
    static ref TH_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
        let next_tags: &[&'static [u8]] = &[b"td", b"th"];
        ClosingTagOmissionRule {
            followed_by: next_tags.iter().copied().collect(),
            is_last: ClosingTagOmissionRuleIfLast::Always,
        }
    };
}
|
|
|
|
|
|
|
|
lazy_static! {
    /// Lookup table from a tag name to its closing-tag omission rule.
    ///
    /// Tags that are absent from this map have no omission rule.
    static ref CLOSING_TAG_OMISSION_RULES: HashMap<&'static [u8], &'static ClosingTagOmissionRule> = {
        let mut rules = HashMap::<&'static [u8], &'static ClosingTagOmissionRule>::new();
        // Document structure.
        rules.insert(b"html", &HTML_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"head", &HEAD_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"body", &BODY_CLOSING_TAG_OMISSION_RULE);
        // Lists and paragraphs.
        rules.insert(b"li", &LI_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"dt", &DT_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"dd", &DD_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"p", &P_CLOSING_TAG_OMISSION_RULE);
        // Ruby annotations.
        rules.insert(b"rt", &RT_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"rp", &RP_CLOSING_TAG_OMISSION_RULE);
        // Select options.
        rules.insert(b"optgroup", &OPTGROUP_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"option", &OPTION_CLOSING_TAG_OMISSION_RULE);
        // Tables.
        rules.insert(b"thead", &THEAD_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"tbody", &TBODY_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"tfoot", &TFOOT_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"tr", &TR_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"td", &TD_CLOSING_TAG_OMISSION_RULE);
        rules.insert(b"th", &TH_CLOSING_TAG_OMISSION_RULE);
        rules
    };
}
|
2020-07-30 00:38:40 -04:00
|
|
|
|
2021-08-05 22:07:27 -04:00
|
|
|
// Use an empty slice for `parent` if no parent.
|
|
|
|
pub fn can_omit_as_last_node(parent: &[u8], child: &[u8]) -> bool {
|
2021-08-06 02:19:36 -04:00
|
|
|
CLOSING_TAG_OMISSION_RULES
|
|
|
|
.get(child)
|
2020-07-30 00:38:40 -04:00
|
|
|
.filter(|r| match &r.is_last {
|
|
|
|
ClosingTagOmissionRuleIfLast::Always => true,
|
|
|
|
ClosingTagOmissionRuleIfLast::Never => false,
|
2021-08-05 22:07:27 -04:00
|
|
|
ClosingTagOmissionRuleIfLast::ParentIsNot(parents) => !parents.contains(parent),
|
2020-07-30 00:38:40 -04:00
|
|
|
})
|
|
|
|
.is_some()
|
|
|
|
}
|
|
|
|
|
2021-08-06 08:53:33 -04:00
|
|
|
// Use an empty slice for `before` or `after` if no previous/next sibling element.
|
2021-08-05 22:07:27 -04:00
|
|
|
pub fn can_omit_as_before(before: &[u8], after: &[u8]) -> bool {
|
2021-08-06 02:19:36 -04:00
|
|
|
CLOSING_TAG_OMISSION_RULES
|
|
|
|
.get(before)
|
2021-08-05 22:07:27 -04:00
|
|
|
.filter(|r| r.followed_by.contains(after))
|
2020-07-30 00:38:40 -04:00
|
|
|
.is_some()
|
|
|
|
}
|