summaryrefslogtreecommitdiff
path: root/Userland/Libraries
diff options
context:
space:
mode:
authorAndreas Kling <kling@serenityos.org>2021-09-14 23:49:45 +0200
committerAndreas Kling <kling@serenityos.org>2021-09-14 23:49:45 +0200
commit70398645f37913964626c99def6fe9d9e332a79c (patch)
tree53c37fa43ae9e1cf58155b200591df57ec8b428d /Userland/Libraries
parentfad825e94e831d5b215420d3f53fc46f84e981a6 (diff)
downloadserenity-70398645f37913964626c99def6fe9d9e332a79c.zip
LibWeb: Improvements to error handling in HTML foreign content parsing
Follow the spec more closely when encountering an invalid start or end tag during foreign content parsing.
Diffstat (limited to 'Userland/Libraries')
-rw-r--r--Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp40
1 files changed, 35 insertions, 5 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
index cf9663fa02..dad672ce76 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
@@ -95,6 +95,29 @@ static Vector<FlyString> s_quirks_public_ids = {
"-//WebTechs//DTD Mozilla HTML//"
};
+// https://html.spec.whatwg.org/multipage/parsing.html#mathml-text-integration-point
+static bool is_mathml_text_integration_point(DOM::Element const&)
+{
+ // FIXME: Implement.
+ return false;
+}
+
+// https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point
+static bool is_html_integration_point(DOM::Element const& element)
+{
+ // A node is an HTML integration point if it is one of the following elements:
+ // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
+ // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
+
+ // An SVG foreignObject element
+ // An SVG desc element
+ // An SVG title element
+ if (element.tag_name().is_one_of(SVG::TagNames::foreignObject, SVG::TagNames::desc, SVG::TagNames::title))
+ return true;
+
+ return false;
+}
+
RefPtr<DOM::Document> parse_html_document(const StringView& data, const AK::URL& url, const String& encoding)
{
auto document = DOM::Document::create(url);
@@ -2809,17 +2832,24 @@ void HTMLDocumentParser::process_using_the_rules_for_foreign_content(HTMLToken&
}
if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::blockquote, HTML::TagNames::body, HTML::TagNames::br, HTML::TagNames::center, HTML::TagNames::code, HTML::TagNames::dd, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::dt, HTML::TagNames::em, HTML::TagNames::embed, HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6, HTML::TagNames::head, HTML::TagNames::hr, HTML::TagNames::i, HTML::TagNames::img, HTML::TagNames::li, HTML::TagNames::listing, HTML::TagNames::menu, HTML::TagNames::meta, HTML::TagNames::nobr, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::pre, HTML::TagNames::ruby, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::span, HTML::TagNames::strong, HTML::TagNames::strike, HTML::TagNames::sub, HTML::TagNames::sup, HTML::TagNames::table, HTML::TagNames::tt, HTML::TagNames::u, HTML::TagNames::ul, HTML::TagNames::var))
- || (token.is_start_tag() && token.tag_name() == HTML::TagNames::font && (token.has_attribute(HTML::AttributeNames::color) || token.has_attribute(HTML::AttributeNames::face) || token.has_attribute(HTML::AttributeNames::size)))) {
+ || (token.is_start_tag() && token.tag_name() == HTML::TagNames::font && (token.has_attribute(HTML::AttributeNames::color) || token.has_attribute(HTML::AttributeNames::face) || token.has_attribute(HTML::AttributeNames::size)))
+ || (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::br, HTML::TagNames::p))) {
log_parse_error();
- if (m_parsing_fragment) {
- goto AnyOtherStartTag;
+
+ // While the current node is not a MathML text integration point, an HTML integration point, or an element in the HTML namespace, pop elements from the stack of open elements.
+ while (!is_mathml_text_integration_point(current_node())
+ && !is_html_integration_point(current_node())
+ && current_node().namespace_() != Namespace::HTML) {
+ m_stack_of_open_elements.pop();
}
- TODO();
+ // Reprocess the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
+ process_using_the_rules_for(m_insertion_mode, token);
+ return;
}
+ // Any other start tag
if (token.is_start_tag()) {
- AnyOtherStartTag:
if (adjusted_current_node().namespace_() == Namespace::MathML) {
adjust_mathml_attributes(token);
} else if (adjusted_current_node().namespace_() == Namespace::SVG) {