diff options
author | stelar7 <dudedbz@gmail.com> | 2020-06-21 06:58:03 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-06-21 10:13:31 +0200 |
commit | 5eb39a5f61a2617e795b26ba95bc73d83d43a195 (patch) | |
tree | 7f127ed1843d90e78966bbd6425552127b831ad8 /Libraries | |
parent | 8e24a17d0d7f6ebe5fe7b9e242ce8e696ceeacbf (diff) | |
download | serenity-5eb39a5f61a2617e795b26ba95bc73d83d43a195.zip |
LibWeb: Update parser with more insertion modes :^)
Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.
Diffstat (limited to 'Libraries')
-rw-r--r-- | Libraries/LibWeb/DOM/TagNames.h | 4 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLDocumentParser.cpp | 450 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLDocumentParser.h | 9 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLToken.cpp | 2 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLToken.h | 34 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 2 |
6 files changed, 471 insertions, 30 deletions
diff --git a/Libraries/LibWeb/DOM/TagNames.h b/Libraries/LibWeb/DOM/TagNames.h index d332fe8d0d..55fefe2cd1 100644 --- a/Libraries/LibWeb/DOM/TagNames.h +++ b/Libraries/LibWeb/DOM/TagNames.h @@ -87,6 +87,7 @@ void initialize(); __ENUMERATE_HTML_TAG(html) \ __ENUMERATE_HTML_TAG(i) \ __ENUMERATE_HTML_TAG(iframe) \ + __ENUMERATE_HTML_TAG(image) \ __ENUMERATE_HTML_TAG(img) \ __ENUMERATE_HTML_TAG(input) \ __ENUMERATE_HTML_TAG(keygen) \ @@ -95,6 +96,7 @@ void initialize(); __ENUMERATE_HTML_TAG(listing) \ __ENUMERATE_HTML_TAG(main) \ __ENUMERATE_HTML_TAG(marquee) \ + __ENUMERATE_HTML_TAG(math) \ __ENUMERATE_HTML_TAG(menu) \ __ENUMERATE_HTML_TAG(meta) \ __ENUMERATE_HTML_TAG(nav) \ @@ -110,6 +112,7 @@ void initialize(); __ENUMERATE_HTML_TAG(param) \ __ENUMERATE_HTML_TAG(plaintext) \ __ENUMERATE_HTML_TAG(pre) \ + __ENUMERATE_HTML_TAG(ruby) \ __ENUMERATE_HTML_TAG(rb) \ __ENUMERATE_HTML_TAG(rp) \ __ENUMERATE_HTML_TAG(rt) \ @@ -125,6 +128,7 @@ void initialize(); __ENUMERATE_HTML_TAG(strong) \ __ENUMERATE_HTML_TAG(style) \ __ENUMERATE_HTML_TAG(summary) \ + __ENUMERATE_HTML_TAG(svg) \ __ENUMERATE_HTML_TAG(table) \ __ENUMERATE_HTML_TAG(tbody) \ __ENUMERATE_HTML_TAG(td) \ diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp index 01ed321995..8bee4459f3 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp @@ -155,6 +155,18 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok case InsertionMode::InColumnGroup: handle_in_column_group(token); break; + case InsertionMode::InTemplate: + handle_in_template(token); + break; + case InsertionMode::InFrameset: + handle_in_frameset(token); + break; + case InsertionMode::AfterFrameset: + handle_after_frameset(token); + break; + case InsertionMode::AfterAfterFrameset: + handle_after_after_frameset(token); + break; default: ASSERT_NOT_REACHED(); } @@ -254,7 +266,7 @@ NonnullRefPtr<Element> HTMLDocumentParser::create_element_for(HTMLToken& token) { auto element = create_element(document(), token.tag_name()); for (auto& attribute : token.m_tag.attributes) { - element->set_attribute(attribute.name_builder.to_string(), attribute.value_builder.to_string()); + element->set_attribute(attribute.local_name_builder.to_string(), attribute.value_builder.to_string()); } return element; } @@ -372,6 +384,11 @@ void HTMLDocumentParser::handle_in_head(HTMLToken& token) return; } + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noscript && !m_scripting_enabled) { + insert_html_element(token); + m_insertion_mode = InsertionMode::InHeadNoscript; + } + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::script) { auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); auto element = create_element_for(token); @@ -401,7 +418,7 @@ void HTMLDocumentParser::handle_in_head(HTMLToken& token) } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { - TODO(); + goto AnythingElse; } if (token.is_start_tag() && token.tag_name() == HTML::TagNames::template_) { @@ -422,14 +439,49 @@ void HTMLDocumentParser::handle_in_head(HTMLToken& token) return; } +AnythingElse: m_stack_of_open_elements.pop(); m_insertion_mode = InsertionMode::AfterHead; process_using_the_rules_for(m_insertion_mode, token); } -void HTMLDocumentParser::handle_in_head_noscript(HTMLToken&) +void HTMLDocumentParser::handle_in_head_noscript(HTMLToken& token) { - TODO(); + if (token.is_doctype()) { + PARSE_ERROR(); + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { + process_using_the_rules_for(InsertionMode::InBody, token); + return; + } + + if (token.is_end_tag() && token.tag_name() == HTML::TagNames::noscript) { + m_stack_of_open_elements.pop(); + m_insertion_mode = InsertionMode::InHead; + return; + } + + if (token.is_parser_whitespace() || token.is_comment() || (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::style))) { + process_using_the_rules_for(InsertionMode::InHead, token); + return; + } + + if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) { + goto AnythingElse; + } + + if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::noscript)) { + PARSE_ERROR(); + return; + } + +AnythingElse: + PARSE_ERROR(); + m_stack_of_open_elements.pop(); + m_insertion_mode = InsertionMode::InHead; + process_using_the_rules_for(m_insertion_mode, token); } void HTMLDocumentParser::parse_generic_raw_text_element(HTMLToken& token) @@ -524,7 +576,7 @@ void HTMLDocumentParser::handle_after_head(HTMLToken& token) } if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { - TODO(); + process_using_the_rules_for(InsertionMode::InHead, token); } if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { @@ -568,7 +620,10 @@ void HTMLDocumentParser::handle_after_body(HTMLToken& token) } if (token.is_comment()) { - TODO(); + auto data = token.m_comment_or_character.data.to_string(); + auto& insertion_location = m_stack_of_open_elements.first(); + insertion_location.append_child(adopt(*new Comment(document(), data))); + return; } if (token.is_doctype()) { @@ -581,11 +636,6 @@ void HTMLDocumentParser::handle_after_body(HTMLToken& token) return; } - if (token.is_end_of_file()) { - stop_parsing(); - return; - } - if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { if (m_parsing_fragment) { TODO(); @@ -594,6 +644,11 @@ void HTMLDocumentParser::handle_after_body(HTMLToken& token) return; } + if (token.is_end_of_file()) { + stop_parsing(); + return; + } + PARSE_ERROR(); m_insertion_mode = InsertionMode::InBody; process_using_the_rules_for(InsertionMode::InBody, token); @@ -845,9 +900,9 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) if (m_stack_of_open_elements.contains(HTML::TagNames::template_)) return; for (auto& attribute : token.m_tag.attributes) { - if (current_node().has_attribute(attribute.name_builder.string_view())) + if (current_node().has_attribute(attribute.local_name_builder.string_view())) continue; - current_node().set_attribute(attribute.name_builder.to_string(), attribute.value_builder.to_string()); + current_node().set_attribute(attribute.local_name_builder.to_string(), attribute.value_builder.to_string()); } return; } @@ -870,9 +925,9 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) } m_frameset_ok = false; for (auto& attribute : token.m_tag.attributes) { - if (node_before_current_node().has_attribute(attribute.name_builder.string_view())) + if (node_before_current_node().has_attribute(attribute.local_name_builder.string_view())) continue; - node_before_current_node().set_attribute(attribute.name_builder.to_string(), attribute.value_builder.to_string()); + node_before_current_node().set_attribute(attribute.local_name_builder.to_string(), attribute.value_builder.to_string()); } return; } @@ -1281,7 +1336,7 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) return; } - if (token.is_start_tag() && token.tag_name().equals_ignoring_case("image")) { + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::image) { // Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.) PARSE_ERROR(); token.m_tag.tag_name.clear(); @@ -1361,24 +1416,54 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rb, HTML::TagNames::rtc)) { - TODO(); + if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby)) + generate_implied_end_tags(); + + if (current_node().tag_name() != HTML::TagNames::ruby) + PARSE_ERROR(); + + insert_html_element(token); } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rp, HTML::TagNames::rt)) { - TODO(); + if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby)) + generate_implied_end_tags(HTML::TagNames::rtc); + + if (current_node().tag_name() != HTML::TagNames::rtc || current_node().tag_name() != HTML::TagNames::ruby) + PARSE_ERROR(); + + insert_html_element(token); } - if (token.is_start_tag() && token.tag_name() == "math") { + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::math) { dbg() << "<math> element encountered."; reconstruct_the_active_formatting_elements(); + adjust_mathml_attributes(token); + adjust_foreign_attributes(token); + + // FIXME: this should insert a foreign element, but lets just insert it normally for now :^) insert_html_element(token); + + if (token.is_self_closing()) { + m_stack_of_open_elements.pop(); + token.acknowledge_self_closing_flag_if_set(); + } return; } - if (token.is_start_tag() && token.tag_name() == "svg") { + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::svg) { dbg() << "<svg> element encountered."; reconstruct_the_active_formatting_elements(); + adjust_svg_attributes(token); + adjust_foreign_attributes(token); + + // FIXME: this should insert a foreign element, but lets just insert it normally for now :^) insert_html_element(token); + + if (token.is_self_closing()) { + m_stack_of_open_elements.pop(); + token.acknowledge_self_closing_flag_if_set(); + } return; } @@ -1418,7 +1503,92 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) return; } - TODO(); +} + +void HTMLDocumentParser::adjust_mathml_attributes(HTMLToken& token) +{ + token.adjust_attribute_name("definitionurl", "definitionURL"); +} + +void HTMLDocumentParser::adjust_svg_attributes(HTMLToken& token) +{ + token.adjust_attribute_name("attributename", "attributeName"); + token.adjust_attribute_name("attributetype", "attributeType"); + token.adjust_attribute_name("basefrequency", "baseFrequency"); + token.adjust_attribute_name("baseprofile", "baseProfile"); + token.adjust_attribute_name("calcmode", "calcMode"); + token.adjust_attribute_name("clippathunits", "clipPathUnits"); + token.adjust_attribute_name("diffuseconstant", "diffuseConstant"); + token.adjust_attribute_name("edgemode", "edgeMode"); + token.adjust_attribute_name("filterunits", "filterUnits"); + token.adjust_attribute_name("glyphref", "glyphRef"); + token.adjust_attribute_name("gradienttransform", "gradientTransform"); + token.adjust_attribute_name("gradientunits", "gradientUnits"); + token.adjust_attribute_name("kernelmatrix", "kernelMatrix"); + token.adjust_attribute_name("kernelunitlength", "kernelUnitLength"); + token.adjust_attribute_name("keypoints", "keyPoints"); + token.adjust_attribute_name("keysplines", "keySplines"); + token.adjust_attribute_name("keytimes", "keyTimes"); + token.adjust_attribute_name("lengthadjust", "lengthAdjust"); + token.adjust_attribute_name("limitingconeangle", "limitingConeAngle"); + token.adjust_attribute_name("markerheight", "markerHeight"); + token.adjust_attribute_name("markerunits", "markerUnits"); + token.adjust_attribute_name("markerwidth", "markerWidth"); + token.adjust_attribute_name("maskcontentunits", "maskContentUnits"); + token.adjust_attribute_name("maskunits", "maskUnits"); + token.adjust_attribute_name("numoctaves", "numOctaves"); + token.adjust_attribute_name("pathlength", "pathLength"); + token.adjust_attribute_name("patterncontentunits", "patternContentUnits"); + token.adjust_attribute_name("patterntransform", "patternTransform"); + token.adjust_attribute_name("patternunits", "patternUnits"); + token.adjust_attribute_name("pointsatx", "pointsAtX"); + token.adjust_attribute_name("pointsaty", "pointsAtY"); + token.adjust_attribute_name("pointsatz", "pointsAtZ"); + token.adjust_attribute_name("preservealpha", "preserveAlpha"); + token.adjust_attribute_name("preserveaspectratio", "preserveAspectRatio"); + token.adjust_attribute_name("primitiveunits", "primitiveUnits"); + token.adjust_attribute_name("refx", "refX"); + token.adjust_attribute_name("refy", "refY"); + token.adjust_attribute_name("repeatcount", "repeatCount"); + token.adjust_attribute_name("repeatdur", "repeatDur"); + token.adjust_attribute_name("requiredextensions", "requiredExtensions"); + token.adjust_attribute_name("requiredfeatures", "requiredFeatures"); + token.adjust_attribute_name("specularconstant", "specularConstant"); + token.adjust_attribute_name("specularexponent", "specularExponent"); + token.adjust_attribute_name("spreadmethod", "spreadMethod"); + token.adjust_attribute_name("startoffset", "startOffset"); + token.adjust_attribute_name("stddeviation", "stdDeviation"); + token.adjust_attribute_name("stitchtiles", "stitchTiles"); + token.adjust_attribute_name("surfacescale", "surfaceScale"); + token.adjust_attribute_name("systemlanguage", "systemLanguage"); + token.adjust_attribute_name("tablevalues", "tableValues"); + token.adjust_attribute_name("targetx", "targetX"); + token.adjust_attribute_name("targety", "targetY"); + token.adjust_attribute_name("textlength", "textLength"); + token.adjust_attribute_name("viewbox", "viewBox"); + token.adjust_attribute_name("viewtarget", "viewTarget"); + token.adjust_attribute_name("xchannelselector", "xChannelSelector"); + token.adjust_attribute_name("ychannelselector", "yChannelSelector"); + token.adjust_attribute_name("zoomandpan", "zoomAndPan"); +} +void HTMLDocumentParser::adjust_foreign_attributes(HTMLToken& token) +{ + auto xlink_namespace = "http://www.w3.org/1999/xlink"; + token.adjust_foreign_attribute("xlink:actuate", "xlink", "actuate", xlink_namespace); + token.adjust_foreign_attribute("xlink:arcrole", "xlink", "arcrole", xlink_namespace); + token.adjust_foreign_attribute("xlink:href", "xlink", "href", xlink_namespace); + token.adjust_foreign_attribute("xlink:role", "xlink", "role", xlink_namespace); + token.adjust_foreign_attribute("xlink:show", "xlink", "show", xlink_namespace); + token.adjust_foreign_attribute("xlink:title", "xlink", "title", xlink_namespace); + token.adjust_foreign_attribute("xlink:type", "xlink", "type", xlink_namespace); + + auto xml_namespace = "http://www.w3.org/XML/1998/namespace"; + token.adjust_foreign_attribute("xml:lang", "xml", "lang", xml_namespace); + token.adjust_foreign_attribute("xml:space", "xml", "space", xml_namespace); + + auto xmlns_namespace = "http://www.w3.org/2000/xmlns/"; + token.adjust_foreign_attribute("xmlns", "", "xmlns", xmlns_namespace); + token.adjust_foreign_attribute("xmlns:xlink", "xmlns", "xlink", xmlns_namespace); } void HTMLDocumentParser::increment_script_nesting_level() @@ -1714,7 +1884,13 @@ void HTMLDocumentParser::handle_in_table_body(HTMLToken& token) if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) { - // FIXME: If the stack of open elements does not have a tbody, thead, or tfoot element in table scope, this is a parse error; ignore the token. + + if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tbody) + || !m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::thead) + || !m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tfoot)) { + PARSE_ERROR(); + return; + } clear_the_stack_back_to_a_table_body_context(); m_stack_of_open_elements.pop(); @@ -1814,7 +1990,7 @@ void HTMLDocumentParser::handle_in_table(HTMLToken& token) return; } if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::template_)) - || (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_)) { + || (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_)) { process_using_the_rules_for(InsertionMode::InHead, token); return; } @@ -1860,8 +2036,27 @@ AnythingElse: void HTMLDocumentParser::handle_in_select_in_table(HTMLToken& token) { - (void)token; - TODO(); + if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::td, HTML::TagNames::th)) { + PARSE_ERROR(); + m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select); + reset_the_insertion_mode_appropriately(); + process_using_the_rules_for(m_insertion_mode, token); + return; + } + + if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::td, HTML::TagNames::th)) { + PARSE_ERROR(); + + if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) + return; + + m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select); + reset_the_insertion_mode_appropriately(); + process_using_the_rules_for(m_insertion_mode, token); + return; + } + + process_using_the_rules_for(InsertionMode::InSelect, token); } void HTMLDocumentParser::handle_in_select(HTMLToken& token) @@ -2005,7 +2200,7 @@ void HTMLDocumentParser::handle_in_caption(HTMLToken& token) } if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) - || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) { + || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) { if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::caption)) { PARSE_ERROR(); return; @@ -2097,6 +2292,209 @@ void HTMLDocumentParser::handle_in_column_group(HTMLToken& token) process_using_the_rules_for(m_insertion_mode, token); } +void HTMLDocumentParser::handle_in_template(HTMLToken& token) +{ + if (token.is_character() || token.is_comment() || token.is_doctype()) { + process_using_the_rules_for(InsertionMode::InBody, token); + return; + } + + if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { + process_using_the_rules_for(InsertionMode::InHead, token); + return; + } + + if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { + process_using_the_rules_for(InsertionMode::InHead, token); + return; + } + + if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) { + m_stack_of_template_insertion_modes.take_last(); + m_stack_of_template_insertion_modes.append(InsertionMode::InTable); + m_insertion_mode = InsertionMode::InTable; + process_using_the_rules_for(m_insertion_mode, token); + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) { + m_stack_of_template_insertion_modes.take_last(); + m_stack_of_template_insertion_modes.append(InsertionMode::InColumnGroup); + m_insertion_mode = InsertionMode::InColumnGroup; + process_using_the_rules_for(m_insertion_mode, token); + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::tr) { + m_stack_of_template_insertion_modes.take_last(); + m_stack_of_template_insertion_modes.append(InsertionMode::InTableBody); + m_insertion_mode = InsertionMode::InTableBody; + process_using_the_rules_for(m_insertion_mode, token); + return; + } + + if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) { + m_stack_of_template_insertion_modes.take_last(); + m_stack_of_template_insertion_modes.append(InsertionMode::InRow); + m_insertion_mode = InsertionMode::InRow; + process_using_the_rules_for(m_insertion_mode, token); + return; + } + + if (token.is_start_tag()) { + m_stack_of_template_insertion_modes.take_last(); + m_stack_of_template_insertion_modes.append(InsertionMode::InBody); + m_insertion_mode = InsertionMode::InBody; + process_using_the_rules_for(m_insertion_mode, token); + return; + } + + if (token.is_end_tag()) { + PARSE_ERROR(); + return; + } + + if (token.is_end_of_file()) { + if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) { + stop_parsing(); + } else { + PARSE_ERROR(); + } + + m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::template_); + m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); + m_stack_of_template_insertion_modes.take_last(); + reset_the_insertion_mode_appropriately(); + process_using_the_rules_for(m_insertion_mode, token); + } +} + +void HTMLDocumentParser::handle_in_frameset(HTMLToken& token) +{ + if (token.is_character() && token.is_parser_whitespace()) { + insert_character(token.codepoint()); + return; + } + + if (token.is_comment()) { + insert_comment(token); + return; + } + + if (token.is_doctype()) { + PARSE_ERROR(); + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { + process_using_the_rules_for(InsertionMode::InBody, token); + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) { + insert_html_element(token); + return; + } + + if (token.is_end_tag() && token.tag_name() == HTML::TagNames::frameset) { + // FIXME: If the current node is the root html element, then this is a parse error; ignore the token. (fragment case) + + m_stack_of_open_elements.pop(); + + if (m_parsing_fragment && current_node().tag_name() != HTML::TagNames::frameset) { + m_insertion_mode = InsertionMode::AfterFrameset; + } + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frame) { + insert_html_element(token); + m_stack_of_open_elements.pop(); + token.acknowledge_self_closing_flag_if_set(); + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) { + process_using_the_rules_for(InsertionMode::InHead, token); + return; + } + + if (token.is_end_of_file()) { + //FIXME: If the current node is not the root html element, then this is a parse error. + + stop_parsing(); + return; + } + + PARSE_ERROR(); +} + +void HTMLDocumentParser::handle_after_frameset(HTMLToken& token) +{ + if (token.is_character() && token.is_parser_whitespace()) { + insert_character(token.codepoint()); + return; + } + + if (token.is_comment()) { + insert_comment(token); + return; + } + + if (token.is_doctype()) { + PARSE_ERROR(); + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { + process_using_the_rules_for(InsertionMode::InBody, token); + return; + } + + if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { + m_insertion_mode = InsertionMode::AfterAfterFrameset; + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) { + process_using_the_rules_for(InsertionMode::InHead, token); + return; + } + + if (token.is_end_of_file()) { + stop_parsing(); + return; + } + + PARSE_ERROR(); +} + +void HTMLDocumentParser::handle_after_after_frameset(HTMLToken& token) +{ + if (token.is_comment()) { + auto comment = adopt(*new Comment(document(), token.m_comment_or_character.data.to_string())); + document().append_child(move(comment)); + return; + } + + if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == HTML::TagNames::html)) { + process_using_the_rules_for(InsertionMode::InBody, token); + return; + } + + if (token.is_end_of_file()) { + stop_parsing(); + return; + } + + if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) { + process_using_the_rules_for(InsertionMode::InHead, token); + return; + } + + PARSE_ERROR(); +} + void HTMLDocumentParser::reset_the_insertion_mode_appropriately() { for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.h b/Libraries/LibWeb/Parser/HTMLDocumentParser.h index 2e78c91917..a3c8153935 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h @@ -100,6 +100,10 @@ private: void handle_in_select(HTMLToken&); void handle_in_caption(HTMLToken&); void handle_in_column_group(HTMLToken&); + void handle_in_template(HTMLToken&); + void handle_in_frameset(HTMLToken&); + void handle_after_frameset(HTMLToken&); + void handle_after_after_frameset(HTMLToken&); void stop_parsing() { m_stop_parsing = true; } @@ -123,6 +127,10 @@ private: size_t script_nesting_level() const { return m_script_nesting_level; } void reset_the_insertion_mode_appropriately(); + void adjust_mathml_attributes(HTMLToken&); + void adjust_svg_attributes(HTMLToken&); + void adjust_foreign_attributes(HTMLToken&); + enum AdoptionAgencyAlgorithmOutcome { DoNothing, RunAnyOtherEndTagSteps, @@ -138,6 +146,7 @@ private: InsertionMode m_original_insertion_mode { InsertionMode::Initial }; StackOfOpenElements m_stack_of_open_elements; + Vector<InsertionMode> m_stack_of_template_insertion_modes; ListOfActiveFormattingElements m_list_of_active_formatting_elements; HTMLTokenizer m_tokenizer; diff --git a/Libraries/LibWeb/Parser/HTMLToken.cpp b/Libraries/LibWeb/Parser/HTMLToken.cpp index 587d1ae02d..ada76e415a 100644 --- a/Libraries/LibWeb/Parser/HTMLToken.cpp +++ b/Libraries/LibWeb/Parser/HTMLToken.cpp @@ -63,7 +63,7 @@ String HTMLToken::to_string() const builder.append(m_tag.tag_name.to_string()); builder.append("', { "); for (auto& attribute : m_tag.attributes) { - builder.append(attribute.name_builder.to_string()); + builder.append(attribute.local_name_builder.to_string()); builder.append("=\""); builder.append(attribute.value_builder.to_string()); builder.append("\" "); diff --git a/Libraries/LibWeb/Parser/HTMLToken.h b/Libraries/LibWeb/Parser/HTMLToken.h index 0727321f8d..33e53979a8 100644 --- a/Libraries/LibWeb/Parser/HTMLToken.h +++ b/Libraries/LibWeb/Parser/HTMLToken.h @@ -118,12 +118,40 @@ public: { ASSERT(is_start_tag() || is_end_tag()); for (auto& attribute : m_tag.attributes) { - if (attribute_name == attribute.name_builder.string_view()) + if (attribute_name == attribute.local_name_builder.string_view()) return attribute.value_builder.string_view(); } return {}; } + void adjust_attribute_name(const FlyString& old_name, const FlyString& new_name) + { + ASSERT(is_start_tag() || is_end_tag()); + for (auto& attribute : m_tag.attributes) { + if (old_name == attribute.local_name_builder.string_view()) { + attribute.local_name_builder.clear(); + attribute.local_name_builder.append(new_name); + } + } + } + + void adjust_foreign_attribute(const FlyString& old_name, const FlyString& prefix, const FlyString& local_name, const FlyString& namespace_) + { + ASSERT(is_start_tag() || is_end_tag()); + for (auto& attribute : m_tag.attributes) { + if (old_name == attribute.local_name_builder.string_view()) { + attribute.prefix_builder.clear(); + attribute.prefix_builder.append(prefix); + + attribute.local_name_builder.clear(); + attribute.local_name_builder.append(local_name); + + attribute.namespace_builder.clear(); + attribute.namespace_builder.append(namespace_); + } + } + } + void drop_attributes() { ASSERT(is_start_tag() || is_end_tag()); @@ -136,7 +164,9 @@ public: private: struct AttributeBuilder { - StringBuilder name_builder; + StringBuilder prefix_builder; + StringBuilder local_name_builder; + StringBuilder namespace_builder; StringBuilder value_builder; }; diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp index 6cb52a79fe..1a3bdc5ac3 100644 --- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp @@ -1037,7 +1037,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_tag.attributes.last().name_builder.append_codepoint(current_input_character.value()); + m_current_token.m_tag.attributes.last().local_name_builder.append_codepoint(current_input_character.value()); continue; } } |