diff options
author | Andreas Kling <kling@serenityos.org> | 2020-05-27 23:22:42 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-05-27 23:22:42 +0200 |
commit | db6cf9b37df9f7750872991b095289dd6e689ee0 (patch) | |
tree | 023d48b0d255f54949207389c6dfd30e0e4b01e3 /Libraries/LibWeb | |
parent | 4c9c6b3a7bf58343bf4da116ffbf87e4cadb623c (diff) | |
download | serenity-db6cf9b37df9f7750872991b095289dd6e689ee0.zip |
LibWeb: Implement the first half of the Adoption Agency Algorithm
The AAA is a somewhat daunting algorithm you have to run for certain
tags when they are inserted inside the <body> element. Its purpose is to
resolve issues with mismatched tags.
This patch implements the first half of the AAA. We also move the
"list of active formatting elements" to its own class, since it kept
accumulating little behaviors. "Marker" entries are now signified by
null Element pointers in the list.
Diffstat (limited to 'Libraries/LibWeb')
-rw-r--r-- | Libraries/LibWeb/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLDocumentParser.cpp | 101 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLDocumentParser.h | 5 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/ListOfActiveFormattingElements.cpp | 74 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/ListOfActiveFormattingElements.h | 63 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/StackOfOpenElements.cpp | 17 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/StackOfOpenElements.h | 3 |
7 files changed, 248 insertions, 16 deletions
diff --git a/Libraries/LibWeb/CMakeLists.txt b/Libraries/LibWeb/CMakeLists.txt index a37702ec04..83f8c85394 100644 --- a/Libraries/LibWeb/CMakeLists.txt +++ b/Libraries/LibWeb/CMakeLists.txt @@ -89,6 +89,7 @@ set(SOURCES Parser/HTMLParser.cpp Parser/HTMLToken.cpp Parser/HTMLTokenizer.cpp + Parser/ListOfActiveFormattingElements.cpp Parser/StackOfOpenElements.cpp ResourceLoader.cpp StylePropertiesModel.cpp diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp index 24f7e967c5..6da146dec0 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp @@ -492,11 +492,12 @@ void HTMLDocumentParser::reconstruct_the_active_formatting_elements() if (m_list_of_active_formatting_elements.is_empty()) return; - if (m_stack_of_open_elements.contains(m_list_of_active_formatting_elements.last())) + if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element)) return; - ssize_t index = m_list_of_active_formatting_elements.size() - 1; - RefPtr<Element> entry = m_list_of_active_formatting_elements.at(index); + ssize_t index = m_list_of_active_formatting_elements.entries().size() - 1; + RefPtr<Element> entry = m_list_of_active_formatting_elements.entries().at(index).element; + ASSERT(entry); Rewind: if (index == 0) { @@ -504,14 +505,16 @@ Rewind: } --index; - entry = m_list_of_active_formatting_elements.at(index); + entry = m_list_of_active_formatting_elements.entries().at(index).element; + ASSERT(entry); if (!m_stack_of_open_elements.contains(*entry)) goto Rewind; Advance: ++index; - entry = m_list_of_active_formatting_elements.at(index); + entry = m_list_of_active_formatting_elements.entries().at(index).element; + ASSERT(entry); Create: // FIXME: Hold on to the real token! 
@@ -520,12 +523,74 @@ Create: fake_token.m_tag.tag_name.append(entry->tag_name()); auto new_element = insert_html_element(fake_token); - m_list_of_active_formatting_elements.ptr_at(index) = *new_element; + m_list_of_active_formatting_elements.entries().at(index).element = *new_element; - if (index != (ssize_t)m_list_of_active_formatting_elements.size() - 1) + if (index != (ssize_t)m_list_of_active_formatting_elements.entries().size() - 1) goto Advance; } +void HTMLDocumentParser::run_the_adoption_agency_algorithm(HTMLToken& token) +{ + auto subject = token.tag_name(); + + // If the current node is an HTML element whose tag name is subject, + // and the current node is not in the list of active formatting elements, + // then pop the current node off the stack of open elements, and return. + if (current_node().tag_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) { + m_stack_of_open_elements.pop(); + return; + } + + size_t outer_loop_counter = 0; + +//OuterLoop: + if (outer_loop_counter >= 8) + return; + + ++outer_loop_counter; + + auto formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject); + if (!formatting_element) { + // FIXME: If there is no such element, then return and instead act as + // described in the "any other end tag" entry above. + TODO(); + } + + if (!m_stack_of_open_elements.contains(*formatting_element)) { + PARSE_ERROR(); + // FIXME: If formatting element is not in the stack of open elements, + // then this is a parse error; remove the element from the list, and return. + TODO(); + } + + if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) { + PARSE_ERROR(); + return; + } + + if (formatting_element != &current_node()) { + PARSE_ERROR(); + } + + // FIXME: Let furthest block be the topmost node in the stack of open elements + // that is lower in the stack than formatting element, and is an element + // in the special category. There might not be one.
+ RefPtr<Element> furthest_block = nullptr; + + if (!furthest_block) { + while (&current_node() != formatting_element) + m_stack_of_open_elements.pop(); + m_stack_of_open_elements.pop(); + + m_list_of_active_formatting_elements.remove(*formatting_element); + return; + } + + // FIXME: Implement the rest of the AAA :^) + + TODO(); +} + void HTMLDocumentParser::handle_in_body(HTMLToken& token) { if (token.is_character()) { @@ -602,13 +667,16 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) return; } - { - if (token.is_start_tag() && token.tag_name().is_one_of("b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) { - reconstruct_the_active_formatting_elements(); - auto element = insert_html_element(token); - m_list_of_active_formatting_elements.append(*element); - return; - } + if (token.is_start_tag() && token.tag_name().is_one_of("b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) { + reconstruct_the_active_formatting_elements(); + auto element = insert_html_element(token); + m_list_of_active_formatting_elements.add(*element); + return; + } + + if (token.is_end_tag() && token.tag_name().is_one_of("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u")) { + run_the_adoption_agency_algorithm(token); + return; } if (token.is_start_tag() && token.tag_name().is_one_of("address", "article", "aside", "blockquote", "center", "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", "section", "summary", "ul")) { @@ -746,6 +814,11 @@ void HTMLDocumentParser::handle_text(HTMLToken& token) return; } + if (token.is_end_tag() && token.tag_name() == "style") { + current_node().children_changed(); + // NOTE: We don't return here, keep going.
+ } + if (token.is_end_tag()) { m_stack_of_open_elements.pop(); m_insertion_mode = m_original_insertion_mode; diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.h b/Libraries/LibWeb/Parser/HTMLDocumentParser.h index 6eeff69220..6ce358ac4c 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h @@ -29,6 +29,7 @@ #include <AK/NonnullRefPtrVector.h> #include <LibWeb/DOM/Node.h> #include <LibWeb/Parser/HTMLTokenizer.h> +#include <LibWeb/Parser/ListOfActiveFormattingElements.h> #include <LibWeb/Parser/StackOfOpenElements.h> #define ENUMERATE_INSERTION_MODES \ @@ -106,13 +107,13 @@ private: void decrement_script_nesting_level(); size_t script_nesting_level() const { return m_script_nesting_level; } void reset_the_insertion_mode_appropriately(); + void run_the_adoption_agency_algorithm(HTMLToken&); InsertionMode m_insertion_mode { InsertionMode::Initial }; InsertionMode m_original_insertion_mode { InsertionMode::Initial }; StackOfOpenElements m_stack_of_open_elements; - - NonnullRefPtrVector<Element> m_list_of_active_formatting_elements; + ListOfActiveFormattingElements m_list_of_active_formatting_elements; HTMLTokenizer m_tokenizer; diff --git a/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.cpp b/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.cpp new file mode 100644 index 0000000000..c49a94e050 --- /dev/null +++ b/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020, Andreas Kling <kling@serenityos.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <LibWeb/DOM/Element.h> +#include <LibWeb/Parser/ListOfActiveFormattingElements.h> + +namespace Web { + +ListOfActiveFormattingElements::~ListOfActiveFormattingElements() +{ +} + +void ListOfActiveFormattingElements::add(Element& element) +{ + m_entries.append({ element }); +} + +void ListOfActiveFormattingElements::add_marker() +{ + m_entries.append({ nullptr }); +} + +bool ListOfActiveFormattingElements::contains(const Element& element) const +{ + for (auto& entry : m_entries) { + if (entry.element == &element) + return true; + } + return false; +} + +Element* ListOfActiveFormattingElements::last_element_with_tag_name_before_marker(const FlyString& tag_name) +{ + for (ssize_t i = m_entries.size() - 1; i >= 0; --i) { + auto& entry = m_entries[i]; + if (entry.is_marker()) + return nullptr; + if (entry.element->tag_name() == tag_name) + return entry.element; + } + return nullptr; +} + +void ListOfActiveFormattingElements::remove(Element& element) +{ + m_entries.remove_first_matching([&](auto& entry) { + return entry.element == &element; + }); +} + +} diff --git a/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.h b/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.h new file mode 100644 index 0000000000..4c05f633bd --- /dev/null +++ b/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2020, Andreas Kling <kling@serenityos.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <AK/NonnullRefPtrVector.h> +#include <LibWeb/DOM/Element.h> +#include <LibWeb/Forward.h> + +namespace Web { + +class ListOfActiveFormattingElements { +public: + ListOfActiveFormattingElements() { } + ~ListOfActiveFormattingElements(); + + struct Entry { + bool is_marker() const { return !element; } + + RefPtr<Element> element; + }; + + bool is_empty() const { return m_entries.is_empty(); } + bool contains(const Element&) const; + + void add(Element& element); + void add_marker(); + + void remove(Element&); + + const Vector<Entry>& entries() const { return m_entries; } + Vector<Entry>& entries() { return m_entries; } + + Element* last_element_with_tag_name_before_marker(const FlyString& tag_name); + +private: + Vector<Entry> m_entries; +}; + +} diff --git a/Libraries/LibWeb/Parser/StackOfOpenElements.cpp b/Libraries/LibWeb/Parser/StackOfOpenElements.cpp index 8834385300..0a57ee344e 100644 --- a/Libraries/LibWeb/Parser/StackOfOpenElements.cpp +++ b/Libraries/LibWeb/Parser/StackOfOpenElements.cpp @@ -52,6 +52,23 @@ bool StackOfOpenElements::has_in_scope(const FlyString& tag_name) const return has_in_scope_impl(tag_name, s_base_list); } +bool 
StackOfOpenElements::has_in_scope_impl(const Element& target_node, const Vector<FlyString>& list) const +{ + for (ssize_t i = m_elements.size() - 1; i >= 0; --i) { + auto& node = m_elements.at(i); + if (&node == &target_node) + return true; + if (list.contains_slow(node.tag_name())) + return false; + } + ASSERT_NOT_REACHED(); +} + +bool StackOfOpenElements::has_in_scope(const Element& target_node) const +{ + return has_in_scope_impl(target_node, s_base_list); +} + bool StackOfOpenElements::has_in_button_scope(const FlyString& tag_name) const { auto list = s_base_list; diff --git a/Libraries/LibWeb/Parser/StackOfOpenElements.h b/Libraries/LibWeb/Parser/StackOfOpenElements.h index 5e3ed6ae52..4240676aec 100644 --- a/Libraries/LibWeb/Parser/StackOfOpenElements.h +++ b/Libraries/LibWeb/Parser/StackOfOpenElements.h @@ -48,12 +48,15 @@ public: bool has_in_button_scope(const FlyString& tag_name) const; bool has_in_table_scope(const FlyString& tag_name) const; + bool has_in_scope(const Element&) const; + bool contains(const Element&) const; const NonnullRefPtrVector<Element>& elements() const { return m_elements; } private: bool has_in_scope_impl(const FlyString& tag_name, const Vector<FlyString>&) const; + bool has_in_scope_impl(const Element& target_node, const Vector<FlyString>&) const; NonnullRefPtrVector<Element> m_elements; }; |