diff options
author | Simon Wanner <skyrising@pvpctutorials.de> | 2022-03-20 02:11:42 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2022-03-20 02:52:37 +0100 |
commit | 1d95745901aced2f272cdf3c2a05d0deea3d91a3 (patch) | |
tree | 932f4f396264b61f29160b071e1fb7554acc0fd4 /Userland/Libraries/LibWeb/HTML/Parser | |
parent | e165ae5b60bac9dc26905b3a9d00b60481187f3e (diff) | |
download | serenity-1d95745901aced2f272cdf3c2a05d0deea3d91a3.zip |
LibWeb: Implement the rest of the Adoption Agency Algorithm
This gets us 2 points on html5test.com :^)
- Before: https://html5te.st/4cf57659bc08272e (208)
- After: https://html5te.st/fb8a9259bda1c115 (210)
Diffstat (limited to 'Userland/Libraries/LibWeb/HTML/Parser')
6 files changed, 226 insertions, 30 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp index 203fcf2456..01b3e91fba 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp @@ -535,9 +535,9 @@ DOM::Element& HTMLParser::node_before_current_node() } // https://html.spec.whatwg.org/multipage/parsing.html#appropriate-place-for-inserting-a-node -HTMLParser::AdjustedInsertionLocation HTMLParser::find_appropriate_place_for_inserting_node() +HTMLParser::AdjustedInsertionLocation HTMLParser::find_appropriate_place_for_inserting_node(RefPtr<DOM::Element> override_target) { - auto& target = current_node(); + auto& target = override_target ? *override_target.ptr() : current_node(); HTMLParser::AdjustedInsertionLocation adjusted_insertion_location; // 2. Determine the adjusted insertion location using the first matching steps from the following list: @@ -1148,50 +1148,188 @@ Create: goto Advance; } +// https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm HTMLParser::AdoptionAgencyAlgorithmOutcome HTMLParser::run_the_adoption_agency_algorithm(HTMLToken& token) { - auto subject = token.tag_name(); + // 1. Let subject be token's tag name. + auto& subject = token.tag_name(); - // If the current node is an HTML element whose tag name is subject, - // and the current node is not in the list of active formatting elements, - // then pop the current node off the stack of open elements, and return. + // 2. If the current node is an HTML element whose tag name is subject, + // and the current node is not in the list of active formatting elements, + // then pop the current node off the stack of open elements, and return. if (current_node().local_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) { (void)m_stack_of_open_elements.pop(); return AdoptionAgencyAlgorithmOutcome::DoNothing; } - auto formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject); - if (!formatting_element) - return AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps; + // 3. Let outer loop counter be 0. + size_t outer_loop_counter = 0; - if (!m_stack_of_open_elements.contains(*formatting_element)) { - log_parse_error(); - m_list_of_active_formatting_elements.remove(*formatting_element); - return AdoptionAgencyAlgorithmOutcome::DoNothing; - } + // 4. While true: + while (true) { + // 1. If outer loop counter is greater than or equal to 8, then return. + if (outer_loop_counter >= 8) + return AdoptionAgencyAlgorithmOutcome::DoNothing; - if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) { - log_parse_error(); - return AdoptionAgencyAlgorithmOutcome::DoNothing; - } + // 2. Increment outer loop counter by 1. + outer_loop_counter++; - if (formatting_element != ¤t_node()) { - log_parse_error(); - } + // 3. Let formatting element be the last element in the list of active formatting elements that: + // - is between the end of the list and the last marker in the list, if any, or the start of the list otherwise, and + // - has the tag name subject. + auto* formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject); + + // If there is no such element, then return and instead act as described in the "any other end tag" entry above. + if (!formatting_element) + return AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps; - RefPtr<DOM::Element> furthest_block = m_stack_of_open_elements.topmost_special_node_below(*formatting_element); + // 4. If formatting element is not in the stack of open elements, + if (!m_stack_of_open_elements.contains(*formatting_element)) { + // then this is a parse error; + log_parse_error(); + // remove the element from the list, + m_list_of_active_formatting_elements.remove(*formatting_element); + // and return. + return AdoptionAgencyAlgorithmOutcome::DoNothing; + } - if (!furthest_block) { - while (¤t_node() != formatting_element) + // 5. If formatting element is in the stack of open elements, but the element is not in scope, + if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) { + // then this is a parse error; + log_parse_error(); + // return. + return AdoptionAgencyAlgorithmOutcome::DoNothing; + } + + // 6. If formatting element is not the current node, + if (formatting_element != ¤t_node()) { + // this is a parse error. (But do not return.) + log_parse_error(); + } + + // 7. Let furthest block be the topmost node in the stack of open elements that is lower in the stack than formatting element, + // and is an element in the special category. There might not be one. + RefPtr<DOM::Element> furthest_block = m_stack_of_open_elements.topmost_special_node_below(*formatting_element); + + // 8. If there is no furthest block + if (!furthest_block) { + // then the UA must first pop all the nodes from the bottom of the stack of open elements, + // from the current node up to and including formatting element, + while (¤t_node() != formatting_element) + (void)m_stack_of_open_elements.pop(); (void)m_stack_of_open_elements.pop(); - (void)m_stack_of_open_elements.pop(); + // then remove formatting element from the list of active formatting elements, + m_list_of_active_formatting_elements.remove(*formatting_element); + // and finally return. + return AdoptionAgencyAlgorithmOutcome::DoNothing; + } + + // 9. Let common ancestor be the element immediately above formatting element in the stack of open elements. + auto common_ancestor = m_stack_of_open_elements.element_immediately_above(*formatting_element); + + // 10. Let a bookmark note the position of formatting element in the list of active formatting elements + // relative to the elements on either side of it in the list. + auto bookmark = m_list_of_active_formatting_elements.find_index(*formatting_element).value(); + + // 11. Let node and last node be furthest block. + auto node = furthest_block; + auto last_node = furthest_block; + + // Keep track of this for later + auto node_above_node = m_stack_of_open_elements.element_immediately_above(*node); + + // 12. Let inner loop counter be 0. + size_t inner_loop_counter = 0; + + // 13. While true: + while (true) { + // 1. Increment inner loop counter by 1. + inner_loop_counter++; + + // 2. Let node be the element immediately above node in the stack of open elements, + // or if node is no longer in the stack of open elements (e.g. because it got removed by this algorithm), + // the element that was immediately above node in the stack of open elements before node was removed. + node = node_above_node; + VERIFY(node); + + // Keep track of this for later + node_above_node = m_stack_of_open_elements.element_immediately_above(*node); + + // 3. If node is formatting element, then break. + if (node == formatting_element) + break; + + // 4. If inner loop counter is greater than 3 and node is in the list of active formatting elements, + if (inner_loop_counter > 3 && m_list_of_active_formatting_elements.contains(*node)) { + auto node_index = m_list_of_active_formatting_elements.find_index(*node); + if (node_index.has_value() && node_index.value() < bookmark) + bookmark--; + // then remove node from the list of active formatting elements. + m_list_of_active_formatting_elements.remove(*node); + } + + // 5. If node is not in the list of active formatting elements + if (!m_list_of_active_formatting_elements.contains(*node)) { + // then remove node from the stack of open elements and continue. + m_stack_of_open_elements.remove(*node); + continue; + } + + // 6. Create an element for the token for which the element node was created, + // in the HTML namespace, with common ancestor as the intended parent; + // FIXME: hold onto the real token + auto element = create_element_for(HTMLToken::make_start_tag(node->local_name()), Namespace::HTML, *common_ancestor); + // replace the entry for node in the list of active formatting elements with an entry for the new element, + m_list_of_active_formatting_elements.replace(*node, *element); + // replace the entry for node in the stack of open elements with an entry for the new element, + m_stack_of_open_elements.replace(*node, element); + // and let node be the new element. + node = element; + + // 7. If last node is furthest block, + if (last_node == furthest_block) { + // then move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements. + bookmark = m_list_of_active_formatting_elements.find_index(*node).value() + 1; + } + + // 8. Append last node to node. + node->append_child(*last_node); + + // 9. Set last node to node. + last_node = node; + } + + // 14. Insert whatever last node ended up being in the previous step at the appropriate place for inserting a node, + // but using common ancestor as the override target. + auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(common_ancestor); + adjusted_insertion_location.parent->insert_before(*last_node, adjusted_insertion_location.insert_before_sibling, false); + + // 15. Create an element for the token for which formatting element was created, + // in the HTML namespace, with furthest block as the intended parent. + // FIXME: hold onto the real token + auto element = create_element_for(HTMLToken::make_start_tag(formatting_element->local_name()), Namespace::HTML, *furthest_block); + + // 16. Take all of the child nodes of furthest block and append them to the element created in the last step. + for (auto& child : furthest_block->children_as_vector()) + element->append_child(furthest_block->remove_child(child).release_value()); + + // 17. Append that new element to furthest block. + furthest_block->append_child(element); + + // 18. Remove formatting element from the list of active formatting elements, + // and insert the new element into the list of active formatting elements at the position of the aforementioned bookmark. + auto formatting_element_index = m_list_of_active_formatting_elements.find_index(*formatting_element); + if (formatting_element_index.has_value() && formatting_element_index.value() < bookmark) + bookmark--; m_list_of_active_formatting_elements.remove(*formatting_element); - return AdoptionAgencyAlgorithmOutcome::DoNothing; - } + m_list_of_active_formatting_elements.insert_at(bookmark, *element); - // FIXME: Implement the rest of the AAA :^) - return AdoptionAgencyAlgorithmOutcome::DoNothing; + // 19. Remove formatting element from the stack of open elements, and insert the new element + // into the stack of open elements immediately below the position of furthest block in that stack. + m_stack_of_open_elements.remove(*formatting_element); + m_stack_of_open_elements.insert_immediately_below(*element, *furthest_block); + } } bool HTMLParser::is_special_tag(const FlyString& tag_name, const FlyString& namespace_) diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h index c6045223cf..78137adf24 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h @@ -120,7 +120,7 @@ private: RefPtr<DOM::Node> insert_before_sibling; }; - AdjustedInsertionLocation find_appropriate_place_for_inserting_node(); + AdjustedInsertionLocation find_appropriate_place_for_inserting_node(RefPtr<DOM::Element> override_target = nullptr); DOM::Text* find_character_insertion_node(); void flush_character_insertions(); diff --git a/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.cpp b/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.cpp index 4261117e60..838f147df6 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.cpp @@ -59,4 +59,26 @@ void ListOfActiveFormattingElements::clear_up_to_the_last_marker() } } +Optional<size_t> ListOfActiveFormattingElements::find_index(DOM::Element const& element) const +{ + for (size_t i = 0; i < m_entries.size(); i++) { + if (m_entries[i].element == element) + return i; + } + return {}; +} + +void ListOfActiveFormattingElements::replace(DOM::Element& to_remove, DOM::Element& to_add) +{ + for (size_t i = 0; i < m_entries.size(); i++) { + if (m_entries[i].element == to_remove) + m_entries[i].element = to_add; + } +} + +void ListOfActiveFormattingElements::insert_at(size_t index, DOM::Element& element) +{ + m_entries.insert(index, { element }); +} + } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.h b/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.h index 15fc533d73..9476e28497 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.h @@ -28,6 +28,9 @@ public: void add(DOM::Element& element); void add_marker(); + void insert_at(size_t index, DOM::Element& element); + + void replace(DOM::Element& to_remove, DOM::Element& to_add); void remove(DOM::Element&); @@ -38,6 +41,8 @@ public: void clear_up_to_the_last_marker(); + Optional<size_t> find_index(DOM::Element const&) const; + private: Vector<Entry> m_entries; }; diff --git a/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp b/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp index 35027eb81f..60367b9327 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp @@ -134,4 +134,32 @@ DOM::Element* StackOfOpenElements::element_immediately_above(DOM::Element const& return nullptr; } +void StackOfOpenElements::remove(const DOM::Element& element) +{ + m_elements.remove_first_matching([&element](DOM::Element const& other) { + return &other == &element; + }); +} + +void StackOfOpenElements::replace(const DOM::Element& to_remove, NonnullRefPtr<DOM::Element> to_add) +{ + for (size_t i = 0; i < m_elements.size(); i++) { + if (&m_elements[i] == &to_remove) { + m_elements.remove(i); + m_elements.insert(i, move(to_add)); + break; + } + } +} + +void StackOfOpenElements::insert_immediately_below(NonnullRefPtr<DOM::Element> element_to_add, DOM::Element const& target) +{ + for (size_t i = 0; i < m_elements.size(); i++) { + if (&m_elements[i] == &target) { + m_elements.insert(i + 1, move(element_to_add)); + break; + } + } +} + } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h b/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h index 4c456f1b58..8c6baae04a 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h @@ -29,6 +29,9 @@ public: bool is_empty() const { return m_elements.is_empty(); } void push(NonnullRefPtr<DOM::Element> element) { m_elements.append(move(element)); } NonnullRefPtr<DOM::Element> pop() { return m_elements.take_last(); } + void remove(DOM::Element const& element); + void replace(DOM::Element const& to_remove, NonnullRefPtr<DOM::Element> to_add); + void insert_immediately_below(NonnullRefPtr<DOM::Element> element_to_add, DOM::Element const& target); const DOM::Element& current_node() const { return m_elements.last(); } DOM::Element& current_node() { return m_elements.last(); } |