diff options
author | Andreas Kling <kling@serenityos.org> | 2020-06-25 23:42:08 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-06-26 00:53:25 +0200 |
commit | 92d831c25b96d0a6657dba2a3f2a5099db4e27e1 (patch) | |
tree | 80b6e811b5220482a6dde9c14c3deeb2c2bd865a | |
parent | eb33021d65b776bb2baae0ec05348f8c1d8d6106 (diff) | |
download | serenity-92d831c25b96d0a6657dba2a3f2a5099db4e27e1.zip |
LibWeb: Implement fragment parsing and use it for Element.innerHTML
This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)
-rw-r--r-- | Libraries/LibWeb/DOM/Document.cpp | 8 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/Document.h | 2 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/Element.cpp | 11 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/Node.cpp | 7 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/Node.h | 8 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLDocumentParser.cpp | 69 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLDocumentParser.h | 4 |
7 files changed, 91 insertions, 18 deletions
diff --git a/Libraries/LibWeb/DOM/Document.cpp b/Libraries/LibWeb/DOM/Document.cpp index 4c9810f374..93f27062ee 100644 --- a/Libraries/LibWeb/DOM/Document.cpp +++ b/Libraries/LibWeb/DOM/Document.cpp @@ -468,4 +468,12 @@ NonnullRefPtrVector<HTMLScriptElement> Document::take_scripts_to_execute_as_soon return move(m_scripts_to_execute_as_soon_as_possible); } +void Document::adopt_node(Node& subtree_root) +{ + subtree_root.for_each_in_subtree([&](auto& node) { + node.set_document({}, *this); + return IterationDecision::Continue; + }); +} + } diff --git a/Libraries/LibWeb/DOM/Document.h b/Libraries/LibWeb/DOM/Document.h index ff03cd1329..f40f0f81cb 100644 --- a/Libraries/LibWeb/DOM/Document.h +++ b/Libraries/LibWeb/DOM/Document.h @@ -145,6 +145,8 @@ public: bool in_quirks_mode() const { return m_quirks_mode; } void set_quirks_mode(bool mode) { m_quirks_mode = mode; } + void adopt_node(Node&); + private: virtual RefPtr<LayoutNode> create_layout_node(const StyleProperties* parent_style) override; diff --git a/Libraries/LibWeb/DOM/Element.cpp b/Libraries/LibWeb/DOM/Element.cpp index af7dd041a7..f6439ee87a 100644 --- a/Libraries/LibWeb/DOM/Element.cpp +++ b/Libraries/LibWeb/DOM/Element.cpp @@ -41,7 +41,7 @@ #include <LibWeb/Layout/LayoutTableRow.h> #include <LibWeb/Layout/LayoutTableRowGroup.h> #include <LibWeb/Layout/LayoutTreeBuilder.h> -#include <LibWeb/Parser/HTMLParser.h> +#include <LibWeb/Parser/HTMLDocumentParser.h> namespace Web { @@ -247,13 +247,10 @@ NonnullRefPtr<StyleProperties> Element::computed_style() void Element::set_inner_html(StringView markup) { - auto fragment = parse_html_fragment(document(), markup); + auto new_children = HTMLDocumentParser::parse_html_fragment(*this, markup); remove_all_children(); - if (!fragment) - return; - while (RefPtr<Node> child = fragment->first_child()) { - fragment->remove_child(*child); - append_child(*child); + while (!new_children.is_empty()) { + append_child(new_children.take_first()); } set_needs_style_update(true); diff --git a/Libraries/LibWeb/DOM/Node.cpp b/Libraries/LibWeb/DOM/Node.cpp index 80aa2248c6..9869de0e52 100644 --- a/Libraries/LibWeb/DOM/Node.cpp +++ b/Libraries/LibWeb/DOM/Node.cpp @@ -51,7 +51,7 @@ namespace Web { Node::Node(Document& document, NodeType type) - : m_document(document) + : m_document(&document) , m_type(type) { } @@ -212,4 +212,9 @@ RefPtr<Node> Node::insert_before(NonnullRefPtr<Node> node, RefPtr<Node> child, b return node; } +void Node::set_document(Badge<Document>, Document& document) +{ + m_document = &document; +} + } diff --git a/Libraries/LibWeb/DOM/Node.h b/Libraries/LibWeb/DOM/Node.h index 987e0794dc..4e1da6b0e1 100644 --- a/Libraries/LibWeb/DOM/Node.h +++ b/Libraries/LibWeb/DOM/Node.h @@ -91,8 +91,8 @@ public: virtual String text_content() const; - Document& document() { return m_document; } - const Document& document() const { return m_document; } + Document& document() { return *m_document; } + const Document& document() const { return *m_document; } const HTMLAnchorElement* enclosing_link_element() const; const HTMLElement* enclosing_html_element() const; @@ -140,10 +140,12 @@ public: virtual void document_did_attach_to_frame(Frame&) {} virtual void document_will_detach_from_frame(Frame&) {} + void set_document(Badge<Document>, Document&); + protected: Node(Document&, NodeType); - Document& m_document; + Document* m_document { nullptr }; mutable LayoutNode* m_layout_node { nullptr }; NodeType m_type { NodeType::INVALID }; bool m_needs_style_update { true }; diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp index 1c316e0d41..4d98e935d2 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp @@ -56,6 +56,7 @@ RefPtr<Document> parse_html_document(const StringView& data, const URL& url, con HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding) : m_tokenizer(input, encoding) { + m_document = adopt(*new Document); } HTMLDocumentParser::~HTMLDocumentParser() @@ -64,7 +65,6 @@ HTMLDocumentParser::~HTMLDocumentParser() void HTMLDocumentParser::run(const URL& url) { - m_document = adopt(*new Document); m_document->set_url(url); m_document->set_source(m_tokenizer.source()); @@ -2518,10 +2518,17 @@ void HTMLDocumentParser::handle_after_after_frameset(HTMLToken& token) PARSE_ERROR(); } -void HTMLDocumentParser::reset_the_insertion_mode_appropriately() +void HTMLDocumentParser::reset_the_insertion_mode_appropriately(const Element* context_element) { for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { - RefPtr<Element> node = m_stack_of_open_elements.elements().at(i); + + // NOTE: When parsing fragments, we substitute the context element for the root of the stack of open elements. + RefPtr<Element> node; + if (i == 0 && context_element) { + node = context_element; + } else { + node = m_stack_of_open_elements.elements().at(i); + } if (node->tag_name() == HTML::TagNames::select) { TODO(); @@ -2580,9 +2587,6 @@ void HTMLDocumentParser::reset_the_insertion_mode_appropriately() } m_insertion_mode = InsertionMode::InBody; - if (m_parsing_fragment) { - TODO(); - } } const char* HTMLDocumentParser::insertion_mode_name() const @@ -2601,4 +2605,57 @@ Document& HTMLDocumentParser::document() { return *m_document; } + +NonnullRefPtrVector<Node> HTMLDocumentParser::parse_html_fragment(Element& context_element, const StringView& markup) +{ + HTMLDocumentParser parser(markup, "utf-8"); + parser.m_parsing_fragment = true; + parser.document().set_quirks_mode(context_element.document().in_quirks_mode()); + + if (context_element.tag_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) { + parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); + } else if (context_element.tag_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) { + parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); + } else if (context_element.tag_name().is_one_of(HTML::TagNames::script)) { + parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData); + } else if (context_element.tag_name().is_one_of(HTML::TagNames::noscript)) { + if (context_element.document().is_scripting_enabled()) + parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); + } else if (context_element.tag_name().is_one_of(HTML::TagNames::noscript)) { + if (context_element.document().is_scripting_enabled()) + parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); + } else if (context_element.tag_name().is_one_of(HTML::TagNames::plaintext)) { + parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT); + } + + auto root = create_element(context_element.document(), HTML::TagNames::html); + parser.document().append_child(root); + parser.m_stack_of_open_elements.push(root); + + if (context_element.tag_name() == HTML::TagNames::template_) { + TODO(); + } + + // FIXME: Create a start tag token whose name is the local name of context and whose attributes are the attributes of context. + + parser.reset_the_insertion_mode_appropriately(&context_element); + + for (auto* form_candidate = &context_element; form_candidate; form_candidate = form_candidate->parent_element()) { + if (is<HTMLFormElement>(*form_candidate)) { + parser.m_form_element = to<HTMLFormElement>(*form_candidate); + break; + } + } + + parser.run(context_element.document().url()); + + NonnullRefPtrVector<Node> children; + while (RefPtr<Node> child = root->first_child()) { + root->remove_child(*child); + context_element.document().adopt_node(*child); + children.append(*child); + } + return children; +} + } diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.h b/Libraries/LibWeb/Parser/HTMLDocumentParser.h index fa1bb2c33f..4f33776b62 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h @@ -70,6 +70,8 @@ public: Document& document(); + static NonnullRefPtrVector<Node> parse_html_fragment(Element& context_element, const StringView&); + enum class InsertionMode { #define __ENUMERATE_INSERTION_MODE(mode) mode, ENUMERATE_INSERTION_MODES @@ -134,7 +136,7 @@ private: void increment_script_nesting_level(); void decrement_script_nesting_level(); size_t script_nesting_level() const { return m_script_nesting_level; } - void reset_the_insertion_mode_appropriately(); + void reset_the_insertion_mode_appropriately(const Element* context_element = nullptr); void adjust_mathml_attributes(HTMLToken&); void adjust_svg_attributes(HTMLToken&); |