summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andreas Kling <kling@serenityos.org> 2020-06-25 23:42:08 +0200
committer Andreas Kling <kling@serenityos.org> 2020-06-26 00:53:25 +0200
commit 92d831c25b96d0a6657dba2a3f2a5099db4e27e1 (patch)
tree 80b6e811b5220482a6dde9c14c3deeb2c2bd865a
parent eb33021d65b776bb2baae0ec05348f8c1d8d6106 (diff)
download serenity-92d831c25b96d0a6657dba2a3f2a5099db4e27e1.zip
LibWeb: Implement fragment parsing and use it for Element.innerHTML
This patch implements most of the HTML fragment parsing algorithm and ports Element::set_inner_html() to it. This was the last remaining user of the old HTML parser. :^)
-rw-r--r-- Libraries/LibWeb/DOM/Document.cpp 8
-rw-r--r-- Libraries/LibWeb/DOM/Document.h 2
-rw-r--r-- Libraries/LibWeb/DOM/Element.cpp 11
-rw-r--r-- Libraries/LibWeb/DOM/Node.cpp 7
-rw-r--r-- Libraries/LibWeb/DOM/Node.h 8
-rw-r--r-- Libraries/LibWeb/Parser/HTMLDocumentParser.cpp 69
-rw-r--r-- Libraries/LibWeb/Parser/HTMLDocumentParser.h 4
7 files changed, 91 insertions, 18 deletions
diff --git a/Libraries/LibWeb/DOM/Document.cpp b/Libraries/LibWeb/DOM/Document.cpp
index 4c9810f374..93f27062ee 100644
--- a/Libraries/LibWeb/DOM/Document.cpp
+++ b/Libraries/LibWeb/DOM/Document.cpp
@@ -468,4 +468,12 @@ NonnullRefPtrVector<HTMLScriptElement> Document::take_scripts_to_execute_as_soon
return move(m_scripts_to_execute_as_soon_as_possible);
}
+void Document::adopt_node(Node& subtree_root) // Makes this document the owner of the nodes visited under subtree_root.
+{
+ subtree_root.for_each_in_subtree([&](auto& node) {
+ node.set_document({}, *this); // `{}` supplies the Badge<Document> argument that Node::set_document() takes.
+ return IterationDecision::Continue; // Never stops early. NOTE(review): only the document pointer is rewritten here; nothing detaches the subtree from its current parent.
+ });
+}
+
}
diff --git a/Libraries/LibWeb/DOM/Document.h b/Libraries/LibWeb/DOM/Document.h
index ff03cd1329..f40f0f81cb 100644
--- a/Libraries/LibWeb/DOM/Document.h
+++ b/Libraries/LibWeb/DOM/Document.h
@@ -145,6 +145,8 @@ public:
bool in_quirks_mode() const { return m_quirks_mode; }
void set_quirks_mode(bool mode) { m_quirks_mode = mode; }
+ void adopt_node(Node&);
+
private:
virtual RefPtr<LayoutNode> create_layout_node(const StyleProperties* parent_style) override;
diff --git a/Libraries/LibWeb/DOM/Element.cpp b/Libraries/LibWeb/DOM/Element.cpp
index af7dd041a7..f6439ee87a 100644
--- a/Libraries/LibWeb/DOM/Element.cpp
+++ b/Libraries/LibWeb/DOM/Element.cpp
@@ -41,7 +41,7 @@
#include <LibWeb/Layout/LayoutTableRow.h>
#include <LibWeb/Layout/LayoutTableRowGroup.h>
#include <LibWeb/Layout/LayoutTreeBuilder.h>
-#include <LibWeb/Parser/HTMLParser.h>
+#include <LibWeb/Parser/HTMLDocumentParser.h>
namespace Web {
@@ -247,13 +247,10 @@ NonnullRefPtr<StyleProperties> Element::computed_style()
void Element::set_inner_html(StringView markup)
{
- auto fragment = parse_html_fragment(document(), markup);
+ auto new_children = HTMLDocumentParser::parse_html_fragment(*this, markup);
remove_all_children();
- if (!fragment)
- return;
- while (RefPtr<Node> child = fragment->first_child()) {
- fragment->remove_child(*child);
- append_child(*child);
+ while (!new_children.is_empty()) {
+ append_child(new_children.take_first());
}
set_needs_style_update(true);
diff --git a/Libraries/LibWeb/DOM/Node.cpp b/Libraries/LibWeb/DOM/Node.cpp
index 80aa2248c6..9869de0e52 100644
--- a/Libraries/LibWeb/DOM/Node.cpp
+++ b/Libraries/LibWeb/DOM/Node.cpp
@@ -51,7 +51,7 @@
namespace Web {
Node::Node(Document& document, NodeType type)
- : m_document(document)
+ : m_document(&document)
, m_type(type)
{
}
@@ -212,4 +212,9 @@ RefPtr<Node> Node::insert_before(NonnullRefPtr<Node> node, RefPtr<Node> child, b
return node;
}
+void Node::set_document(Badge<Document>, Document& document) // Re-points this node at `document`; the Badge<Document> parameter is unnamed and unused (presumably it limits callers to Document -- confirm against AK/Badge.h).
+{
+ m_document = &document; // Stored as a raw pointer; no other node state is touched here.
+}
+
}
diff --git a/Libraries/LibWeb/DOM/Node.h b/Libraries/LibWeb/DOM/Node.h
index 987e0794dc..4e1da6b0e1 100644
--- a/Libraries/LibWeb/DOM/Node.h
+++ b/Libraries/LibWeb/DOM/Node.h
@@ -91,8 +91,8 @@ public:
virtual String text_content() const;
- Document& document() { return m_document; }
- const Document& document() const { return m_document; }
+ Document& document() { return *m_document; }
+ const Document& document() const { return *m_document; }
const HTMLAnchorElement* enclosing_link_element() const;
const HTMLElement* enclosing_html_element() const;
@@ -140,10 +140,12 @@ public:
virtual void document_did_attach_to_frame(Frame&) {}
virtual void document_will_detach_from_frame(Frame&) {}
+ void set_document(Badge<Document>, Document&);
+
protected:
Node(Document&, NodeType);
- Document& m_document;
+ Document* m_document { nullptr };
mutable LayoutNode* m_layout_node { nullptr };
NodeType m_type { NodeType::INVALID };
bool m_needs_style_update { true };
diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
index 1c316e0d41..4d98e935d2 100644
--- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
+++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
@@ -56,6 +56,7 @@ RefPtr<Document> parse_html_document(const StringView& data, const URL& url, con
HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding)
: m_tokenizer(input, encoding)
{
+ m_document = adopt(*new Document);
}
HTMLDocumentParser::~HTMLDocumentParser()
@@ -64,7 +65,6 @@ HTMLDocumentParser::~HTMLDocumentParser()
void HTMLDocumentParser::run(const URL& url)
{
- m_document = adopt(*new Document);
m_document->set_url(url);
m_document->set_source(m_tokenizer.source());
@@ -2518,10 +2518,17 @@ void HTMLDocumentParser::handle_after_after_frameset(HTMLToken& token)
PARSE_ERROR();
}
-void HTMLDocumentParser::reset_the_insertion_mode_appropriately()
+void HTMLDocumentParser::reset_the_insertion_mode_appropriately(const Element* context_element)
{
for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
- RefPtr<Element> node = m_stack_of_open_elements.elements().at(i);
+
+ // NOTE: When parsing fragments, we substitute the context element for the root of the stack of open elements.
+ RefPtr<Element> node;
+ if (i == 0 && context_element) {
+ node = context_element;
+ } else {
+ node = m_stack_of_open_elements.elements().at(i);
+ }
if (node->tag_name() == HTML::TagNames::select) {
TODO();
@@ -2580,9 +2587,6 @@ void HTMLDocumentParser::reset_the_insertion_mode_appropriately()
}
m_insertion_mode = InsertionMode::InBody;
- if (m_parsing_fragment) {
- TODO();
- }
}
const char* HTMLDocumentParser::insertion_mode_name() const
@@ -2601,4 +2605,57 @@ Document& HTMLDocumentParser::document()
{
return *m_document;
}
+
+// Parses `markup` as an HTML fragment in the context of `context_element`.
+//
+// A temporary parser is constructed for the markup (its constructor creates a
+// fresh Document), the fragment is parsed beneath a synthetic <html> root, and
+// the root's children are then detached, adopted into the context element's
+// document, and returned in order.
+//
+// context_element: the element the fragment is parsed "inside of"; it selects
+//                  the tokenizer's initial state, the insertion mode, and the
+//                  form element pointer.
+// markup:          the source text to parse (tokenized as "utf-8").
+// Returns the parsed top-level nodes; the caller decides where to insert them.
+NonnullRefPtrVector<Node> HTMLDocumentParser::parse_html_fragment(Element& context_element, const StringView& markup)
+{
+    HTMLDocumentParser parser(markup, "utf-8");
+    parser.m_parsing_fragment = true;
+    // The temporary document inherits the quirks mode of the context element's document.
+    parser.document().set_quirks_mode(context_element.document().in_quirks_mode());
+
+    // Choose the tokenizer's starting state from the context element's tag name.
+    if (context_element.tag_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
+        parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
+    } else if (context_element.tag_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) {
+        parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
+    } else if (context_element.tag_name().is_one_of(HTML::TagNames::script)) {
+        parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
+    } else if (context_element.tag_name().is_one_of(HTML::TagNames::noscript)) {
+        // <noscript> content is raw text only when scripting is enabled;
+        // otherwise the tokenizer state is left untouched.
+        if (context_element.document().is_scripting_enabled())
+            parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
+    } else if (context_element.tag_name().is_one_of(HTML::TagNames::plaintext)) {
+        parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
+    }
+
+    // Synthetic <html> root: appended to the temporary document and pushed onto
+    // the stack of open elements so parsed nodes have somewhere to land.
+    auto root = create_element(context_element.document(), HTML::TagNames::html);
+    parser.document().append_child(root);
+    parser.m_stack_of_open_elements.push(root);
+
+    if (context_element.tag_name() == HTML::TagNames::template_) {
+        TODO();
+    }
+
+    // FIXME: Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
+
+    // The context element stands in for the bottom of the stack when the insertion mode is chosen.
+    parser.reset_the_insertion_mode_appropriately(&context_element);
+
+    // The nearest <form>, starting at the context element itself and walking
+    // up through its ancestors, becomes the parser's form element.
+    for (auto* form_candidate = &context_element; form_candidate; form_candidate = form_candidate->parent_element()) {
+        if (is<HTMLFormElement>(*form_candidate)) {
+            parser.m_form_element = to<HTMLFormElement>(*form_candidate);
+            break;
+        }
+    }
+
+    parser.run(context_element.document().url());
+
+    // Move every child of the synthetic root over to the context element's
+    // document and collect it for the caller, preserving order.
+    NonnullRefPtrVector<Node> children;
+    while (RefPtr<Node> child = root->first_child()) {
+        root->remove_child(*child);
+        context_element.document().adopt_node(*child);
+        children.append(*child);
+    }
+    return children;
+}
+
}
diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.h b/Libraries/LibWeb/Parser/HTMLDocumentParser.h
index fa1bb2c33f..4f33776b62 100644
--- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h
+++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h
@@ -70,6 +70,8 @@ public:
Document& document();
+ static NonnullRefPtrVector<Node> parse_html_fragment(Element& context_element, const StringView&);
+
enum class InsertionMode {
#define __ENUMERATE_INSERTION_MODE(mode) mode,
ENUMERATE_INSERTION_MODES
@@ -134,7 +136,7 @@ private:
void increment_script_nesting_level();
void decrement_script_nesting_level();
size_t script_nesting_level() const { return m_script_nesting_level; }
- void reset_the_insertion_mode_appropriately();
+ void reset_the_insertion_mode_appropriately(const Element* context_element = nullptr);
void adjust_mathml_attributes(HTMLToken&);
void adjust_svg_attributes(HTMLToken&);