summary | refs | log | tree | commit | diff
path: root/Userland/Libraries/LibWeb/HTML/Parser
diff options
context:
space:
mode:
author: Andreas Kling <kling@serenityos.org> 2022-02-21 21:54:21 +0100
committer: Andreas Kling <kling@serenityos.org> 2022-02-21 22:00:28 +0100
commit: 8b2499b11211556cc5998aa4e08501cf580e4c93 (patch)
tree: 8be7873b82237c872d42276e2db94bbccf4c5d01 /Userland/Libraries/LibWeb/HTML/Parser
parent: bb1f26c149f74729f8141a8e09c46db8686dbc50 (diff)
download: serenity-8b2499b11211556cc5998aa4e08501cf580e4c93.zip
LibWeb: Make document.write() work while document is parsing
This necessitated making HTMLParser ref-counted, and having it register itself with Document when created. That makes it possible for scripts to add new input at the current parser insertion point. There is now a reference cycle between Document and HTMLParser. This cycle is explicitly broken by calling Document::detach_parser() at the end of HTMLParser::run(). This is a huge progression on ACID3, from 31% to 49%! :^)
Diffstat (limited to 'Userland/Libraries/LibWeb/HTML/Parser')
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp 53
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h 11
2 files changed, 40 insertions, 24 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
index 89e9d34231..4c2e4dda3f 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
@@ -121,8 +121,8 @@ static bool is_html_integration_point(DOM::Element const& element)
RefPtr<DOM::Document> parse_html_document(StringView data, const AK::URL& url, const String& encoding)
{
auto document = DOM::Document::create(url);
- HTMLParser parser(document, data, encoding);
- parser.run(url);
+ auto parser = HTMLParser::create(document, data, encoding);
+ parser->run(url);
return document;
}
@@ -131,6 +131,7 @@ HTMLParser::HTMLParser(DOM::Document& document, StringView input, const String&
, m_document(document)
{
m_tokenizer.set_parser({}, *this);
+ m_document->set_parser({}, *this);
m_document->set_should_invalidate_styles_on_attribute_changes(false);
auto standardized_encoding = TextCodec::get_standardized_encoding(encoding);
VERIFY(standardized_encoding.has_value());
@@ -140,6 +141,7 @@ HTMLParser::HTMLParser(DOM::Document& document, StringView input, const String&
HTMLParser::HTMLParser(DOM::Document& document)
: m_document(document)
{
+ m_document->set_parser({}, *this);
m_tokenizer.set_parser({}, *this);
}
@@ -201,6 +203,7 @@ void HTMLParser::run(const AK::URL& url)
m_document->set_source(m_tokenizer.source());
run();
the_end();
+ m_document->detach_parser({});
}
// https://html.spec.whatwg.org/multipage/parsing.html#the-end
@@ -3180,44 +3183,44 @@ DOM::Document& HTMLParser::document()
NonnullRefPtrVector<DOM::Node> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup)
{
auto temp_document = DOM::Document::create();
- HTMLParser parser(*temp_document, markup, "utf-8");
- parser.m_context_element = context_element;
- parser.m_parsing_fragment = true;
- parser.document().set_quirks_mode(context_element.document().mode());
+ auto parser = HTMLParser::create(*temp_document, markup, "utf-8");
+ parser->m_context_element = context_element;
+ parser->m_parsing_fragment = true;
+ parser->document().set_quirks_mode(context_element.document().mode());
if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
- parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
+ parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
} else if (context_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) {
- parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
+ parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
} else if (context_element.local_name().is_one_of(HTML::TagNames::script)) {
- parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
+ parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
} else if (context_element.local_name().is_one_of(HTML::TagNames::noscript)) {
if (context_element.document().is_scripting_enabled())
- parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
+ parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
} else if (context_element.local_name().is_one_of(HTML::TagNames::plaintext)) {
- parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
+ parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
}
auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML);
- parser.document().append_child(root);
- parser.m_stack_of_open_elements.push(root);
+ parser->document().append_child(root);
+ parser->m_stack_of_open_elements.push(root);
if (context_element.local_name() == HTML::TagNames::template_) {
- parser.m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
+ parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
}
// FIXME: Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
- parser.reset_the_insertion_mode_appropriately();
+ parser->reset_the_insertion_mode_appropriately();
for (auto* form_candidate = &context_element; form_candidate; form_candidate = form_candidate->parent_element()) {
if (is<HTMLFormElement>(*form_candidate)) {
- parser.m_form_element = verify_cast<HTMLFormElement>(*form_candidate);
+ parser->m_form_element = verify_cast<HTMLFormElement>(*form_candidate);
break;
}
}
- parser.run(context_element.document().url());
+ parser->run(context_element.document().url());
NonnullRefPtrVector<DOM::Node> children;
while (RefPtr<DOM::Node> child = root->first_child()) {
@@ -3228,13 +3231,23 @@ NonnullRefPtrVector<DOM::Node> HTMLParser::parse_html_fragment(DOM::Element& con
return children;
}
-NonnullOwnPtr<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Document& document, const ByteBuffer& input)
+NonnullRefPtr<HTMLParser> HTMLParser::create_for_scripting(DOM::Document& document)
+{
+ return adopt_ref(*new HTMLParser(document));
+}
+
+NonnullRefPtr<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Document& document, const ByteBuffer& input)
{
if (document.has_encoding())
- return make<HTMLParser>(document, input, document.encoding().value());
+ return adopt_ref(*new HTMLParser(document, input, document.encoding().value()));
auto encoding = run_encoding_sniffing_algorithm(document, input);
dbgln("The encoding sniffing algorithm returned encoding '{}'", encoding);
- return make<HTMLParser>(document, input, encoding);
+ return adopt_ref(*new HTMLParser(document, input, encoding));
+}
+
+NonnullRefPtr<HTMLParser> HTMLParser::create(DOM::Document& document, StringView input, String const& encoding)
+{
+ return adopt_ref(*new HTMLParser(document, input, encoding));
}
// https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h
index cbfa767632..34ffcdead6 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h
@@ -41,15 +41,15 @@ namespace Web::HTML {
RefPtr<DOM::Document> parse_html_document(StringView, const AK::URL&, const String& encoding);
-class HTMLParser {
+class HTMLParser : public RefCounted<HTMLParser> {
friend class HTMLTokenizer;
public:
- HTMLParser(DOM::Document&, StringView input, const String& encoding);
- HTMLParser(DOM::Document&);
~HTMLParser();
- static NonnullOwnPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, const ByteBuffer& input);
+ static NonnullRefPtr<HTMLParser> create_for_scripting(DOM::Document&);
+ static NonnullRefPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input);
+ static NonnullRefPtr<HTMLParser> create(DOM::Document&, StringView input, String const& encoding);
void run();
void run(const AK::URL&);
@@ -76,6 +76,9 @@ public:
size_t script_nesting_level() const { return m_script_nesting_level; }
private:
+ HTMLParser(DOM::Document&, StringView input, const String& encoding);
+ HTMLParser(DOM::Document&);
+
const char* insertion_mode_name() const;
DOM::QuirksMode which_quirks_mode(const HTMLToken&) const;