summaryrefslogtreecommitdiff
path: root/Userland
diff options
context:
space:
mode:
author Andreas Kling <kling@serenityos.org> 2022-09-20 21:08:14 +0200
committer Andreas Kling <kling@serenityos.org> 2022-09-20 23:44:59 +0200
commit ab8432783e8d6187a91e48149bf5731e912d6349 (patch)
tree 68a329606b7a99b275c9fbe8b9f910e245f53e18 /Userland
parent 37ed1b28fa5459345e5c9d7a889752a43ea23c59 (diff)
download serenity-ab8432783e8d6187a91e48149bf5731e912d6349.zip
LibWeb: Implement aborting the HTML parser
This is roughly on-spec, although I had to invent a simple "aborted" state for the tokenizer.
Diffstat (limited to 'Userland')
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp 21
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h 4
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp 3
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h 5
4 files changed, 33 insertions, 0 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
index 5885c21291..84a960b62d 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
@@ -3751,4 +3751,25 @@ JS::Realm& HTMLParser::realm()
return m_document->realm();
}
+// Implements the spec's "abort a parser" steps: https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
+void HTMLParser::abort()
+{
+ // 1. Throw away any pending content in the input stream, and discard any future content that would have been added to it.
+ m_tokenizer.abort();
+
+ // FIXME: 2. Stop the speculative HTML parser for this HTML parser. (No code runs for this step yet.)
+
+ // 3. Update the current document readiness to "interactive".
+ m_document->update_readiness(DocumentReadyState::Interactive);
+
+ // 4. Pop all the nodes off the stack of open elements.
+ while (!m_stack_of_open_elements.is_empty())
+ m_stack_of_open_elements.pop();
+
+ // 5. Update the current document readiness to "complete".
+ m_document->update_readiness(DocumentReadyState::Complete);
+
+ m_aborted = true; // Not a numbered spec step: records the abort so that aborted() returns true afterwards.
+}
+
}
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h
index 6838972ba4..861acc4b08 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h
@@ -68,7 +68,11 @@ public:
HTMLTokenizer& tokenizer() { return m_tokenizer; }
+ // https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
+ void abort();
+
bool aborted() const { return m_aborted; }
+ bool stopped() const { return m_stop_parsing; }
size_t script_nesting_level() const { return m_script_nesting_level; }
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
index b5619786bd..5905561f9e 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -257,6 +257,9 @@ _StartOfFunction:
if (!m_queued_tokens.is_empty())
return m_queued_tokens.dequeue();
+ if (m_aborted)
+ return {};
+
for (;;) {
auto current_input_character = next_code_point();
switch (m_state) {
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
index 05f5c5b697..2b0e17e601 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
@@ -143,6 +143,9 @@ public:
m_insertion_point.position = m_utf8_view.iterator_offset(m_utf8_iterator);
}
+ // This permanently cuts off the tokenizer input stream: it sets m_aborted, which the tokenizer checks (after draining already-queued tokens) to return an empty result instead of reading more input.
+ void abort() { m_aborted = true; }
+
private:
void skip(size_t count);
Optional<u32> next_code_point();
@@ -207,6 +210,8 @@ private:
bool m_blocked { false };
+ bool m_aborted { false };
+
Vector<HTMLToken::Position> m_source_positions;
};