diff options
author | Andreas Kling <kling@serenityos.org> | 2022-09-20 21:08:14 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2022-09-20 23:44:59 +0200 |
commit | ab8432783e8d6187a91e48149bf5731e912d6349 (patch) | |
tree | 68a329606b7a99b275c9fbe8b9f910e245f53e18 /Userland | |
parent | 37ed1b28fa5459345e5c9d7a889752a43ea23c59 (diff) | |
download | serenity-ab8432783e8d6187a91e48149bf5731e912d6349.zip |
LibWeb: Implement aborting the HTML parser
This is roughly on-spec, although I had to invent a simple "aborted"
state for the tokenizer.
Diffstat (limited to 'Userland')
4 files changed, 33 insertions, 0 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp index 5885c21291..84a960b62d 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp @@ -3751,4 +3751,25 @@ JS::Realm& HTMLParser::realm() return m_document->realm(); } +// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser +void HTMLParser::abort() +{ + // 1. Throw away any pending content in the input stream, and discard any future content that would have been added to it. + m_tokenizer.abort(); + + // FIXME: 2. Stop the speculative HTML parser for this HTML parser. + + // 3. Update the current document readiness to "interactive". + m_document->update_readiness(DocumentReadyState::Interactive); + + // 4. Pop all the nodes off the stack of open elements. + while (!m_stack_of_open_elements.is_empty()) + m_stack_of_open_elements.pop(); + + // 5. Update the current document readiness to "complete". + m_document->update_readiness(DocumentReadyState::Complete); + + m_aborted = true; +} + } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h index 6838972ba4..861acc4b08 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h @@ -68,7 +68,11 @@ public: HTMLTokenizer& tokenizer() { return m_tokenizer; } + // https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser + void abort(); + bool aborted() const { return m_aborted; } + bool stopped() const { return m_stop_parsing; } size_t script_nesting_level() const { return m_script_nesting_level; } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index b5619786bd..5905561f9e 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -257,6 +257,9 @@ _StartOfFunction: if (!m_queued_tokens.is_empty()) return m_queued_tokens.dequeue(); + if (m_aborted) + return {}; + for (;;) { auto current_input_character = next_code_point(); switch (m_state) { diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h index 05f5c5b697..2b0e17e601 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h @@ -143,6 +143,9 @@ public: m_insertion_point.position = m_utf8_view.iterator_offset(m_utf8_iterator); } + // This permanently cuts off the tokenizer input stream. + void abort() { m_aborted = true; } + private: void skip(size_t count); Optional<u32> next_code_point(); @@ -207,6 +210,8 @@ private: bool m_blocked { false }; + bool m_aborted { false }; + Vector<HTMLToken::Position> m_source_positions; }; |