diff options
author | Andreas Kling <kling@serenityos.org> | 2020-05-26 15:50:05 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-05-26 15:50:05 +0200 |
commit | ecd25ce6c7dec8bf362e930705afbe1a951ab700 (patch) | |
tree | 0d17136cc5136b3e0a324fe2a157fa84dcaf8013 /Libraries/LibWeb | |
parent | 0f2b3cd2801d1b1c04675eb30e0343705cf36351 (diff) | |
download | serenity-ecd25ce6c7dec8bf362e930705afbe1a951ab700.zip |
LibWeb: Allow HTML tokenizer to emit more than one token
Tokens are now put on a queue when emitted, and we always pop from that
queue when returning from next_token().
Diffstat (limited to 'Libraries/LibWeb')
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 33 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLTokenizer.h | 3 |
2 files changed, 26 insertions, 10 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
index ba4df5d474..1463067d1d 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -58,7 +58,16 @@
         will_switch_to(State::new_state); \
         m_state = State::new_state; \
         will_emit(m_current_token); \
-        return m_current_token; \
+        m_queued_tokens.enqueue(m_current_token); \
+        return m_queued_tokens.dequeue(); \
+    } while (0)
+
+#define EMIT_CHARACTER_AND_RECONSUME_IN(codepoint, new_state) \
+    do { \
+        m_queued_tokens.enqueue(m_current_token); \
+        will_reconsume_in(State::new_state); \
+        m_state = State::new_state; \
+        goto new_state; \
     } while (0)

 #define DONT_CONSUME_NEXT_INPUT_CHARACTER --m_cursor;
@@ -90,21 +99,23 @@
         m_has_emitted_eof = true; \
         create_new_token(HTMLToken::Type::EndOfFile); \
         will_emit(m_current_token); \
-        return m_current_token; \
+        m_queued_tokens.enqueue(m_current_token); \
+        return m_queued_tokens.dequeue(); \
     } while (0)

-#define EMIT_CURRENT_TOKEN \
-    do { \
-        will_emit(m_current_token); \
-        return m_current_token; \
+#define EMIT_CURRENT_TOKEN \
+    do { \
+        will_emit(m_current_token); \
+        m_queued_tokens.enqueue(m_current_token); \
+        return m_queued_tokens.dequeue(); \
     } while (0)

 #define EMIT_CHARACTER(codepoint) \
     do { \
         create_new_token(HTMLToken::Type::Character); \
         m_current_token.m_comment_or_character.data.append(codepoint); \
-        will_emit(m_current_token); \
-        return m_current_token; \
+        m_queued_tokens.enqueue(m_current_token); \
+        return m_queued_tokens.dequeue(); \
     } while (0)

 #define EMIT_CURRENT_CHARACTER \
@@ -141,6 +152,9 @@ Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const

 Optional<HTMLToken> HTMLTokenizer::next_token()
 {
+    if (!m_queued_tokens.is_empty())
+        return m_queued_tokens.dequeue();
+
     for (;;) {
         auto current_input_character = next_codepoint();
         switch (m_state) {
@@ -1270,8 +1284,7 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
                 }
                 ANYTHING_ELSE
                 {
-                    EMIT_CHARACTER('<');
-                    RECONSUME_IN(ScriptData);
+                    EMIT_CHARACTER_AND_RECONSUME_IN('<', ScriptData);
                 }
             }
             END_STATE
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.h b/Libraries/LibWeb/Parser/HTMLTokenizer.h
index fe18e9a211..2f674eaad8 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.h
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.h
@@ -26,6 +26,7 @@

 #pragma once

+#include <AK/Queue.h>
 #include <AK/StringView.h>
 #include <AK/Types.h>
 #include <LibWeb/Forward.h>
@@ -165,5 +166,7 @@ private:
     HTMLToken m_last_emitted_start_tag;

     bool m_has_emitted_eof { false };
+
+    Queue<HTMLToken> m_queued_tokens;
 };
 }