summary | refs | log | tree | commit | diff
path: root/Libraries/LibWeb
diff options
context:
space:
mode:
author: Andreas Kling <kling@serenityos.org> 2020-05-26 15:50:05 +0200
committer: Andreas Kling <kling@serenityos.org> 2020-05-26 15:50:05 +0200
commit: ecd25ce6c7dec8bf362e930705afbe1a951ab700 (patch)
tree: 0d17136cc5136b3e0a324fe2a157fa84dcaf8013 /Libraries/LibWeb
parent: 0f2b3cd2801d1b1c04675eb30e0343705cf36351 (diff)
download: serenity-ecd25ce6c7dec8bf362e930705afbe1a951ab700.zip
LibWeb: Allow HTML tokenizer to emit more than one token
Tokens are now put on a queue when emitted, and we always pop from that queue when returning from next_token().
Diffstat (limited to 'Libraries/LibWeb')
-rw-r--r-- Libraries/LibWeb/Parser/HTMLTokenizer.cpp 33
-rw-r--r-- Libraries/LibWeb/Parser/HTMLTokenizer.h 3
2 files changed, 26 insertions, 10 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
index ba4df5d474..1463067d1d 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -58,7 +58,16 @@
will_switch_to(State::new_state); \
m_state = State::new_state; \
will_emit(m_current_token); \
- return m_current_token; \
+ m_queued_tokens.enqueue(m_current_token); \
+ return m_queued_tokens.dequeue(); \
+ } while (0)
+
+#define EMIT_CHARACTER_AND_RECONSUME_IN(codepoint, new_state) \
+ do { \
+ m_queued_tokens.enqueue(m_current_token); \
+ will_reconsume_in(State::new_state); \
+ m_state = State::new_state; \
+ goto new_state; \
} while (0)
#define DONT_CONSUME_NEXT_INPUT_CHARACTER --m_cursor;
@@ -90,21 +99,23 @@
m_has_emitted_eof = true; \
create_new_token(HTMLToken::Type::EndOfFile); \
will_emit(m_current_token); \
- return m_current_token; \
+ m_queued_tokens.enqueue(m_current_token); \
+ return m_queued_tokens.dequeue(); \
} while (0)
-#define EMIT_CURRENT_TOKEN \
- do { \
- will_emit(m_current_token); \
- return m_current_token; \
+#define EMIT_CURRENT_TOKEN \
+ do { \
+ will_emit(m_current_token); \
+ m_queued_tokens.enqueue(m_current_token); \
+ return m_queued_tokens.dequeue(); \
} while (0)
#define EMIT_CHARACTER(codepoint) \
do { \
create_new_token(HTMLToken::Type::Character); \
m_current_token.m_comment_or_character.data.append(codepoint); \
- will_emit(m_current_token); \
- return m_current_token; \
+ m_queued_tokens.enqueue(m_current_token); \
+ return m_queued_tokens.dequeue(); \
} while (0)
#define EMIT_CURRENT_CHARACTER \
@@ -141,6 +152,9 @@ Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const
Optional<HTMLToken> HTMLTokenizer::next_token()
{
+ if (!m_queued_tokens.is_empty())
+ return m_queued_tokens.dequeue();
+
for (;;) {
auto current_input_character = next_codepoint();
switch (m_state) {
@@ -1270,8 +1284,7 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
}
ANYTHING_ELSE
{
- EMIT_CHARACTER('<');
- RECONSUME_IN(ScriptData);
+ EMIT_CHARACTER_AND_RECONSUME_IN('<', ScriptData);
}
}
END_STATE
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.h b/Libraries/LibWeb/Parser/HTMLTokenizer.h
index fe18e9a211..2f674eaad8 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.h
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.h
@@ -26,6 +26,7 @@
#pragma once
+#include <AK/Queue.h>
#include <AK/StringView.h>
#include <AK/Types.h>
#include <LibWeb/Forward.h>
@@ -165,5 +166,7 @@ private:
HTMLToken m_last_emitted_start_tag;
bool m_has_emitted_eof { false };
+
+ Queue<HTMLToken> m_queued_tokens;
};
}