summary | refs | log | tree | commit | diff
path: root/Libraries
diff options
context:
space:
mode:
Diffstat (limited to 'Libraries')
-rw-r--r-- Libraries/LibWeb/CMakeLists.txt 1
-rw-r--r-- Libraries/LibWeb/Parser/HTMLToken.h 76
-rw-r--r-- Libraries/LibWeb/Parser/HTMLTokenizer.cpp 305
-rw-r--r-- Libraries/LibWeb/Parser/HTMLTokenizer.h 160
4 files changed, 542 insertions, 0 deletions
diff --git a/Libraries/LibWeb/CMakeLists.txt b/Libraries/LibWeb/CMakeLists.txt
index 2a53d4ed56..631ff1e534 100644
--- a/Libraries/LibWeb/CMakeLists.txt
+++ b/Libraries/LibWeb/CMakeLists.txt
@@ -85,6 +85,7 @@ set(SOURCES
Layout/LineBoxFragment.cpp
Parser/CSSParser.cpp
Parser/HTMLParser.cpp
+ Parser/HTMLTokenizer.cpp
ResourceLoader.cpp
StylePropertiesModel.cpp
URLEncoder.cpp
diff --git a/Libraries/LibWeb/Parser/HTMLToken.h b/Libraries/LibWeb/Parser/HTMLToken.h
new file mode 100644
index 0000000000..398a01ac7f
--- /dev/null
+++ b/Libraries/LibWeb/Parser/HTMLToken.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <AK/String.h>
+#include <AK/StringBuilder.h>
+#include <AK/Types.h>
+#include <AK/Vector.h>
+#include <LibWeb/DOM/Attribute.h>
+
+namespace Web {
+
+// A single token produced by HTMLTokenizer.
+//
+// One object carries storage for every token kind at once; m_type records
+// which kind the token currently is, and only the matching member struct is
+// meaningful for that kind.
+class HTMLToken {
+ // HTMLTokenizer reads and writes the private fields below directly.
+ friend class HTMLTokenizer;
+
+public:
+ // The kinds of token this class can represent.
+ enum class Type {
+ DOCTYPE,
+ StartTag,
+ EndTag,
+ Comment,
+ Character,
+ EndOfFile,
+ };
+
+ // Returns the kind of this token.
+ Type type() const { return m_type; }
+
+private:
+ // Has no default member initializer; HTMLTokenizer::create_new_token()
+ // assigns it immediately after resetting the token.
+ Type m_type;
+
+ // Payload for Type::DOCTYPE.
+ // NOTE(review): "system_public_identifier" presumably means the DOCTYPE
+ // system identifier -- confirm the intended name.
+ struct {
+ StringBuilder name;
+ StringBuilder public_identifier;
+ StringBuilder system_public_identifier;
+ bool force_quirks { false };
+ } m_doctype;
+
+ // Payload shared by Type::StartTag and Type::EndTag.
+ struct {
+ StringBuilder tag_name;
+ bool self_closing { false };
+ Vector<Attribute> attributes;
+ } m_tag;
+
+ // Text payload. The tokenizer's Data state appends to it for Character
+ // tokens; going by the field name it is presumably meant for Comment
+ // tokens as well.
+ struct {
+ StringBuilder data;
+ } m_comment_or_character;
+};
+
+}
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
new file mode 100644
index 0000000000..bc7dfa2daf
--- /dev/null
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <LibWeb/Parser/HTMLToken.h>
+#include <LibWeb/Parser/HTMLTokenizer.h>
+#include <ctype.h>
+
+//#define TOKENIZER_TRACE
+
+// Helper macros for the state machine in HTMLTokenizer::run(). They expand to
+// several bare statements (no do/while wrapper) and refer to the local
+// variable `current_input_character`, so they are only usable inside a braced
+// block within run().
+
+// Switch to `new_state`: notify the trace hook, record the state, consume the
+// next codepoint into current_input_character and jump to the state's label.
+#define SWITCH_TO(new_state) \
+ will_switch_to(State::new_state); \
+ m_state = State::new_state; \
+ current_input_character = next_codepoint(); \
+ goto new_state;
+
+// Like SWITCH_TO, but keeps the current codepoint so the new state handles
+// the same character again.
+#define RECONSUME_IN(new_state) \
+ will_reconsume_in(State::new_state); \
+ m_state = State::new_state; \
+ goto new_state;
+
+// Steps the input cursor back by one position.
+#define DONT_CONSUME_NEXT_INPUT_CHARACTER --m_cursor;
+
+// Alias of SWITCH_TO, for transitions where the current character is dropped.
+#define IGNORE_CHARACTER_AND_CONTINUE_IN(x) SWITCH_TO(x)
+
+// Guards the following block: taken when a character is available and it
+// equals `codepoint`.
+#define ON(codepoint) \
+ if (current_input_character.has_value() && current_input_character.value() == codepoint)
+
+// Guards the following block: taken when the input is exhausted.
+#define ON_EOF \
+ if (!current_input_character.has_value())
+
+// Guards the following block: taken when a character is available and it is
+// an ASCII letter (A-Z or a-z).
+//
+// The ranges are spelled out rather than calling isalpha(): that function is
+// locale-dependent and has undefined behaviour for arguments that are not
+// representable as unsigned char, which a u32 codepoint may not be.
+#define ON_ASCII_ALPHA \
+    if (current_input_character.has_value() \
+        && ((current_input_character.value() >= 'A' && current_input_character.value() <= 'Z') \
+            || (current_input_character.value() >= 'a' && current_input_character.value() <= 'z')))
+
+// Guards the following block: taken when a character is available and it is
+// one of the whitespace characters the HTML tokenizer branches on:
+// U+0009 TAB, U+000A LF, U+000C FF or U+0020 SPACE.
+// The line feed must be spelled '\n'; the escape '\a' is U+0007 BELL, which
+// is not whitespace.
+#define ON_WHITESPACE \
+    if (current_input_character.has_value() \
+        && (current_input_character.value() == '\t' \
+            || current_input_character.value() == '\n' \
+            || current_input_character.value() == '\f' \
+            || current_input_character.value() == ' '))
+
+// Unconditional guard, used as the last branch of a state. It is a plain
+// `if`, not an `else`, so every earlier branch in the state must leave the
+// state itself (goto, continue or return).
+#define ANYTHING_ELSE if (1)
+
+// Creates and emits an EndOfFile token, then returns from run().
+#define EMIT_EOF_AND_RETURN \
+ create_new_token(HTMLToken::Type::EndOfFile); \
+ emit_current_token(); \
+ return;
+
+// Opens a state: defines both a goto label (the target of SWITCH_TO and
+// RECONSUME_IN) and the matching `case` for the switch on m_state.
+#define BEGIN_STATE(state) \
+ state: \
+ case State::state:
+
+// Closes a state. Control only gets here when no branch of the state handled
+// the current character, which is treated as a fatal error.
+#define END_STATE \
+ ASSERT_NOT_REACHED(); \
+ break;
+
+namespace Web {
+
+// Consumes and returns the input element at the cursor, advancing the cursor
+// by one. Returns an empty Optional (and leaves the cursor untouched) once the
+// cursor has reached the end of the input.
+Optional<u32> HTMLTokenizer::next_codepoint()
+{
+    if (m_cursor < m_input.length()) {
+        u32 codepoint = m_input[m_cursor];
+        ++m_cursor;
+        return codepoint;
+    }
+    return {};
+}
+
+// Returns the input element `offset` positions past the cursor without
+// consuming anything, or an empty Optional when that position lies beyond the
+// end of the input.
+Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const
+{
+    const size_t position = m_cursor + offset;
+    if (position < m_input.length()) {
+        u32 codepoint = m_input[position];
+        return codepoint;
+    }
+    return {};
+}
+
+// Runs the tokenizer state machine over the input, emitting tokens through
+// emit_current_token().
+//
+// Each pass of the outer loop consumes one codepoint and dispatches on
+// m_state. Inside a state, SWITCH_TO / RECONSUME_IN jump straight to another
+// state's label, while `continue` re-enters the current state with the next
+// codepoint. The function returns once an EndOfFile token has been emitted.
+// Input that the implemented states do not handle yet falls through to
+// END_STATE and trips ASSERT_NOT_REACHED.
+//
+// End of input is handled explicitly in every state that would otherwise
+// read the value of the (empty) current character.
+void HTMLTokenizer::run()
+{
+    for (;;) {
+        auto current_input_character = next_codepoint();
+        switch (m_state) {
+            BEGIN_STATE(Data)
+            {
+                ON('&')
+                {
+                    m_return_state = State::Data;
+                    SWITCH_TO(CharacterReference);
+                }
+                ON('<')
+                {
+                    SWITCH_TO(TagOpen);
+                }
+                ON_EOF
+                {
+                    EMIT_EOF_AND_RETURN;
+                }
+                ANYTHING_ELSE
+                {
+                    // Every other codepoint becomes its own Character token.
+                    create_new_token(HTMLToken::Type::Character);
+                    m_current_token.m_comment_or_character.data.append(current_input_character.value());
+                    emit_current_token();
+                    continue;
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(TagOpen)
+            {
+                ON('!')
+                {
+                    SWITCH_TO(MarkupDeclarationOpen);
+                }
+                ON('/')
+                {
+                    SWITCH_TO(EndTagOpen);
+                }
+                ON_ASCII_ALPHA
+                {
+                    // The letter is the first character of the tag name, so
+                    // TagName has to see it again.
+                    create_new_token(HTMLToken::Type::StartTag);
+                    RECONSUME_IN(TagName);
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(TagName)
+            {
+                ON('>')
+                {
+                    emit_current_token();
+                    SWITCH_TO(Data);
+                }
+                ON_EOF
+                {
+                    // eof-in-tag: the unfinished tag is dropped and only the
+                    // end-of-file token is emitted.
+                    EMIT_EOF_AND_RETURN;
+                }
+                ANYTHING_ELSE
+                {
+                    m_current_token.m_tag.tag_name.append(current_input_character.value());
+                    continue;
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(EndTagOpen)
+            {
+                ON_ASCII_ALPHA
+                {
+                    create_new_token(HTMLToken::Type::EndTag);
+                    RECONSUME_IN(TagName);
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(MarkupDeclarationOpen)
+            {
+                // Un-consume the character fetched on entry so the keyword
+                // check below starts at the first character after "<!".
+                DONT_CONSUME_NEXT_INPUT_CHARACTER;
+                if (next_few_characters_are("DOCTYPE")) {
+                    consume("DOCTYPE");
+                    SWITCH_TO(DOCTYPE);
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(DOCTYPE)
+            {
+                ON_WHITESPACE
+                {
+                    SWITCH_TO(BeforeDOCTYPEName);
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(BeforeDOCTYPEName)
+            {
+                ON_WHITESPACE
+                {
+                    continue;
+                }
+                ON_EOF
+                {
+                    // eof-in-doctype: emit a nameless DOCTYPE with
+                    // force-quirks set, then the end-of-file token.
+                    create_new_token(HTMLToken::Type::DOCTYPE);
+                    m_current_token.m_doctype.force_quirks = true;
+                    emit_current_token();
+                    EMIT_EOF_AND_RETURN;
+                }
+                ANYTHING_ELSE
+                {
+                    create_new_token(HTMLToken::Type::DOCTYPE);
+                    m_current_token.m_doctype.name.append(current_input_character.value());
+                    SWITCH_TO(DOCTYPEName);
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(DOCTYPEName)
+            {
+                ON('>')
+                {
+                    emit_current_token();
+                    SWITCH_TO(Data);
+                }
+                ON_EOF
+                {
+                    // eof-in-doctype: emit what has been collected so far
+                    // with force-quirks set, then the end-of-file token.
+                    m_current_token.m_doctype.force_quirks = true;
+                    emit_current_token();
+                    EMIT_EOF_AND_RETURN;
+                }
+                ANYTHING_ELSE
+                {
+                    m_current_token.m_doctype.name.append(current_input_character.value());
+                    continue;
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(CharacterReference)
+            {
+                // Not implemented yet: entering this state asserts.
+            }
+            END_STATE
+
+        default:
+            ASSERT_NOT_REACHED();
+        }
+    }
+}
+
+// Skips over `string` in the input. The caller must already know that the
+// upcoming input matches it; this is asserted.
+void HTMLTokenizer::consume(const StringView& string)
+{
+    ASSERT(next_few_characters_are(string));
+    const auto consumed_length = string.length();
+    m_cursor = m_cursor + consumed_length;
+}
+
+// Returns true when the input starting at the cursor matches `string`
+// element for element. Nothing is consumed. Returns false if the input ends
+// before the whole string has been compared.
+bool HTMLTokenizer::next_few_characters_are(const StringView& string) const
+{
+    size_t offset = 0;
+    while (offset < string.length()) {
+        const auto peeked = peek_codepoint(offset);
+        // FIXME: This should be more Unicode-aware.
+        const bool matches = peeked.has_value() && peeked.value() == static_cast<u32>(string[offset]);
+        if (!matches)
+            return false;
+        ++offset;
+    }
+    return true;
+}
+
+// "Emits" the current token. For now that means writing a one-line
+// description of it to the debug log, prefixed with the current state name,
+// and then resetting m_current_token.
+void HTMLTokenizer::emit_current_token()
+{
+    StringBuilder description;
+
+    // Appends the " { name: '...' }" suffix used by named token kinds.
+    auto append_name = [&](const String& name) {
+        description.append(" { name: '");
+        description.append(name);
+        description.append("' }");
+    };
+
+    switch (m_current_token.type()) {
+    case HTMLToken::Type::DOCTYPE:
+        description.append("DOCTYPE");
+        append_name(m_current_token.m_doctype.name.to_string());
+        break;
+    case HTMLToken::Type::StartTag:
+        description.append("StartTag");
+        append_name(m_current_token.m_tag.tag_name.to_string());
+        break;
+    case HTMLToken::Type::EndTag:
+        description.append("EndTag");
+        append_name(m_current_token.m_tag.tag_name.to_string());
+        break;
+    case HTMLToken::Type::Comment:
+        description.append("Comment");
+        break;
+    case HTMLToken::Type::Character:
+        description.append("Character");
+        break;
+    case HTMLToken::Type::EndOfFile:
+        description.append("EndOfFile");
+        break;
+    }
+
+    dbg() << "[" << String::format("%42s", state_name(m_state)) << "] " << description.to_string();
+    m_current_token = {};
+}
+
+// Discards whatever the current token held and starts a fresh token of the
+// given kind.
+void HTMLTokenizer::create_new_token(HTMLToken::Type type)
+{
+    m_current_token = HTMLToken {};
+    m_current_token.m_type = type;
+}
+
+// Creates a tokenizer over `input`. Only the StringView itself is stored.
+// NOTE(review): presumably the caller must keep the underlying characters
+// alive for as long as the tokenizer is used -- confirm against StringView.
+HTMLTokenizer::HTMLTokenizer(const StringView& input)
+    : m_input { input }
+{
+}
+
+// Trace hook invoked by SWITCH_TO before m_state changes. Logs the transition
+// when TOKENIZER_TRACE is defined and does nothing otherwise.
+void HTMLTokenizer::will_switch_to([[maybe_unused]] State new_state)
+{
+#if defined(TOKENIZER_TRACE)
+    dbg() << "[" << state_name(m_state) << "] Switch to " << state_name(new_state);
+#endif // TOKENIZER_TRACE
+}
+
+// Trace hook invoked by RECONSUME_IN before m_state changes. Logs the
+// transition when TOKENIZER_TRACE is defined and does nothing otherwise.
+void HTMLTokenizer::will_reconsume_in([[maybe_unused]] State new_state)
+{
+#if defined(TOKENIZER_TRACE)
+    dbg() << "[" << state_name(m_state) << "] Reconsume in " << state_name(new_state);
+#endif // TOKENIZER_TRACE
+}
+
+}
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.h b/Libraries/LibWeb/Parser/HTMLTokenizer.h
new file mode 100644
index 0000000000..ec5adecb92
--- /dev/null
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <AK/StringView.h>
+#include <AK/Types.h>
+#include <LibWeb/Parser/HTMLToken.h>
+
+// X-macro listing every tokenizer state. A user defines
+// __ENUMERATE_TOKENIZER_STATE(state) to whatever should be generated per
+// state, expands ENUMERATE_TOKENIZER_STATES, then undefines it again.
+// HTMLTokenizer uses this to generate both its State enum and state_name().
+#define ENUMERATE_TOKENIZER_STATES \
+ __ENUMERATE_TOKENIZER_STATE(Data) \
+ __ENUMERATE_TOKENIZER_STATE(RCDATA) \
+ __ENUMERATE_TOKENIZER_STATE(RAWTEXT) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptData) \
+ __ENUMERATE_TOKENIZER_STATE(PLAINTEXT) \
+ __ENUMERATE_TOKENIZER_STATE(TagOpen) \
+ __ENUMERATE_TOKENIZER_STATE(EndTagOpen) \
+ __ENUMERATE_TOKENIZER_STATE(TagName) \
+ __ENUMERATE_TOKENIZER_STATE(RCDATALessThanSign) \
+ __ENUMERATE_TOKENIZER_STATE(RCDATAEndTagOpen) \
+ __ENUMERATE_TOKENIZER_STATE(RCDATAEndTagName) \
+ __ENUMERATE_TOKENIZER_STATE(RAWTEXTLessThanSign) \
+ __ENUMERATE_TOKENIZER_STATE(RAWTEXTEndTagOpen) \
+ __ENUMERATE_TOKENIZER_STATE(RAWTEXTEndTagName) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataLessThanSign) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEndTagOpen) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEndTagName) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapeStart) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapeStartDash) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEscaped) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedDash) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedDashDash) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedLessThanSign) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedEndTagOpen) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedEndTagName) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapeStart) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscaped) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedDash) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedDashDash) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedLessThanSign) \
+ __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapeEnd) \
+ __ENUMERATE_TOKENIZER_STATE(BeforeAttributeName) \
+ __ENUMERATE_TOKENIZER_STATE(AttributeName) \
+ __ENUMERATE_TOKENIZER_STATE(AfterAttributeName) \
+ __ENUMERATE_TOKENIZER_STATE(BeforeAttributeValue) \
+ __ENUMERATE_TOKENIZER_STATE(AttributeValueDoubleQuoted) \
+ __ENUMERATE_TOKENIZER_STATE(AttributeValueSingleQuoted) \
+ __ENUMERATE_TOKENIZER_STATE(AttributeValueUnquoted) \
+ __ENUMERATE_TOKENIZER_STATE(AfterAttributeValueQuoted) \
+ __ENUMERATE_TOKENIZER_STATE(SelfClosingStartTag) \
+ __ENUMERATE_TOKENIZER_STATE(BogusComment) \
+ __ENUMERATE_TOKENIZER_STATE(MarkupDeclarationOpen) \
+ __ENUMERATE_TOKENIZER_STATE(CommentStart) \
+ __ENUMERATE_TOKENIZER_STATE(CommentStartDash) \
+ __ENUMERATE_TOKENIZER_STATE(Comment) \
+ __ENUMERATE_TOKENIZER_STATE(CommentLessThanSign) \
+ __ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBang) \
+ __ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBangDash) \
+ __ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBangDashDash) \
+ __ENUMERATE_TOKENIZER_STATE(CommentEndDash) \
+ __ENUMERATE_TOKENIZER_STATE(CommentEnd) \
+ __ENUMERATE_TOKENIZER_STATE(CommentEndBang) \
+ __ENUMERATE_TOKENIZER_STATE(DOCTYPE) \
+ __ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPEName) \
+ __ENUMERATE_TOKENIZER_STATE(DOCTYPEName) \
+ __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEName) \
+ __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEPublicKeyword) \
+ __ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPEPublicIdentifier) \
+ __ENUMERATE_TOKENIZER_STATE(DOCTYPEPublicIdentifierDoubleQuoted) \
+ __ENUMERATE_TOKENIZER_STATE(DOCTYPEPublicIdentifierSingleQuoted) \
+ __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEPublicIdentifier) \
+ __ENUMERATE_TOKENIZER_STATE(BetweenDOCTYPEPublicAndSystemIdentifiers) \
+ __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPESystemKeyword) \
+ __ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPESystemIdentifier) \
+ __ENUMERATE_TOKENIZER_STATE(DOCTYPESystemIdentifierDoubleQuoted) \
+ __ENUMERATE_TOKENIZER_STATE(DOCTYPESystemIdentifierSingleQuoted) \
+ __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPESystemIdentifier) \
+ __ENUMERATE_TOKENIZER_STATE(BogusDOCTYPE) \
+ __ENUMERATE_TOKENIZER_STATE(CDATASection) \
+ __ENUMERATE_TOKENIZER_STATE(CDATASectionBracket) \
+ __ENUMERATE_TOKENIZER_STATE(CDATASectionEnd) \
+ __ENUMERATE_TOKENIZER_STATE(CharacterReference) \
+ __ENUMERATE_TOKENIZER_STATE(NamedCharacterReference) \
+ __ENUMERATE_TOKENIZER_STATE(AmbiguousAmpersand) \
+ __ENUMERATE_TOKENIZER_STATE(NumericCharacterReference) \
+ __ENUMERATE_TOKENIZER_STATE(HexadecimalCharacterReferenceStart) \
+ __ENUMERATE_TOKENIZER_STATE(DecimalCharacterReferenceStart) \
+ __ENUMERATE_TOKENIZER_STATE(HexadecimalCharacterReference) \
+ __ENUMERATE_TOKENIZER_STATE(DecimalCharacterReference) \
+ __ENUMERATE_TOKENIZER_STATE(NumericCharacterReferenceEnd)
+
+namespace Web {
+
+// State-machine tokenizer for HTML input. Construct it over a StringView and
+// call run(); tokens are handed to emit_current_token() as they are completed.
+class HTMLTokenizer {
+public:
+ // Creates a tokenizer that reads from `input`.
+ explicit HTMLTokenizer(const StringView& input);
+
+ // Tokenizes the input; returns after the EndOfFile token has been emitted.
+ void run();
+
+private:
+ // Consumes and returns the next input element, or empty at end of input.
+ Optional<u32> next_codepoint();
+ // Returns the element `offset` positions past the cursor without consuming.
+ Optional<u32> peek_codepoint(size_t offset) const;
+ // True when the upcoming input matches the given string exactly.
+ bool next_few_characters_are(const StringView&) const;
+ // Advances the cursor past the given string (asserted to match).
+ void consume(const StringView&);
+ // Reports the current token and then resets it.
+ void emit_current_token();
+ // Resets the current token and sets its type.
+ void create_new_token(HTMLToken::Type);
+
+ // One enumerator per entry in ENUMERATE_TOKENIZER_STATES.
+ enum class State {
+#define __ENUMERATE_TOKENIZER_STATE(state) state,
+ ENUMERATE_TOKENIZER_STATES
+#undef __ENUMERATE_TOKENIZER_STATE
+ };
+
+ // Returns the enumerator's identifier as a string literal, generated from
+ // the same state list as the enum.
+ static const char* state_name(State state)
+ {
+ switch (state) {
+#define __ENUMERATE_TOKENIZER_STATE(state) \
+ case State::state: \
+ return #state;
+ ENUMERATE_TOKENIZER_STATES
+#undef __ENUMERATE_TOKENIZER_STATE
+ };
+ ASSERT_NOT_REACHED();
+ }
+
+ // Trace hooks called just before m_state changes.
+ void will_switch_to(State);
+ void will_reconsume_in(State);
+
+ // State the machine is currently in.
+ State m_state { State::Data };
+ // Set to State::Data by the Data state before it switches to
+ // CharacterReference; nothing in this file reads it yet.
+ State m_return_state { State::Data };
+
+ // The text being tokenized and the index of the next element to consume.
+ StringView m_input;
+ size_t m_cursor { 0 };
+
+ // Token currently under construction.
+ HTMLToken m_current_token;
+};
+}