summary | refs | log | tree | commit | diff
path: root/Libraries/LibWeb
diff options
context:
space:
mode:
author: TheDumpap <petervivemail@gmail.com> 2020-05-28 00:28:32 +0200
committer: Andreas Kling <kling@serenityos.org> 2020-05-30 17:47:50 +0200
commit: d92c9d37724f16e4b00b2411d51184407082314f (patch)
tree: b45917c8e7d02596fb7d71318af8fab2b6276561 /Libraries/LibWeb
parent: c742306b6f4764e15212246fb4ef6477b46a4754 (diff)
download: serenity-d92c9d37724f16e4b00b2411d51184407082314f.zip
LibWeb: Implement more of the tokenizer states
Slowly adding more unimplemented options for tokenizer states.
Diffstat (limited to 'Libraries/LibWeb')
-rw-r--r-- Libraries/LibWeb/Parser/HTMLTokenizer.cpp 120
1 files changed, 112 insertions, 8 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
index 404237f19a..bbf61d4c26 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -230,6 +230,11 @@ _StartOfFunction:
{
SWITCH_TO(TagOpen);
}
+ ON(0)
+ {
+ PARSE_ERROR();
+ EMIT_CURRENT_CHARACTER;
+ }
ON_EOF
{
EMIT_EOF;
@@ -258,11 +263,19 @@ _StartOfFunction:
}
ON('?')
{
+ PARSE_ERROR();
+ create_new_token(HTMLToken::Type::Comment);
+ RECONSUME_IN(BogusComment);
+ }
+ ON_EOF
+ {
TODO();
}
ANYTHING_ELSE
{
- TODO();
+ PARSE_ERROR();
+ EMIT_CHARACTER('<');
+ RECONSUME_IN(Data);
}
}
END_STATE
@@ -281,6 +294,22 @@ _StartOfFunction:
{
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
+ ON_ASCII_UPPER_ALPHA
+ {
+ m_current_token.m_tag.tag_name.append(tolower(current_input_character.value()));
+ continue;
+ }
+ ON(0)
+ {
+ PARSE_ERROR();
+ m_current_token.m_tag.tag_name.append("\uFFFD");
+ continue;
+ }
+ ON_EOF
+ {
+ PARSE_ERROR();
+ EMIT_EOF;
+ }
ANYTHING_ELSE
{
m_current_token.m_tag.tag_name.append(current_input_character.value());
@@ -296,6 +325,23 @@ _StartOfFunction:
create_new_token(HTMLToken::Type::EndTag);
RECONSUME_IN(TagName);
}
+ ON('>')
+ {
+ PARSE_ERROR();
+ SWITCH_TO(Data);
+ }
+ ON_EOF
+ {
+ PARSE_ERROR();
+ // FIXME: Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character token and an end-of-file token.
+ continue;
+ }
+ ANYTHING_ELSE
+ {
+ PARSE_ERROR();
+ create_new_token(HTMLToken::Type::Comment);
+ RECONSUME_IN(BogusComment);
+ }
}
END_STATE
@@ -312,6 +358,27 @@ _StartOfFunction:
}
END_STATE
+ BEGIN_STATE(BogusComment)
+ {
+ ON('>')
+ {
+ TODO();
+ }
+ ON_EOF
+ {
+ TODO();
+ }
+ ON(0)
+ {
+ TODO();
+ }
+ ANYTHING_ELSE
+ {
+ TODO();
+ }
+ }
+ END_STATE
+
BEGIN_STATE(DOCTYPE)
{
ON_WHITESPACE
@@ -1415,7 +1482,8 @@ _StartOfFunction:
}
ON(0)
{
- TODO();
+ PARSE_ERROR();
+ EMIT_CHARACTER("\uFFFD");
}
ON_EOF
{
@@ -1462,11 +1530,19 @@ _StartOfFunction:
{
ON_WHITESPACE
{
- TODO();
+ if (!current_end_tag_token_is_appropriate()) {
+ // FIXME: Otherwise, treat it as per the "anything else" entry below.
+ TODO();
+ }
+ SWITCH_TO(BeforeAttributeName);
}
ON('/')
{
- TODO();
+ if (!current_end_tag_token_is_appropriate()) {
+ // FIXME: Otherwise, treat it as per the "anything else" entry below.
+ TODO();
+ }
+ SWITCH_TO(SelfClosingStartTag);
}
ON('>')
{
@@ -1503,7 +1579,8 @@ _StartOfFunction:
}
ON(0)
{
- TODO();
+ PARSE_ERROR();
+ EMIT_CHARACTER("\uFFFD");
}
ON_EOF
{
@@ -1550,11 +1627,19 @@ _StartOfFunction:
{
ON_WHITESPACE
{
- TODO();
+ if (!current_end_tag_token_is_appropriate()) {
+ // FIXME: Otherwise, treat it as per the "anything else" entry below.
+ TODO();
+ }
+ SWITCH_TO(BeforeAttributeName);
}
ON('/')
{
- TODO();
+ if (!current_end_tag_token_is_appropriate()) {
+ // FIXME: Otherwise, treat it as per the "anything else" entry below.
+ TODO();
+ }
+ SWITCH_TO(SelfClosingStartTag);
}
ON('>')
{
@@ -1591,7 +1676,26 @@ _StartOfFunction:
}
ON(0)
{
- TODO();
+ PARSE_ERROR();
+ EMIT_CHARACTER("\uFFFD");
+ }
+ ON_EOF
+ {
+ EMIT_EOF;
+ }
+ ANYTHING_ELSE
+ {
+ EMIT_CURRENT_CHARACTER;
+ }
+ }
+ END_STATE
+
+ BEGIN_STATE(PLAINTEXT)
+ {
+ ON(0)
+ {
+ PARSE_ERROR();
+ EMIT_CHARACTER("\uFFFD");
}
ON_EOF
{