diff options
author | TheDumpap <petervivemail@gmail.com> | 2020-05-28 00:28:32 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-05-30 17:47:50 +0200 |
commit | d92c9d37724f16e4b00b2411d51184407082314f (patch) | |
tree | b45917c8e7d02596fb7d71318af8fab2b6276561 /Libraries/LibWeb | |
parent | c742306b6f4764e15212246fb4ef6477b46a4754 (diff) | |
download | serenity-d92c9d37724f16e4b00b2411d51184407082314f.zip |
LibWeb: Implement more of the tokenizer states
Gradually fill in more of the tokenizer state branches that were previously unimplemented.
Diffstat (limited to 'Libraries/LibWeb')
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 120 |
1 file changed, 112 insertions, 8 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp index 404237f19a..bbf61d4c26 100644 --- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp @@ -230,6 +230,11 @@ _StartOfFunction: { SWITCH_TO(TagOpen); } + ON(0) + { + PARSE_ERROR(); + EMIT_CURRENT_CHARACTER; + } ON_EOF { EMIT_EOF; @@ -258,11 +263,19 @@ _StartOfFunction: } ON('?') { + PARSE_ERROR(); + create_new_token(HTMLToken::Type::Comment); + RECONSUME_IN(BogusComment); + } + ON_EOF + { TODO(); } ANYTHING_ELSE { - TODO(); + PARSE_ERROR(); + EMIT_CHARACTER('<'); + RECONSUME_IN(Data); } } END_STATE @@ -281,6 +294,22 @@ _StartOfFunction: { SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } + ON_ASCII_UPPER_ALPHA + { + m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); + continue; + } + ON(0) + { + PARSE_ERROR(); + m_current_token.m_tag.tag_name.append("\uFFFD"); + continue; + } + ON_EOF + { + PARSE_ERROR(); + EMIT_EOF; + } ANYTHING_ELSE { m_current_token.m_tag.tag_name.append(current_input_character.value()); @@ -296,6 +325,23 @@ _StartOfFunction: create_new_token(HTMLToken::Type::EndTag); RECONSUME_IN(TagName); } + ON('>') + { + PARSE_ERROR(); + SWITCH_TO(Data); + } + ON_EOF + { + PARSE_ERROR(); + // FIXME: Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character token and an end-of-file token. 
+ continue; + } + ANYTHING_ELSE + { + PARSE_ERROR(); + create_new_token(HTMLToken::Type::Comment); + RECONSUME_IN(BogusComment); + } } END_STATE @@ -312,6 +358,27 @@ _StartOfFunction: } END_STATE + BEGIN_STATE(BogusComment) + { + ON('>') + { + TODO(); + } + ON_EOF + { + TODO(); + } + ON(0) + { + TODO(); + } + ANYTHING_ELSE + { + TODO(); + } + } + END_STATE + BEGIN_STATE(DOCTYPE) { ON_WHITESPACE @@ -1415,7 +1482,8 @@ _StartOfFunction: } ON(0) { - TODO(); + PARSE_ERROR(); + EMIT_CHARACTER("\uFFFD"); } ON_EOF { @@ -1462,11 +1530,19 @@ _StartOfFunction: { ON_WHITESPACE { - TODO(); + if (!current_end_tag_token_is_appropriate()) { + // FIXME: Otherwise, treat it as per the "anything else" entry below. + TODO(); + } + SWITCH_TO(BeforeAttributeName); } ON('/') { - TODO(); + if (!current_end_tag_token_is_appropriate()) { + // FIXME: Otherwise, treat it as per the "anything else" entry below. + TODO(); + } + SWITCH_TO(SelfClosingStartTag); } ON('>') { @@ -1503,7 +1579,8 @@ _StartOfFunction: } ON(0) { - TODO(); + PARSE_ERROR(); + EMIT_CHARACTER("\uFFFD"); } ON_EOF { @@ -1550,11 +1627,19 @@ _StartOfFunction: { ON_WHITESPACE { - TODO(); + if (!current_end_tag_token_is_appropriate()) { + // FIXME: Otherwise, treat it as per the "anything else" entry below. + TODO(); + } + SWITCH_TO(BeforeAttributeName); } ON('/') { - TODO(); + if (!current_end_tag_token_is_appropriate()) { + // FIXME: Otherwise, treat it as per the "anything else" entry below. + TODO(); + } + SWITCH_TO(SelfClosingStartTag); } ON('>') { @@ -1591,7 +1676,26 @@ _StartOfFunction: } ON(0) { - TODO(); + PARSE_ERROR(); + EMIT_CHARACTER("\uFFFD"); + } + ON_EOF + { + EMIT_EOF; + } + ANYTHING_ELSE + { + EMIT_CURRENT_CHARACTER; + } + } + END_STATE + + BEGIN_STATE(PLAINTEXT) + { + ON(0) + { + PARSE_ERROR(); + EMIT_CHARACTER("\uFFFD"); } ON_EOF { |