summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andreas Kling <kling@serenityos.org>, 2020-05-23 19:56:07 +0200
committer: Andreas Kling <kling@serenityos.org>, 2020-05-23 21:08:25 +0200
commit: e45c8b842c64bef30286ac3a34fc5385d1308a9f (patch)
tree: 3a0a8ead1ac0b3f61d06580f0ab842042323be23
parent: b5c0703e1468c96d73dc3077d73df976dc19a214 (diff)
download: serenity-e45c8b842c64bef30286ac3a34fc5385d1308a9f.zip
LibWeb: Implement a bit more of DOCTYPE tokenization
-rw-r--r-- Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 71
1 files changed, 71 insertions, 0 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
index 39e3f8b594..b5835446d3 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -58,6 +58,9 @@
// Guards the following block: taken when the current input character is an ASCII letter (A-Z or a-z).
// Explicit range checks are used instead of isalpha(), which is locale-dependent and has
// undefined behavior for arguments that are neither EOF nor representable as unsigned char.
#define ON_ASCII_ALPHA \
if (current_input_character.has_value() && ((current_input_character.value() >= 'A' && current_input_character.value() <= 'Z') || (current_input_character.value() >= 'a' && current_input_character.value() <= 'z')))
+#define ON_ASCII_UPPER_ALPHA \
+ if (current_input_character.has_value() && current_input_character.value() >= 'A' && current_input_character.value() <= 'Z') // taken only for 'A'..'Z'; an empty current_input_character never matches
+
// Guards the following block: taken when the current input character is tab, line feed,
// form feed or space. (The check previously tested '\a' (bell) where line feed belongs.)
#define ON_WHITESPACE \
if (current_input_character.has_value() && (current_input_character.value() == '\t' || current_input_character.value() == '\n' || current_input_character.value() == '\f' || current_input_character.value() == ' '))
@@ -207,6 +210,18 @@ void HTMLTokenizer::run()
{
SWITCH_TO(BeforeDOCTYPEName);
}
+ ON('>') // '>' is not handled here; it is handed to BeforeDOCTYPEName (presumably re-processing the same character there — confirm against RECONSUME_IN)
+ {
+ RECONSUME_IN(BeforeDOCTYPEName);
+ }
+ ON_EOF // end of input in this state: not implemented yet
+ {
+ TODO();
+ }
+ ANYTHING_ELSE // every other character: not implemented yet
+ {
+ TODO();
+ }
}
END_STATE
@@ -216,6 +231,24 @@ void HTMLTokenizer::run()
{
continue;
}
+ ON_ASCII_UPPER_ALPHA // 'A'..'Z': begins a DOCTYPE token whose name starts with this letter
+ {
+ create_new_token(HTMLToken::Type::DOCTYPE);
+ m_current_token.m_doctype.name.append(tolower(current_input_character.value())); // the letter is stored lowercased
+ SWITCH_TO(DOCTYPEName);
+ }
+ ON(0) // NUL character: not implemented yet
+ {
+ TODO();
+ }
+ ON('>') // '>' before any name character: not implemented yet
+ {
+ TODO();
+ }
+ ON_EOF // end of input before any name character: not implemented yet
+ {
+ TODO();
+ }
ANYTHING_ELSE
{
create_new_token(HTMLToken::Type::DOCTYPE);
@@ -227,11 +260,27 @@ void HTMLTokenizer::run()
BEGIN_STATE(DOCTYPEName)
{
+ ON_WHITESPACE // whitespace ends the DOCTYPE name; tokenization goes on in AfterDOCTYPEName
+ {
+ SWITCH_TO(AfterDOCTYPEName);
+ }
ON('>')
{
emit_current_token();
SWITCH_TO(Data);
}
+ ON_ASCII_UPPER_ALPHA // 'A'..'Z': append the lowercased letter to the DOCTYPE name
+ {
+ m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
+ // The ON_* guards visible in this file are plain `if`s, not an else-chain, so this handler
+ // must leave explicitly like the whitespace handlers do. Otherwise control reaches
+ // ANYTHING_ELSE below and the same character is appended again, un-lowercased.
+ // NOTE(review): assumes ANYTHING_ELSE is unconditional — confirm against its definition.
+ continue;
+ }
+ ON(0) // NUL character inside the name: not implemented yet
+ {
+ TODO();
+ }
+ ON_EOF // end of input inside the name: not implemented yet
+ {
+ TODO();
+ }
ANYTHING_ELSE
{
m_current_token.m_doctype.name.append(current_input_character.value());
@@ -240,6 +289,28 @@ void HTMLTokenizer::run()
}
END_STATE
+ BEGIN_STATE(AfterDOCTYPEName) // state entered from DOCTYPEName once whitespace follows the name
+ {
+ ON_WHITESPACE // further whitespace is skipped without changing state
+ {
+ continue;
+ }
+ ON('>') // '>' finishes the DOCTYPE: emit the token, then go to the Data state
+ {
+ emit_current_token();
+ SWITCH_TO(Data);
+ }
+ ON_EOF // end of input after the name: not implemented yet
+ {
+ TODO();
+ }
+ ANYTHING_ELSE // any other character after the name: not implemented yet
+ {
+ TODO();
+ }
+ }
+ END_STATE
+
BEGIN_STATE(BeforeAttributeName)
{
ON_WHITESPACE