diff options
author | Andreas Kling <kling@serenityos.org> | 2020-05-23 19:56:07 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-05-23 21:08:25 +0200 |
commit | e45c8b842c64bef30286ac3a34fc5385d1308a9f (patch) | |
tree | 3a0a8ead1ac0b3f61d06580f0ab842042323be23 /Libraries | |
parent | b5c0703e1468c96d73dc3077d73df976dc19a214 (diff) | |
download | serenity-e45c8b842c64bef30286ac3a34fc5385d1308a9f.zip |
LibWeb: Implement a bit more of DOCTYPE tokenization
Diffstat (limited to 'Libraries')
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp index 39e3f8b594..b5835446d3 100644 --- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp @@ -58,6 +58,9 @@ #define ON_ASCII_ALPHA \ if (current_input_character.has_value() && isalpha(current_input_character.value())) +#define ON_ASCII_UPPER_ALPHA \ + if (current_input_character.has_value() && current_input_character.value() >= 'A' && current_input_character.value() <= 'Z') + #define ON_WHITESPACE \ if (current_input_character.has_value() && (current_input_character.value() == '\t' || current_input_character.value() == '\a' || current_input_character.value() == '\f' || current_input_character.value() == ' ')) @@ -207,6 +210,18 @@ void HTMLTokenizer::run() { SWITCH_TO(BeforeDOCTYPEName); } + ON('>') + { + RECONSUME_IN(BeforeDOCTYPEName); + } + ON_EOF + { + TODO(); + } + ANYTHING_ELSE + { + TODO(); + } } END_STATE @@ -216,6 +231,24 @@ void HTMLTokenizer::run() { continue; } + ON_ASCII_UPPER_ALPHA + { + create_new_token(HTMLToken::Type::DOCTYPE); + m_current_token.m_doctype.name.append(tolower(current_input_character.value())); + SWITCH_TO(DOCTYPEName); + } + ON(0) + { + TODO(); + } + ON('>') + { + TODO(); + } + ON_EOF + { + TODO(); + } ANYTHING_ELSE { create_new_token(HTMLToken::Type::DOCTYPE); @@ -227,11 +260,27 @@ void HTMLTokenizer::run() BEGIN_STATE(DOCTYPEName) { + ON_WHITESPACE + { + SWITCH_TO(AfterDOCTYPEName); + } ON('>') { emit_current_token(); SWITCH_TO(Data); } + ON_ASCII_UPPER_ALPHA + { + m_current_token.m_doctype.name.append(tolower(current_input_character.value())); + } + ON(0) + { + TODO(); + } + ON_EOF + { + TODO(); + } ANYTHING_ELSE { m_current_token.m_doctype.name.append(current_input_character.value()); @@ -240,6 +289,28 @@ void HTMLTokenizer::run() } END_STATE + BEGIN_STATE(AfterDOCTYPEName) + { + ON_WHITESPACE + { + continue; + } + ON('>') + { + emit_current_token(); + SWITCH_TO(Data); + } + ON_EOF + { + TODO(); + } + ANYTHING_ELSE + { + TODO(); + } + } + END_STATE + BEGIN_STATE(BeforeAttributeName) { ON_WHITESPACE |