From 406fd95f327bd196e3781f8b1a21cfdcee684c2b Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Mon, 25 May 2020 19:50:44 +0200
Subject: LibWeb: Flesh out the remaining DOCTYPE related tokenizer states

We can now parse public and system identifiers!
Not super useful, but at least we can do it :^)
---
 Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 311 ++++++++++++++++++++++++++++++
 1 file changed, 311 insertions(+)

(limited to 'Libraries')

diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
index b0b1bfaab0..ba4df5d474 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -322,6 +322,317 @@ Optional HTMLTokenizer::next_token()
             END_STATE
 
             BEGIN_STATE(AfterDOCTYPEName)
+            { // AfterDOCTYPEName: skip whitespace, emit on a greater-than sign, otherwise look for the PUBLIC or SYSTEM keyword.
+                ON_WHITESPACE
+                {
+                    continue; // Whitespace is skipped; no state change is made here.
+                }
+                ON('>')
+                {
+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); // End of the DOCTYPE: hand the current token over and go to the Data state.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input in this state is not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    // The current character supplies the first letter; the remaining letters are matched case-insensitively.
+                    if (toupper(current_input_character.value()) == 'P' && consume_next_if_match("UBLIC", CaseSensitivity::CaseInsensitive)) {
+                        SWITCH_TO(AfterDOCTYPEPublicKeyword);
+                    }
+                    if (toupper(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) {
+                        SWITCH_TO(AfterDOCTYPESystemKeyword);
+                    }
+                    TODO(); // NOTE(review): only correct if SWITCH_TO leaves this block (e.g. via goto); confirm against the macro definition.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(AfterDOCTYPEPublicKeyword) // Reached from AfterDOCTYPEName once the PUBLIC keyword has been matched.
+            {
+                ON_WHITESPACE
+                {
+                    SWITCH_TO(BeforeDOCTYPEPublicIdentifier);
+                }
+                ON('"')
+                {
+                    TODO(); // Double quote directly after the keyword: not handled yet.
+                }
+                ON('\'')
+                {
+                    TODO(); // Single quote directly after the keyword: not handled yet.
+                }
+                ON('>')
+                {
+                    TODO(); // Greater-than sign before any identifier: not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    TODO(); // Any other character: not handled yet.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(AfterDOCTYPESystemKeyword) // Reached from AfterDOCTYPEName once the SYSTEM keyword has been matched.
+            {
+                ON_WHITESPACE
+                {
+                    SWITCH_TO(BeforeDOCTYPESystemIdentifier);
+                }
+                ON('"')
+                {
+                    TODO(); // Double quote directly after the keyword: not handled yet.
+                }
+                ON('\'')
+                {
+                    TODO(); // Single quote directly after the keyword: not handled yet.
+                }
+                ON('>')
+                {
+                    TODO(); // Greater-than sign before any identifier: not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    TODO(); // Any other character: not handled yet.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(BeforeDOCTYPEPublicIdentifier) // Skips whitespace until an opening quote starts the public identifier.
+            {
+                ON_WHITESPACE
+                {
+                    continue; // Whitespace is skipped; no state change is made here.
+                }
+                ON('"')
+                {
+                    m_current_token.m_doctype.public_identifier.clear(); // Start from an empty identifier before characters are appended.
+                    SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
+                }
+                ON('\'')
+                {
+                    m_current_token.m_doctype.public_identifier.clear(); // Start from an empty identifier before characters are appended.
+                    SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
+                }
+                ON('>')
+                {
+                    TODO(); // Greater-than sign with no identifier: not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    TODO(); // Unquoted identifier text: not handled yet.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(BeforeDOCTYPESystemIdentifier) // Skips whitespace until an opening quote starts the system identifier.
+            {
+                ON_WHITESPACE
+                {
+                    continue; // Whitespace is skipped; no state change is made here.
+                }
+                ON('"')
+                {
+                    m_current_token.m_doctype.system_identifier.clear(); // Start from an empty identifier before characters are appended.
+                    SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
+                }
+                ON('\'')
+                {
+                    m_current_token.m_doctype.system_identifier.clear(); // Start from an empty identifier before characters are appended.
+                    SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
+                }
+                ON('>')
+                {
+                    TODO(); // Greater-than sign with no identifier: not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    TODO(); // Unquoted identifier text: not handled yet.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuoted) // Collects public identifier characters up to the closing double quote.
+            {
+                ON('"')
+                {
+                    SWITCH_TO(AfterDOCTYPEPublicIdentifier); // Closing quote ends the identifier.
+                }
+                ON(0)
+                {
+                    TODO(); // Character value 0 inside the identifier: not handled yet.
+                }
+                ON('>')
+                {
+                    TODO(); // Greater-than sign inside the quoted identifier: not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input inside the quoted identifier: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    m_current_token.m_doctype.public_identifier.append(current_input_character.value()); // Accumulate the character into the public identifier.
+                    continue; // No state change; keep collecting.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuoted) // Collects public identifier characters up to the closing single quote.
+            {
+                ON('\'')
+                {
+                    SWITCH_TO(AfterDOCTYPEPublicIdentifier); // Closing quote ends the identifier.
+                }
+                ON(0)
+                {
+                    TODO(); // Character value 0 inside the identifier: not handled yet.
+                }
+                ON('>')
+                {
+                    TODO(); // Greater-than sign inside the quoted identifier: not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input inside the quoted identifier: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    m_current_token.m_doctype.public_identifier.append(current_input_character.value()); // Accumulate the character into the public identifier.
+                    continue; // No state change; keep collecting.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuoted) // Collects system identifier characters up to the closing double quote.
+            {
+                ON('"')
+                {
+                    SWITCH_TO(AfterDOCTYPESystemIdentifier); // Closing quote ends the identifier.
+                }
+                ON(0)
+                {
+                    TODO(); // Character value 0 inside the identifier: not handled yet.
+                }
+                ON('>')
+                {
+                    TODO(); // Greater-than sign inside the quoted identifier: not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input inside the quoted identifier: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    m_current_token.m_doctype.system_identifier.append(current_input_character.value()); // Accumulate the character into the system identifier.
+                    continue; // No state change; keep collecting.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(DOCTYPESystemIdentifierSingleQuoted) // Collects system identifier characters up to the closing single quote.
+            {
+                ON('\'')
+                {
+                    SWITCH_TO(AfterDOCTYPESystemIdentifier); // Closing quote ends the identifier.
+                }
+                ON(0)
+                {
+                    TODO(); // Character value 0 inside the identifier: not handled yet.
+                }
+                ON('>')
+                {
+                    TODO(); // Greater-than sign inside the quoted identifier: not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input inside the quoted identifier: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    m_current_token.m_doctype.system_identifier.append(current_input_character.value()); // Accumulate the character into the system identifier.
+                    continue; // No state change; keep collecting.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(AfterDOCTYPEPublicIdentifier) // Entered when a quoted public identifier has been closed.
+            {
+                ON_WHITESPACE
+                {
+                    SWITCH_TO(BetweenDOCTYPEPublicAndSystemIdentifiers); // A system identifier may still follow.
+                }
+                ON('>')
+                {
+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); // DOCTYPE ends without a system identifier.
+                }
+                ON('"')
+                {
+                    TODO(); // Quote immediately after the public identifier (no whitespace): not handled yet.
+                }
+                ON('\'')
+                {
+                    TODO(); // Quote immediately after the public identifier (no whitespace): not handled yet.
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    TODO(); // Any other character: not handled yet.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiers) // Whitespace gap after the public identifier; a quote here starts the system identifier.
+            {
+                ON_WHITESPACE
+                {
+                    continue; // Whitespace is skipped; no state change is made here.
+                }
+                ON('>')
+                {
+                    SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); // DOCTYPE ends without a system identifier.
+                }
+                ON('"')
+                {
+                    m_current_token.m_doctype.system_identifier.clear(); // Start from an empty identifier before characters are appended.
+                    SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
+                }
+                ON('\'')
+                {
+                    m_current_token.m_doctype.system_identifier.clear(); // Start from an empty identifier before characters are appended.
+                    SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
+                }
+                ON_EOF
+                {
+                    TODO(); // End of input: not handled yet.
+                }
+                ANYTHING_ELSE
+                {
+                    TODO(); // Any other character: not handled yet.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(AfterDOCTYPESystemIdentifier)
             {
                 ON_WHITESPACE
                 {
--
cgit v1.2.3