summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Max Wipfli <mail@maxwipfli.ch> 2021-07-15 00:03:50 +0200
committer: Ali Mohammad Pur <Ali.mpfard@gmail.com> 2021-07-17 16:24:57 +0430
commit: 8b31e41692014ce51f920cfc43fe7736f984f80c (patch)
tree: aa2d86f192e9e50e73814b6ee165fd447eeecca7
parent: 918bde98b17ad890ba0a83c1de137da86519f9b1 (diff)
download: serenity-8b31e41692014ce51f920cfc43fe7736f984f80c.zip
LibWeb: Change HTMLToken::m_doctype into named DoctypeData struct
This is in preparation for an upcoming storage change of HTMLToken. In contrast to the other token types, the accessor can hand out a mutable reference to allow users to change parts of the DoctypeData easily.
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp 18
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp 2
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h 35
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp 116
4 files changed, 92 insertions, 79 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
index 10fd18ed0e..5daa31d2b9 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
@@ -258,15 +258,15 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok
DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const
{
- if (doctype_token.m_doctype.force_quirks)
+ if (doctype_token.doctype_data().force_quirks)
return DOM::QuirksMode::Yes;
// NOTE: The tokenizer puts the name into lower case for us.
- if (doctype_token.m_doctype.name != "html")
+ if (doctype_token.doctype_data().name != "html")
return DOM::QuirksMode::Yes;
- auto const& public_identifier = doctype_token.m_doctype.public_identifier;
- auto const& system_identifier = doctype_token.m_doctype.system_identifier;
+ auto const& public_identifier = doctype_token.doctype_data().public_identifier;
+ auto const& system_identifier = doctype_token.doctype_data().system_identifier;
if (public_identifier.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//"))
return DOM::QuirksMode::Yes;
@@ -285,7 +285,7 @@ DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_t
return DOM::QuirksMode::Yes;
}
- if (doctype_token.m_doctype.missing_system_identifier) {
+ if (doctype_token.doctype_data().missing_system_identifier) {
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Yes;
@@ -299,7 +299,7 @@ DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_t
if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//", CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited;
- if (!doctype_token.m_doctype.missing_system_identifier) {
+ if (!doctype_token.doctype_data().missing_system_identifier) {
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited;
@@ -324,9 +324,9 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token)
if (token.is_doctype()) {
auto doctype = adopt_ref(*new DOM::DocumentType(document()));
- doctype->set_name(token.m_doctype.name);
- doctype->set_public_id(token.m_doctype.public_identifier);
- doctype->set_system_id(token.m_doctype.system_identifier);
+ doctype->set_name(token.doctype_data().name);
+ doctype->set_public_id(token.doctype_data().public_identifier);
+ doctype->set_system_id(token.doctype_data().system_identifier);
document().append_child(move(doctype));
document().set_quirks_mode(which_quirks_mode(token));
m_insertion_mode = InsertionMode::BeforeHTML;
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
index 39fd79e9ab..df662c7e00 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
@@ -16,7 +16,7 @@ String HTMLToken::to_string() const
case HTMLToken::Type::DOCTYPE:
builder.append("DOCTYPE");
builder.append(" { name: '");
- builder.append(m_doctype.name);
+ builder.append(doctype_data().name);
builder.append("' }");
break;
case HTMLToken::Type::StartTag:
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h
index ea310fe8c9..49de74d50d 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h
@@ -47,6 +47,17 @@ public:
Position value_end_position;
};
+ struct DoctypeData { // Named bundle of the fields that describe a DOCTYPE token.
+ // NOTE: "Missing" is a distinct state from the empty string.
+ String name;
+ String public_identifier;
+ String system_identifier;
+ bool missing_name { true }; // Defaults to true: the name counts as missing until something clears this flag.
+ bool missing_public_identifier { true }; // Defaults to true, tracked separately from the string's contents.
+ bool missing_system_identifier { true }; // Defaults to true, tracked separately from the string's contents.
+ bool force_quirks { false }; // Defaults to false.
+ };
+
static HTMLToken make_character(u32 code_point)
{
HTMLToken token;
@@ -252,6 +263,18 @@ public:
});
}
+ DoctypeData const& doctype_data() const // Read-only access to this token's DOCTYPE fields.
+ {
+ VERIFY(is_doctype()); // Guard: this accessor is only valid on a DOCTYPE token.
+ return m_doctype;
+ }
+
+ DoctypeData& doctype_data() // Mutable overload: callers may change individual DOCTYPE fields through the returned reference.
+ {
+ VERIFY(is_doctype()); // Guard: this accessor is only valid on a DOCTYPE token.
+ return m_doctype;
+ }
+
Type type() const { return m_type; }
String to_string() const;
@@ -263,17 +286,7 @@ private:
Type m_type { Type::Invalid };
// Type::DOCTYPE
- struct {
- // NOTE: "Missing" is a distinct state from the empty string.
-
- String name;
- bool missing_name { true };
- String public_identifier;
- bool missing_public_identifier { true };
- String system_identifier;
- bool missing_system_identifier { true };
- bool force_quirks { false };
- } m_doctype;
+ DoctypeData m_doctype;
// Type::StartTag
// Type::EndTag
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
index f43f6bc702..fb730b28d5 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -440,7 +440,7 @@ _StartOfFunction:
{
log_parse_error();
create_new_token(HTMLToken::Type::DOCTYPE);
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@@ -462,7 +462,7 @@ _StartOfFunction:
{
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_builder.append_code_point(to_ascii_lowercase(current_input_character.value()));
- m_current_token.m_doctype.missing_name = false;
+ m_current_token.doctype_data().missing_name = false;
SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName);
}
ON(0)
@@ -470,21 +470,21 @@ _StartOfFunction:
log_parse_error();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_builder.append_code_point(0xFFFD);
- m_current_token.m_doctype.missing_name = false;
+ m_current_token.doctype_data().missing_name = false;
SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName);
}
ON('>')
{
log_parse_error();
create_new_token(HTMLToken::Type::DOCTYPE);
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
create_new_token(HTMLToken::Type::DOCTYPE);
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@@ -492,7 +492,7 @@ _StartOfFunction:
{
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_builder.append_code_point(current_input_character.value());
- m_current_token.m_doctype.missing_name = false;
+ m_current_token.doctype_data().missing_name = false;
SWITCH_TO_WITH_UNCLEAN_BUILDER(DOCTYPEName);
}
}
@@ -502,12 +502,12 @@ _StartOfFunction:
{
ON_WHITESPACE
{
- m_current_token.m_doctype.name = consume_current_builder();
+ m_current_token.doctype_data().name = consume_current_builder();
SWITCH_TO(AfterDOCTYPEName);
}
ON('>')
{
- m_current_token.m_doctype.name = consume_current_builder();
+ m_current_token.doctype_data().name = consume_current_builder();
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_ASCII_UPPER_ALPHA
@@ -524,7 +524,7 @@ _StartOfFunction:
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@@ -549,7 +549,7 @@ _StartOfFunction:
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@@ -562,7 +562,7 @@ _StartOfFunction:
SWITCH_TO(AfterDOCTYPESystemKeyword);
}
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@@ -577,32 +577,32 @@ _StartOfFunction:
ON('"')
{
log_parse_error();
- m_current_token.m_doctype.missing_public_identifier = false;
+ m_current_token.doctype_data().missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
}
ON('\'')
{
log_parse_error();
- m_current_token.m_doctype.missing_public_identifier = false;
+ m_current_token.doctype_data().missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
}
ON('>')
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@@ -617,34 +617,34 @@ _StartOfFunction:
ON('"')
{
log_parse_error();
- m_current_token.m_doctype.system_identifier = {};
- m_current_token.m_doctype.missing_system_identifier = false;
+ m_current_token.doctype_data().system_identifier = {};
+ m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
log_parse_error();
- m_current_token.m_doctype.system_identifier = {};
- m_current_token.m_doctype.missing_system_identifier = false;
+ m_current_token.doctype_data().system_identifier = {};
+ m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON('>')
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@@ -658,31 +658,31 @@ _StartOfFunction:
}
ON('"')
{
- m_current_token.m_doctype.missing_public_identifier = false;
+ m_current_token.doctype_data().missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
}
ON('\'')
{
- m_current_token.m_doctype.missing_public_identifier = false;
+ m_current_token.doctype_data().missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
}
ON('>')
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@@ -696,31 +696,31 @@ _StartOfFunction:
}
ON('"')
{
- m_current_token.m_doctype.missing_system_identifier = false;
+ m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
- m_current_token.m_doctype.missing_system_identifier = false;
+ m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON('>')
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@@ -730,7 +730,7 @@ _StartOfFunction:
{
ON('"')
{
- m_current_token.m_doctype.public_identifier = consume_current_builder();
+ m_current_token.doctype_data().public_identifier = consume_current_builder();
SWITCH_TO(AfterDOCTYPEPublicIdentifier);
}
ON(0)
@@ -742,14 +742,14 @@ _StartOfFunction:
ON('>')
{
log_parse_error();
- m_current_token.m_doctype.public_identifier = consume_current_builder();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().public_identifier = consume_current_builder();
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@@ -765,7 +765,7 @@ _StartOfFunction:
{
ON('\'')
{
- m_current_token.m_doctype.public_identifier = consume_current_builder();
+ m_current_token.doctype_data().public_identifier = consume_current_builder();
SWITCH_TO(AfterDOCTYPEPublicIdentifier);
}
ON(0)
@@ -777,14 +777,14 @@ _StartOfFunction:
ON('>')
{
log_parse_error();
- m_current_token.m_doctype.public_identifier = consume_current_builder();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().public_identifier = consume_current_builder();
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@@ -800,7 +800,7 @@ _StartOfFunction:
{
ON('"')
{
- m_current_token.m_doctype.public_identifier = consume_current_builder();
+ m_current_token.doctype_data().public_identifier = consume_current_builder();
SWITCH_TO(AfterDOCTYPESystemIdentifier);
}
ON(0)
@@ -812,14 +812,14 @@ _StartOfFunction:
ON('>')
{
log_parse_error();
- m_current_token.m_doctype.public_identifier = consume_current_builder();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().public_identifier = consume_current_builder();
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@@ -835,7 +835,7 @@ _StartOfFunction:
{
ON('\'')
{
- m_current_token.m_doctype.system_identifier = consume_current_builder();
+ m_current_token.doctype_data().system_identifier = consume_current_builder();
SWITCH_TO(AfterDOCTYPESystemIdentifier);
}
ON(0)
@@ -847,14 +847,14 @@ _StartOfFunction:
ON('>')
{
log_parse_error();
- m_current_token.m_doctype.system_identifier = consume_current_builder();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().system_identifier = consume_current_builder();
+ m_current_token.doctype_data().force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
@@ -879,26 +879,26 @@ _StartOfFunction:
ON('"')
{
log_parse_error();
- m_current_token.m_doctype.missing_system_identifier = false;
+ m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
log_parse_error();
- m_current_token.m_doctype.missing_system_identifier = false;
+ m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@@ -916,25 +916,25 @@ _StartOfFunction:
}
ON('"')
{
- m_current_token.m_doctype.missing_system_identifier = false;
+ m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
- m_current_token.m_doctype.missing_system_identifier = false;
+ m_current_token.doctype_data().missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}
ANYTHING_ELSE
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
}
}
@@ -953,7 +953,7 @@ _StartOfFunction:
ON_EOF
{
log_parse_error();
- m_current_token.m_doctype.force_quirks = true;
+ m_current_token.doctype_data().force_quirks = true;
m_queued_tokens.enqueue(move(m_current_token));
EMIT_EOF;
}