summary | refs | log | tree | commit | diff
path: root/Userland/Libraries
diff options
context:
space:
mode:
Diffstat (limited to 'Userland/Libraries')
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp 12
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp 2
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h 10
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp 75
-rw-r--r-- Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h 1
5 files changed, 54 insertions, 46 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
index fb3f82f5dd..22e27a3f43 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
@@ -317,7 +317,7 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token)
}
if (token.is_comment()) {
- auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data.to_string()));
+ auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data));
document().append_child(move(comment));
return;
}
@@ -347,7 +347,7 @@ void HTMLDocumentParser::handle_before_html(HTMLToken& token)
}
if (token.is_comment()) {
- auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data.to_string()));
+ auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data));
document().append_child(move(comment));
return;
}
@@ -520,7 +520,7 @@ AnythingElse:
void HTMLDocumentParser::insert_comment(HTMLToken& token)
{
- auto data = token.m_comment_or_character.data.to_string();
+ auto data = token.m_comment_or_character.data;
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
adjusted_insertion_location.parent->insert_before(adopt_ref(*new DOM::Comment(document(), data)), adjusted_insertion_location.insert_before_sibling);
}
@@ -832,7 +832,7 @@ void HTMLDocumentParser::handle_after_body(HTMLToken& token)
}
if (token.is_comment()) {
- auto data = token.m_comment_or_character.data.to_string();
+ auto data = token.m_comment_or_character.data;
auto& insertion_location = m_stack_of_open_elements.first();
insertion_location.append_child(adopt_ref(*new DOM::Comment(document(), data)));
return;
@@ -870,7 +870,7 @@ void HTMLDocumentParser::handle_after_body(HTMLToken& token)
void HTMLDocumentParser::handle_after_after_body(HTMLToken& token)
{
if (token.is_comment()) {
- auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data.to_string()));
+ auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data));
document().append_child(move(comment));
return;
}
@@ -2751,7 +2751,7 @@ void HTMLDocumentParser::handle_after_frameset(HTMLToken& token)
void HTMLDocumentParser::handle_after_after_frameset(HTMLToken& token)
{
if (token.is_comment()) {
- auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data.to_string()));
+ auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data));
document().append_child(move(comment));
return;
}
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
index eccf3c1723..432df887b9 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
@@ -53,7 +53,7 @@ String HTMLToken::to_string() const
if (type() == HTMLToken::Type::Comment || type() == HTMLToken::Type::Character) {
builder.append(" { data: '");
- builder.append(m_comment_or_character.data.to_string());
+ builder.append(m_comment_or_character.data);
builder.append("' }");
}
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h
index 95475d8c1f..0b0366ef94 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h
@@ -8,7 +8,6 @@
#include <AK/FlyString.h>
#include <AK/String.h>
-#include <AK/StringBuilder.h>
#include <AK/Types.h>
#include <AK/Utf8View.h>
#include <AK/Vector.h>
@@ -34,7 +33,10 @@ public:
{
HTMLToken token;
token.m_type = Type::Character;
- token.m_comment_or_character.data.append(code_point);
+ StringBuilder builder;
+ // FIXME: This narrows code_point to char, should this be append_code_point() instead?
+ builder.append(code_point);
+ token.m_comment_or_character.data = builder.to_string();
return token;
}
@@ -56,7 +58,7 @@ public:
u32 code_point() const
{
VERIFY(is_character());
- Utf8View view(m_comment_or_character.data.string_view());
+ Utf8View view(m_comment_or_character.data);
VERIFY(view.length() == 1);
return *view.begin();
}
@@ -209,7 +211,7 @@ private:
// Type::Comment
// Type::Character
struct {
- StringBuilder data;
+ String data;
} m_comment_or_character;
Position m_start_position;
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
index 312875ece5..90d13540f4 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -74,17 +74,18 @@ namespace Web::HTML {
goto new_state; \
} while (0)
-#define FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE \
- do { \
- for (auto code_point : m_temporary_buffer) { \
- if (consumed_as_part_of_an_attribute()) { \
- m_current_builder.append_code_point(code_point); \
- } else { \
- create_new_token(HTMLToken::Type::Character); \
- m_current_token.m_comment_or_character.data.append_code_point(code_point); \
- m_queued_tokens.enqueue(m_current_token); \
- } \
- } \
+#define FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE \
+ do { \
+ for (auto code_point : m_temporary_buffer) { \
+ if (consumed_as_part_of_an_attribute()) { \
+ m_current_builder.append_code_point(code_point); \
+ } else { \
+ create_new_token(HTMLToken::Type::Character); \
+ m_current_builder.append_code_point(code_point); \
+ m_current_token.m_comment_or_character.data = consume_current_builder(); \
+ m_queued_tokens.enqueue(m_current_token); \
+ } \
+ } \
} while (0)
#define DONT_CONSUME_NEXT_INPUT_CHARACTER \
@@ -139,12 +140,13 @@ namespace Web::HTML {
return m_queued_tokens.dequeue(); \
} while (0)
-#define EMIT_CHARACTER(code_point) \
- do { \
- create_new_token(HTMLToken::Type::Character); \
- m_current_token.m_comment_or_character.data.append_code_point(code_point); \
- m_queued_tokens.enqueue(m_current_token); \
- return m_queued_tokens.dequeue(); \
+#define EMIT_CHARACTER(code_point) \
+ do { \
+ create_new_token(HTMLToken::Type::Character); \
+ m_current_builder.append_code_point(code_point); \
+ m_current_token.m_comment_or_character.data = consume_current_builder(); \
+ m_queued_tokens.enqueue(m_current_token); \
+ return m_queued_tokens.dequeue(); \
} while (0)
#define EMIT_CURRENT_CHARACTER \
@@ -402,6 +404,7 @@ _StartOfFunction:
{
ON('>')
{
+ m_current_token.m_comment_or_character.data = consume_current_builder();
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
@@ -412,12 +415,12 @@ _StartOfFunction:
ON(0)
{
log_parse_error();
- m_current_token.m_comment_or_character.data.append_code_point(0xFFFD);
+ m_current_builder.append_code_point(0xFFFD);
continue;
}
ANYTHING_ELSE
{
- m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value());
+ m_current_builder.append_code_point(current_input_character.value());
continue;
}
}
@@ -1346,11 +1349,12 @@ _StartOfFunction:
{
ON('-')
{
- SWITCH_TO(CommentEnd);
+ SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentEnd);
}
ON('>')
{
log_parse_error();
+ consume_current_builder();
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
@@ -1361,7 +1365,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
- m_current_token.m_comment_or_character.data.append('-');
+ m_current_builder.append('-');
RECONSUME_IN(Comment);
}
}
@@ -1371,17 +1375,17 @@ _StartOfFunction:
{
ON('<')
{
- m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value());
- SWITCH_TO(CommentLessThanSign);
+ m_current_builder.append_code_point(current_input_character.value());
+ SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentLessThanSign);
}
ON('-')
{
- SWITCH_TO(CommentEndDash);
+ SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentEndDash);
}
ON(0)
{
log_parse_error();
- m_current_token.m_comment_or_character.data.append_code_point(0xFFFD);
+ m_current_builder.append_code_point(0xFFFD);
continue;
}
ON_EOF
@@ -1392,7 +1396,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
- m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value());
+ m_current_builder.append_code_point(current_input_character.value());
continue;
}
}
@@ -1402,6 +1406,7 @@ _StartOfFunction:
{
ON('>')
{
+ m_current_token.m_comment_or_character.data = consume_current_builder();
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON('!')
@@ -1410,7 +1415,7 @@ _StartOfFunction:
}
ON('-')
{
- m_current_token.m_comment_or_character.data.append('-');
+ m_current_builder.append('-');
continue;
}
ON_EOF
@@ -1421,7 +1426,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
- m_current_token.m_comment_or_character.data.append('-');
+ m_current_builder.append('-');
RECONSUME_IN(Comment);
}
}
@@ -1431,7 +1436,7 @@ _StartOfFunction:
{
ON('-')
{
- m_current_token.m_comment_or_character.data.append("--!");
+ m_current_builder.append("--!");
SWITCH_TO(CommentEndDash);
}
ON('>')
@@ -1447,7 +1452,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
- m_current_token.m_comment_or_character.data.append("--!");
+ m_current_builder.append("--!");
RECONSUME_IN(Comment);
}
}
@@ -1457,7 +1462,7 @@ _StartOfFunction:
{
ON('-')
{
- SWITCH_TO(CommentEnd);
+ SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentEnd);
}
ON_EOF
{
@@ -1467,7 +1472,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
- m_current_token.m_comment_or_character.data.append('-');
+ m_current_builder.append('-');
RECONSUME_IN(Comment);
}
}
@@ -1477,12 +1482,12 @@ _StartOfFunction:
{
ON('!')
{
- m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value());
- SWITCH_TO(CommentLessThanSignBang);
+ m_current_builder.append_code_point(current_input_character.value());
+ SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentLessThanSignBang);
}
ON('<')
{
- m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value());
+ m_current_builder.append_code_point(current_input_character.value());
continue;
}
ANYTHING_ELSE
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
index 0ede6cc4ec..33f72ccbee 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
@@ -7,6 +7,7 @@
#pragma once
#include <AK/Queue.h>
+#include <AK/StringBuilder.h>
#include <AK/StringView.h>
#include <AK/Types.h>
#include <AK/Utf8View.h>