diff options
5 files changed, 54 insertions, 46 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp index fb3f82f5dd..22e27a3f43 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp @@ -317,7 +317,7 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token) } if (token.is_comment()) { - auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data.to_string())); + auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data)); document().append_child(move(comment)); return; } @@ -347,7 +347,7 @@ void HTMLDocumentParser::handle_before_html(HTMLToken& token) } if (token.is_comment()) { - auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data.to_string())); + auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data)); document().append_child(move(comment)); return; } @@ -520,7 +520,7 @@ AnythingElse: void HTMLDocumentParser::insert_comment(HTMLToken& token) { - auto data = token.m_comment_or_character.data.to_string(); + auto data = token.m_comment_or_character.data; auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); adjusted_insertion_location.parent->insert_before(adopt_ref(*new DOM::Comment(document(), data)), adjusted_insertion_location.insert_before_sibling); } @@ -832,7 +832,7 @@ void HTMLDocumentParser::handle_after_body(HTMLToken& token) } if (token.is_comment()) { - auto data = token.m_comment_or_character.data.to_string(); + auto data = token.m_comment_or_character.data; auto& insertion_location = m_stack_of_open_elements.first(); insertion_location.append_child(adopt_ref(*new DOM::Comment(document(), data))); return; @@ -870,7 +870,7 @@ void HTMLDocumentParser::handle_after_body(HTMLToken& token) void HTMLDocumentParser::handle_after_after_body(HTMLToken& token) { if (token.is_comment()) { - auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data.to_string())); + auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data)); document().append_child(move(comment)); return; } @@ -2751,7 +2751,7 @@ void HTMLDocumentParser::handle_after_frameset(HTMLToken& token) void HTMLDocumentParser::handle_after_after_frameset(HTMLToken& token) { if (token.is_comment()) { - auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data.to_string())); + auto comment = adopt_ref(*new DOM::Comment(document(), token.m_comment_or_character.data)); document().append_child(move(comment)); return; } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp index eccf3c1723..432df887b9 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp @@ -53,7 +53,7 @@ String HTMLToken::to_string() const if (type() == HTMLToken::Type::Comment || type() == HTMLToken::Type::Character) { builder.append(" { data: '"); - builder.append(m_comment_or_character.data.to_string()); + builder.append(m_comment_or_character.data); builder.append("' }"); } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h index 95475d8c1f..0b0366ef94 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h @@ -8,7 +8,6 @@ #include <AK/FlyString.h> #include <AK/String.h> -#include <AK/StringBuilder.h> #include <AK/Types.h> #include <AK/Utf8View.h> #include <AK/Vector.h> @@ -34,7 +33,10 @@ public: { HTMLToken token; token.m_type = Type::Character; - token.m_comment_or_character.data.append(code_point); + StringBuilder builder; + // FIXME: This narrows code_point to char, should this be append_code_point() instead? + builder.append(code_point); + token.m_comment_or_character.data = builder.to_string(); return token; } @@ -56,7 +58,7 @@ public: u32 code_point() const { VERIFY(is_character()); - Utf8View view(m_comment_or_character.data.string_view()); + Utf8View view(m_comment_or_character.data); VERIFY(view.length() == 1); return *view.begin(); } @@ -209,7 +211,7 @@ private: // Type::Comment // Type::Character struct { - StringBuilder data; + String data; } m_comment_or_character; Position m_start_position; diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 312875ece5..90d13540f4 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -74,17 +74,18 @@ namespace Web::HTML { goto new_state; \ } while (0) -#define FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE \ - do { \ - for (auto code_point : m_temporary_buffer) { \ - if (consumed_as_part_of_an_attribute()) { \ - m_current_builder.append_code_point(code_point); \ - } else { \ - create_new_token(HTMLToken::Type::Character); \ - m_current_token.m_comment_or_character.data.append_code_point(code_point); \ - m_queued_tokens.enqueue(m_current_token); \ - } \ - } \ +#define FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE \ + do { \ + for (auto code_point : m_temporary_buffer) { \ + if (consumed_as_part_of_an_attribute()) { \ + m_current_builder.append_code_point(code_point); \ + } else { \ + create_new_token(HTMLToken::Type::Character); \ + m_current_builder.append_code_point(code_point); \ + m_current_token.m_comment_or_character.data = consume_current_builder(); \ + m_queued_tokens.enqueue(m_current_token); \ + } \ + } \ } while (0) #define DONT_CONSUME_NEXT_INPUT_CHARACTER \ @@ -139,12 +140,13 @@ namespace Web::HTML { return m_queued_tokens.dequeue(); \ } while (0) -#define EMIT_CHARACTER(code_point) \ - do { \ - create_new_token(HTMLToken::Type::Character); \ - m_current_token.m_comment_or_character.data.append_code_point(code_point); \ - m_queued_tokens.enqueue(m_current_token); \ - return m_queued_tokens.dequeue(); \ +#define EMIT_CHARACTER(code_point) \ + do { \ + create_new_token(HTMLToken::Type::Character); \ + m_current_builder.append_code_point(code_point); \ + m_current_token.m_comment_or_character.data = consume_current_builder(); \ + m_queued_tokens.enqueue(m_current_token); \ + return m_queued_tokens.dequeue(); \ } while (0) #define EMIT_CURRENT_CHARACTER \ @@ -402,6 +404,7 @@ _StartOfFunction: { ON('>') { + m_current_token.m_comment_or_character.data = consume_current_builder(); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF @@ -412,12 +415,12 @@ _StartOfFunction: ON(0) { log_parse_error(); - m_current_token.m_comment_or_character.data.append_code_point(0xFFFD); + m_current_builder.append_code_point(0xFFFD); continue; } ANYTHING_ELSE { - m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value()); + m_current_builder.append_code_point(current_input_character.value()); continue; } } @@ -1346,11 +1349,12 @@ _StartOfFunction: { ON('-') { - SWITCH_TO(CommentEnd); + SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentEnd); } ON('>') { log_parse_error(); + consume_current_builder(); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF @@ -1361,7 +1365,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_comment_or_character.data.append('-'); + m_current_builder.append('-'); RECONSUME_IN(Comment); } } @@ -1371,17 +1375,17 @@ _StartOfFunction: { ON('<') { - m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value()); - SWITCH_TO(CommentLessThanSign); + m_current_builder.append_code_point(current_input_character.value()); + SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentLessThanSign); } ON('-') { - SWITCH_TO(CommentEndDash); + SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentEndDash); } ON(0) { log_parse_error(); - m_current_token.m_comment_or_character.data.append_code_point(0xFFFD); + m_current_builder.append_code_point(0xFFFD); continue; } ON_EOF @@ -1392,7 +1396,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value()); + m_current_builder.append_code_point(current_input_character.value()); continue; } } @@ -1402,6 +1406,7 @@ _StartOfFunction: { ON('>') { + m_current_token.m_comment_or_character.data = consume_current_builder(); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON('!') @@ -1410,7 +1415,7 @@ _StartOfFunction: } ON('-') { - m_current_token.m_comment_or_character.data.append('-'); + m_current_builder.append('-'); continue; } ON_EOF @@ -1421,7 +1426,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_comment_or_character.data.append('-'); + m_current_builder.append('-'); RECONSUME_IN(Comment); } } @@ -1431,7 +1436,7 @@ _StartOfFunction: { ON('-') { - m_current_token.m_comment_or_character.data.append("--!"); + m_current_builder.append("--!"); SWITCH_TO(CommentEndDash); } ON('>') @@ -1447,7 +1452,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_comment_or_character.data.append("--!"); + m_current_builder.append("--!"); RECONSUME_IN(Comment); } } @@ -1457,7 +1462,7 @@ _StartOfFunction: { ON('-') { - SWITCH_TO(CommentEnd); + SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentEnd); } ON_EOF { @@ -1467,7 +1472,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_comment_or_character.data.append('-'); + m_current_builder.append('-'); RECONSUME_IN(Comment); } } @@ -1477,12 +1482,12 @@ _StartOfFunction: { ON('!') { - m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value()); - SWITCH_TO(CommentLessThanSignBang); + m_current_builder.append_code_point(current_input_character.value()); + SWITCH_TO_WITH_UNCLEAN_BUILDER(CommentLessThanSignBang); } ON('<') { - m_current_token.m_comment_or_character.data.append_code_point(current_input_character.value()); + m_current_builder.append_code_point(current_input_character.value()); continue; } ANYTHING_ELSE diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h index 0ede6cc4ec..33f72ccbee 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h @@ -7,6 +7,7 @@ #pragma once #include <AK/Queue.h> +#include <AK/StringBuilder.h> #include <AK/StringView.h> #include <AK/Types.h> #include <AK/Utf8View.h> |