summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andreas Kling <kling@serenityos.org> 2020-06-23 16:19:07 +0200
committer: Andreas Kling <kling@serenityos.org> 2020-06-23 16:45:01 +0200
commit: c33d17d363cc88bc6193f88c1b94eadc043fcaac (patch)
tree: b312b12400ed6682338738c7d63d4a631e34d330
parent: 64001227607e0ea602b228f6fadd8efc90046450 (diff)
download: serenity-c33d17d363cc88bc6193f88c1b94eadc043fcaac.zip
LibWeb: Fix tokenization of attributes with URL query strings in them
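<a href="/foo&amp=bar"> was being tokenized into <a href="/foo&=bar">. The spec covers this case, but I had overlooked it: inside an attribute value, a named character reference that is not terminated by a semicolon and is immediately followed by "=" or an ASCII alphanumeric must be left as-is rather than decoded. The bug happened because we interpreted the "&amp" as a named character reference.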
-rw-r--r-- Libraries/LibWeb/Parser/HTMLTokenizer.cpp 8
1 file changed, 8 insertions, 0 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
index 1a3bdc5ac3..eebf3e2601 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -1458,6 +1458,14 @@ _StartOfFunction:
for (auto ch : match.value().entity)
m_temporary_buffer.append(ch);
+ if (consumed_as_part_of_an_attribute() && match.value().codepoints.last() != ';') {
+ auto next = peek_codepoint(0);
+ if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) {
+ FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
+ SWITCH_TO_RETURN_STATE;
+ }
+ }
+
if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) {
auto next_codepoint = peek_codepoint(0);
if (next_codepoint.has_value() && next_codepoint.value() == '=') {