diff options
author | Andreas Kling <kling@serenityos.org> | 2020-06-23 16:19:07 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-06-23 16:45:01 +0200 |
commit | c33d17d363cc88bc6193f88c1b94eadc043fcaac (patch) | |
tree | b312b12400ed6682338738c7d63d4a631e34d330 | |
parent | 64001227607e0ea602b228f6fadd8efc90046450 (diff) | |
download | serenity-c33d17d363cc88bc6193f88c1b94eadc043fcaac.zip |
LibWeb: Fix tokenization of attributes with URL query strings in them
<a href="/foo&amp=bar"> was being tokenized into <a href="/foo&=bar">.
The spec mentions this but I had overlooked it. The bug happens because
we interpreted the "&amp" as a named character reference.
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 8 |
1 files changed, 8 insertions, 0 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp index 1a3bdc5ac3..eebf3e2601 100644 --- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp @@ -1458,6 +1458,14 @@ _StartOfFunction: for (auto ch : match.value().entity) m_temporary_buffer.append(ch); + if (consumed_as_part_of_an_attribute() && match.value().codepoints.last() != ';') { + auto next = peek_codepoint(0); + if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) { + FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + SWITCH_TO_RETURN_STATE; + } + } + if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) { auto next_codepoint = peek_codepoint(0); if (next_codepoint.has_value() && next_codepoint.value() == '=') { |