summaryrefslogtreecommitdiff
path: root/AK
diff options
context:
space:
mode:
authorAndreas Kling <kling@serenityos.org>2022-04-10 00:48:15 +0200
committerAndreas Kling <kling@serenityos.org>2022-04-10 01:37:45 +0200
commit79c77debb029af258353f5b188fd4f07d2c6b73a (patch)
treeed1546e55940fb5a4126245c6ed1cbf17f9e9526 /AK
parent3724ce765ea0fa24b11f6e57d9e0625b7fb94c62 (diff)
downloadserenity-79c77debb029af258353f5b188fd4f07d2c6b73a.zip
AK: Don't destructively re-encode query strings in the URL parser
We were decoding and then re-encoding the query string in URLs. This round-trip caused us to lose information about plus ('+') ASCII characters encoded as "%2B".
Diffstat (limited to 'AK')
-rw-r--r--AK/URL.cpp2
-rw-r--r--AK/URL.h2
-rw-r--r--AK/URLParser.cpp39
3 files changed, 40 insertions, 3 deletions
diff --git a/AK/URL.cpp b/AK/URL.cpp
index 7d4e1ee19e..e83eac4c7a 100644
--- a/AK/URL.cpp
+++ b/AK/URL.cpp
@@ -367,7 +367,7 @@ void URL::append_percent_encoded(StringBuilder& builder, u32 code_point)
}
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
-constexpr bool code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set)
+bool URL::code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set)
{
switch (set) {
case URL::PercentEncodeSet::C0Control:
diff --git a/AK/URL.h b/AK/URL.h
index 880dadae58..f60c8e1c93 100644
--- a/AK/URL.h
+++ b/AK/URL.h
@@ -113,6 +113,8 @@ public:
bool operator==(URL const& other) const { return equals(other, ExcludeFragment::No); }
+ static bool code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet);
+
private:
URL(String&& data_mime_type, String&& data_payload, bool payload_is_base64)
: m_valid(true)
diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp
index d508aa96dc..cd50ffbd7d 100644
--- a/AK/URLParser.cpp
+++ b/AK/URLParser.cpp
@@ -115,6 +115,41 @@ constexpr bool is_double_dot_path_segment(StringView input)
return input == ".."sv || input.equals_ignoring_case(".%2e"sv) || input.equals_ignoring_case("%2e."sv) || input.equals_ignoring_case("%2e%2e"sv);
}
+// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
+static String percent_encode_after_encoding(StringView input, URL::PercentEncodeSet percent_encode_set, bool space_as_plus = false)
+{
+ // NOTE: This is written somewhat ad-hoc since we don't yet implement the Encoding spec.
+
+ StringBuilder output;
+
+ // 3. For each byte of encodeOutput converted to a byte sequence:
+ for (auto byte : input) {
+ // 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
+ if (space_as_plus && byte == ' ') {
+ output.append('+');
+ continue;
+ }
+
+ // 2. Let isomorph be a code point whose value is byteโ€™s value.
+ u32 isomorph = byte;
+
+ // 3. Assert: percentEncodeSet includes all non-ASCII code points.
+
+ // 4. If isomorphic is not in percentEncodeSet, then append isomorph to output.
+ if (!URL::code_point_is_in_percent_encode_set(isomorph, percent_encode_set)) {
+ output.append_code_point(isomorph);
+ }
+
+ // 5. Otherwise, percent-encode byte and append the result to output.
+ else {
+ output.appendff("%{:02X}", byte);
+ }
+ }
+
+ // 6. Return output.
+ return output.to_string();
+}
+
// https://fetch.spec.whatwg.org/#data-urls
// FIXME: This only loosely follows the spec, as we use the same class for "regular" and data URLs, unlike the spec.
Optional<URL> URLParser::parse_data_url(StringView raw_input)
@@ -635,11 +670,11 @@ URL URLParser::parse(StringView raw_input, URL const* base_url, Optional<URL> ur
}
break;
case State::Query:
+ // https://url.spec.whatwg.org/#query-state
if (code_point == end_of_file || code_point == '#') {
VERIFY(url->m_query == "");
auto query_percent_encode_set = url->is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
- // NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
- url->m_query = URL::percent_decode(URL::percent_encode(buffer.string_view(), query_percent_encode_set));
+ url->m_query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set);
buffer.clear();
if (code_point == '#') {
url->m_fragment = "";