summaryrefslogtreecommitdiff
path: root/AK/URLParser.cpp
diff options
context:
space:
mode:
authorMacDue <macdue@dueutil.tech>2023-04-09 14:21:00 +0100
committerAndreas Kling <kling@serenityos.org>2023-04-12 07:40:22 +0200
commit8283e8b88c5ae78d72894cafa765dc5e0289161d (patch)
tree7d2acc7ac9a67e57aee292f1fb1d4ca5e99ec64b /AK/URLParser.cpp
parentd2fc8efd9ed0b11fb9399be72113622bf3231426 (diff)
downloadserenity-8283e8b88c5ae78d72894cafa765dc5e0289161d.zip
AK: Don't store parts of URLs percent decoded
As noted in serval comments doing this goes against the WC3 spec, and breaks parsing then re-serializing URLs that contain percent encoded data, that was not encoded using the same character set as the serializer. For example, previously if you had a URL like: https:://foo.com/what%2F%2F (the path is what + '//' percent encoded) Creating URL("https:://foo.com/what%2F%2F").serialize() would return: https://foo.com/what// Which is incorrect and not the same as the URL we passed. This is because the re-serializing uses the PercentEncodeSet::Path which does not include '/'. Only doing the percent encoding in the setters fixes this, which is required to navigate to Google Street View (which includes a percent encoded URL in its URL). Seems to fix #13477 too
Diffstat (limited to 'AK/URLParser.cpp')
-rw-r--r--AK/URLParser.cpp31
1 files changed, 11 insertions, 20 deletions
diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp
index 7b570f6257..842f7f4a0a 100644
--- a/AK/URLParser.cpp
+++ b/AK/URLParser.cpp
@@ -194,9 +194,6 @@ Optional<URL> URLParser::parse_data_url(StringView raw_input)
// future for validation of URLs, which would then lead to infinite recursion.
// The same goes for base_url, because e.g. the port() getter does not always return m_port, and we are interested in the underlying member
// variables' values here, not what the URL class presents to its users.
-// NOTE: Since the URL class's member variables contain percent decoded data, we have to deviate from the URL parser specification when setting
-// some of those values. Because the specification leaves all values percent encoded in their URL data structure, we have to percent decode
-// everything before setting the member variables.
URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override)
{
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsing '{}'", raw_input);
@@ -310,7 +307,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
++iterator;
} else {
url->m_cannot_be_a_base_url = true;
- url->append_path("");
+ url->append_slash();
state = State::CannotBeABaseUrlPath;
}
} else {
@@ -441,13 +438,11 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (password_token_seen) {
builder.append(url->password());
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
- // NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
- url->m_password = URL::percent_decode(builder.string_view());
+ url->m_password = builder.string_view();
} else {
builder.append(url->username());
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
- // NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
- url->m_username = URL::percent_decode(builder.string_view());
+ url->m_username = builder.string_view();
}
}
buffer.clear();
@@ -561,7 +556,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
url->m_host = base_url->m_host;
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0]))
- url->append_path(base_url->m_paths[0]);
+ url->append_path(base_url->m_paths[0], URL::ApplyPercentEncoding::No);
state = State::Path;
continue;
}
@@ -616,9 +611,9 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (!url->m_paths.is_empty() && !(url->m_scheme == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
url->m_paths.remove(url->m_paths.size() - 1);
if (code_point != '/' && !(url->is_special() && code_point == '\\'))
- url->append_path("");
+ url->append_slash();
} else if (is_single_dot_path_segment(buffer.string_view()) && code_point != '/' && !(url->is_special() && code_point == '\\')) {
- url->append_path("");
+ url->append_slash();
} else if (!is_single_dot_path_segment(buffer.string_view())) {
if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
auto drive_letter = buffer.string_view()[0];
@@ -626,8 +621,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
buffer.append(drive_letter);
buffer.append(':');
}
- // NOTE: This needs to be percent decoded since the member variables contain decoded data.
- url->append_path(URL::percent_decode(buffer.string_view()));
+ url->append_path(buffer.string_view(), URL::ApplyPercentEncoding::No);
}
buffer.clear();
if (code_point == '?') {
@@ -649,13 +643,12 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
// NOTE: Verify that the assumptions required for this simplification are correct.
VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());
if (code_point == '?') {
- // NOTE: This needs to be percent decoded since the member variables contain decoded data.
- url->m_paths[0] = URL::percent_decode(buffer.string_view());
+ url->m_paths[0] = buffer.string_view();
url->m_query = "";
state = State::Query;
} else if (code_point == '#') {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
- url->m_paths[0] = URL::percent_decode(buffer.string_view());
+ url->m_paths[0] = buffer.string_view();
url->m_fragment = "";
state = State::Fragment;
} else {
@@ -665,8 +658,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (code_point != end_of_file) {
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
} else {
- // NOTE: This needs to be percent decoded since the member variables contain decoded data.
- url->m_paths[0] = URL::percent_decode(buffer.string_view());
+ url->m_paths[0] = buffer.string_view();
}
}
break;
@@ -696,8 +688,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
buffer.append_code_point(code_point);
} else {
- // NOTE: This needs to be percent decoded since the member variables contain decoded data.
- url->m_fragment = URL::percent_decode(buffer.string_view());
+ url->m_fragment = buffer.string_view();
buffer.clear();
}
break;