diff options
author | MacDue <macdue@dueutil.tech> | 2023-04-09 14:21:00 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2023-04-12 07:40:22 +0200 |
commit | 8283e8b88c5ae78d72894cafa765dc5e0289161d (patch) | |
tree | 7d2acc7ac9a67e57aee292f1fb1d4ca5e99ec64b /AK/URLParser.cpp | |
parent | d2fc8efd9ed0b11fb9399be72113622bf3231426 (diff) | |
download | serenity-8283e8b88c5ae78d72894cafa765dc5e0289161d.zip |
AK: Don't store parts of URLs percent decoded
As noted in several comments, doing this goes against the W3C spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data that was not encoded using the same character set as
the serializer.
For example, previously if you had a URL like:
https://foo.com/what%2F%2F (the path is what + '//' percent encoded)
Creating URL("https://foo.com/what%2F%2F").serialize() would return:
https://foo.com/what//
Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.
Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).
Seems to fix #13477 too
Diffstat (limited to 'AK/URLParser.cpp')
-rw-r--r-- | AK/URLParser.cpp | 31 |
1 files changed, 11 insertions, 20 deletions
diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp index 7b570f6257..842f7f4a0a 100644 --- a/AK/URLParser.cpp +++ b/AK/URLParser.cpp @@ -194,9 +194,6 @@ Optional<URL> URLParser::parse_data_url(StringView raw_input) // future for validation of URLs, which would then lead to infinite recursion. // The same goes for base_url, because e.g. the port() getter does not always return m_port, and we are interested in the underlying member // variables' values here, not what the URL class presents to its users. -// NOTE: Since the URL class's member variables contain percent decoded data, we have to deviate from the URL parser specification when setting -// some of those values. Because the specification leaves all values percent encoded in their URL data structure, we have to percent decode -// everything before setting the member variables. URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override) { dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsing '{}'", raw_input); @@ -310,7 +307,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option ++iterator; } else { url->m_cannot_be_a_base_url = true; - url->append_path(""); + url->append_slash(); state = State::CannotBeABaseUrlPath; } } else { @@ -441,13 +438,11 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option if (password_token_seen) { builder.append(url->password()); URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo); - // NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences. 
- url->m_password = URL::percent_decode(builder.string_view()); + url->m_password = builder.string_view(); } else { builder.append(url->username()); URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo); - // NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences. - url->m_username = URL::percent_decode(builder.string_view()); + url->m_username = builder.string_view(); } } buffer.clear(); @@ -561,7 +556,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option url->m_host = base_url->m_host; auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string(); if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0])) - url->append_path(base_url->m_paths[0]); + url->append_path(base_url->m_paths[0], URL::ApplyPercentEncoding::No); state = State::Path; continue; } @@ -616,9 +611,9 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option if (!url->m_paths.is_empty() && !(url->m_scheme == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0]))) url->m_paths.remove(url->m_paths.size() - 1); if (code_point != '/' && !(url->is_special() && code_point == '\\')) - url->append_path(""); + url->append_slash(); } else if (is_single_dot_path_segment(buffer.string_view()) && code_point != '/' && !(url->is_special() && code_point == '\\')) { - url->append_path(""); + url->append_slash(); } else if (!is_single_dot_path_segment(buffer.string_view())) { if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) { auto drive_letter = buffer.string_view()[0]; @@ -626,8 +621,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option buffer.append(drive_letter); buffer.append(':'); } - // NOTE: This needs to be percent decoded since the 
member variables contain decoded data. - url->append_path(URL::percent_decode(buffer.string_view())); + url->append_path(buffer.string_view(), URL::ApplyPercentEncoding::No); } buffer.clear(); if (code_point == '?') { @@ -649,13 +643,12 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option // NOTE: Verify that the assumptions required for this simplification are correct. VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty()); if (code_point == '?') { - // NOTE: This needs to be percent decoded since the member variables contain decoded data. - url->m_paths[0] = URL::percent_decode(buffer.string_view()); + url->m_paths[0] = buffer.string_view(); url->m_query = ""; state = State::Query; } else if (code_point == '#') { // NOTE: This needs to be percent decoded since the member variables contain decoded data. - url->m_paths[0] = URL::percent_decode(buffer.string_view()); + url->m_paths[0] = buffer.string_view(); url->m_fragment = ""; state = State::Fragment; } else { @@ -665,8 +658,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option if (code_point != end_of_file) { URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control); } else { - // NOTE: This needs to be percent decoded since the member variables contain decoded data. - url->m_paths[0] = URL::percent_decode(buffer.string_view()); + url->m_paths[0] = buffer.string_view(); } } break; @@ -696,8 +688,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option // FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. buffer.append_code_point(code_point); } else { - // NOTE: This needs to be percent decoded since the member variables contain decoded data. - url->m_fragment = URL::percent_decode(buffer.string_view()); + url->m_fragment = buffer.string_view(); buffer.clear(); } break; |