diff options
author | Max Wipfli <mail@maxwipfli.ch> | 2021-06-03 12:43:08 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-06-05 10:53:31 +0200 |
commit | 2164d8aae8ad5b7339cc88f288993fef06ad89f6 (patch) | |
tree | 3d6b1ed487fc238de627c49861f93ea1f5a8ab7e /AK | |
parent | 97425c7dfb8fd7728617601d1bff3203f409a7e8 (diff) | |
download | serenity-2164d8aae8ad5b7339cc88f288993fef06ad89f6.zip |
AK: Stop using U+0000 as end of file code point in URL parser
This changes the URL parser to use the 0xFFFFFFFF constant instead of 0 to
indicate end of file. This fixes a bug where inputs containing null
bytes would terminate the parser early, because they were interpreted
as end of file.
Diffstat (limited to 'AK')
-rw-r--r-- | AK/URLParser.cpp | 34 |
1 files changed, 18 insertions, 16 deletions
diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp index 880ef04a50..847a8bf002 100644 --- a/AK/URLParser.cpp +++ b/AK/URLParser.cpp @@ -16,6 +16,9 @@ namespace AK { +// NOTE: This is similar to the LibC macro EOF = -1. +constexpr u32 end_of_file = 0xFFFFFFFF; + constexpr bool is_url_code_point(u32 code_point) { // FIXME: [...] and code points in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters. @@ -221,14 +224,13 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur // NOTE: "continue" should only be used to prevent incrementing the iterator, as this is done at the end of the loop. // ++iterator : "increase pointer by 1" // continue : "decrease pointer by 1" - // NOTE: The NULL code point is used as the "EOF code point". for (;;) { - u32 code_point = 0; + u32 code_point = end_of_file; if (!iterator.done()) code_point = *iterator; if constexpr (URL_PARSER_DEBUG) { - if (!code_point) + if (code_point == end_of_file) dbgln("URLParser::parse: {} state with EOF.", state_name(state)); else if (is_ascii_printable(code_point)) dbgln("URLParser::parse: {} state with code point U+{:04X} ({:c}).", state_name(state), code_point, code_point); @@ -335,7 +337,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur } else if (code_point == '#') { url.m_fragment = ""; state = State::Fragment; - } else if (code_point != 0) { + } else if (code_point != end_of_file) { url.m_query = {}; if (url.m_paths.size()) url.m_paths.remove(url.m_paths.size() - 1); @@ -408,7 +410,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur } } buffer.clear(); - } else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) { + } else if (code_point == end_of_file || code_point == '/' || code_point == '?' 
|| code_point == '#' || (url.is_special() && code_point == '\\')) { if (at_sign_seen && buffer.is_empty()) { report_validation_error(); return {}; @@ -434,7 +436,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur url.m_host = host.release_value(); buffer.clear(); state = State::Port; - } else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) { + } else if (code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) { if (url.is_special() && buffer.is_empty()) { report_validation_error(); return {}; @@ -457,7 +459,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur case State::Port: if (is_ascii_digit(code_point)) { buffer.append_code_point(code_point); - } else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) { + } else if (code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) { if (!buffer.is_empty()) { auto port = buffer.to_string().to_uint(); if (!port.has_value() || port.value() > 65535) { @@ -494,7 +496,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur } else if (code_point == '#') { url.m_fragment = ""; state = State::Fragment; - } else if (code_point != 0) { + } else if (code_point != end_of_file) { url.m_query = {}; auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string(); if (!starts_with_windows_drive_letter(substring_from_pointer)) { @@ -524,7 +526,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur } break; case State::FileHost: - if (code_point == 0 || code_point == '/' || code_point == '\\' || code_point == '?' 
|| code_point == '#') { + if (code_point == end_of_file || code_point == '/' || code_point == '\\' || code_point == '?' || code_point == '#') { if (is_windows_drive_letter(buffer.to_string())) { report_validation_error(); state = State::Path; @@ -559,14 +561,14 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur } else if (code_point == '#') { url.m_fragment = ""; state = State::Fragment; - } else if (code_point != 0) { + } else if (code_point != end_of_file) { state = State::Path; if (code_point != '/') continue; } break; case State::Path: - if (code_point == 0 || code_point == '/' || (url.is_special() && code_point == '\\') || code_point == '?' || code_point == '#') { + if (code_point == end_of_file || code_point == '/' || (url.is_special() && code_point == '\\') || code_point == '?' || code_point == '#') { if (url.is_special() && code_point == '\\') report_validation_error(); if (is_double_dot_path_segment(buffer.to_string())) { @@ -616,10 +618,10 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur url.m_fragment = ""; state = State::Fragment; } else { - if (code_point != 0 && !is_url_code_point(code_point) && code_point != '%') + if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%') report_validation_error(); // FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. - if (code_point != 0) { + if (code_point != end_of_file) { URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control); } else { // NOTE: This needs to be percent decoded since the member variables contain decoded data. @@ -628,7 +630,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur } break; case State::Query: - if (code_point == '#' || code_point == 0) { + if (code_point == end_of_file || code_point == '#') { VERIFY(url.m_query == ""); auto query_percent_encode_set = url.is_special() ? 
URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query; // NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences. @@ -638,7 +640,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur url.m_fragment = ""; state = State::Fragment; } - } else if (code_point != 0) { + } else if (code_point != end_of_file) { if (!is_url_code_point(code_point) && code_point != '%') report_validation_error(); // FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. @@ -647,7 +649,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur break; case State::Fragment: // NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the fragment on EOF. - if (code_point) { + if (code_point != end_of_file) { if (!is_url_code_point(code_point) && code_point != '%') report_validation_error(); // FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. |