summaryrefslogtreecommitdiff
path: root/AK
diff options
context:
space:
mode:
authorMax Wipfli <mail@maxwipfli.ch>2021-06-03 12:43:08 +0200
committerAndreas Kling <kling@serenityos.org>2021-06-05 10:53:31 +0200
commit2164d8aae8ad5b7339cc88f288993fef06ad89f6 (patch)
tree3d6b1ed487fc238de627c49861f93ea1f5a8ab7e /AK
parent97425c7dfb8fd7728617601d1bff3203f409a7e8 (diff)
downloadserenity-2164d8aae8ad5b7339cc88f288993fef06ad89f6.zip
AK: Stop using U+0000 as end of file code point in URL parser
This changes URL parser to use the 0xFFFFFFFF constant instead of 0 to indicate end of file. This fixes a bug where inputs containing null bytes would terminate the parser early, because they were interpreted as end of file.
Diffstat (limited to 'AK')
-rw-r--r--AK/URLParser.cpp34
1 files changed, 18 insertions, 16 deletions
diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp
index 880ef04a50..847a8bf002 100644
--- a/AK/URLParser.cpp
+++ b/AK/URLParser.cpp
@@ -16,6 +16,9 @@
namespace AK {
+// NOTE: This is similar to the LibC macro EOF = -1.
+constexpr u32 end_of_file = 0xFFFFFFFF;
+
constexpr bool is_url_code_point(u32 code_point)
{
// FIXME: [...] and code points in the range U+00A0 to U+10FFFD, inclusive, excluding surrogates and noncharacters.
@@ -221,14 +224,13 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
// NOTE: "continue" should only be used to prevent incrementing the iterator, as this is done at the end of the loop.
// ++iterator : "increase pointer by 1"
// continue : "decrease pointer by 1"
- // NOTE: The NULL code point is used as the "EOF code point".
for (;;) {
- u32 code_point = 0;
+ u32 code_point = end_of_file;
if (!iterator.done())
code_point = *iterator;
if constexpr (URL_PARSER_DEBUG) {
- if (!code_point)
+ if (code_point == end_of_file)
dbgln("URLParser::parse: {} state with EOF.", state_name(state));
else if (is_ascii_printable(code_point))
dbgln("URLParser::parse: {} state with code point U+{:04X} ({:c}).", state_name(state), code_point, code_point);
@@ -335,7 +337,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
} else if (code_point == '#') {
url.m_fragment = "";
state = State::Fragment;
- } else if (code_point != 0) {
+ } else if (code_point != end_of_file) {
url.m_query = {};
if (url.m_paths.size())
url.m_paths.remove(url.m_paths.size() - 1);
@@ -408,7 +410,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
}
}
buffer.clear();
- } else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
+ } else if (code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
if (at_sign_seen && buffer.is_empty()) {
report_validation_error();
return {};
@@ -434,7 +436,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
url.m_host = host.release_value();
buffer.clear();
state = State::Port;
- } else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
+ } else if (code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
if (url.is_special() && buffer.is_empty()) {
report_validation_error();
return {};
@@ -457,7 +459,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
case State::Port:
if (is_ascii_digit(code_point)) {
buffer.append_code_point(code_point);
- } else if (code_point == 0 || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
+ } else if (code_point == end_of_file || code_point == '/' || code_point == '?' || code_point == '#' || (url.is_special() && code_point == '\\')) {
if (!buffer.is_empty()) {
auto port = buffer.to_string().to_uint();
if (!port.has_value() || port.value() > 65535) {
@@ -494,7 +496,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
} else if (code_point == '#') {
url.m_fragment = "";
state = State::Fragment;
- } else if (code_point != 0) {
+ } else if (code_point != end_of_file) {
url.m_query = {};
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
if (!starts_with_windows_drive_letter(substring_from_pointer)) {
@@ -524,7 +526,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
}
break;
case State::FileHost:
- if (code_point == 0 || code_point == '/' || code_point == '\\' || code_point == '?' || code_point == '#') {
+ if (code_point == end_of_file || code_point == '/' || code_point == '\\' || code_point == '?' || code_point == '#') {
if (is_windows_drive_letter(buffer.to_string())) {
report_validation_error();
state = State::Path;
@@ -559,14 +561,14 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
} else if (code_point == '#') {
url.m_fragment = "";
state = State::Fragment;
- } else if (code_point != 0) {
+ } else if (code_point != end_of_file) {
state = State::Path;
if (code_point != '/')
continue;
}
break;
case State::Path:
- if (code_point == 0 || code_point == '/' || (url.is_special() && code_point == '\\') || code_point == '?' || code_point == '#') {
+ if (code_point == end_of_file || code_point == '/' || (url.is_special() && code_point == '\\') || code_point == '?' || code_point == '#') {
if (url.is_special() && code_point == '\\')
report_validation_error();
if (is_double_dot_path_segment(buffer.to_string())) {
@@ -616,10 +618,10 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
url.m_fragment = "";
state = State::Fragment;
} else {
- if (code_point != 0 && !is_url_code_point(code_point) && code_point != '%')
+ if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%')
report_validation_error();
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
- if (code_point != 0) {
+ if (code_point != end_of_file) {
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
} else {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
@@ -628,7 +630,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
}
break;
case State::Query:
- if (code_point == '#' || code_point == 0) {
+ if (code_point == end_of_file || code_point == '#') {
VERIFY(url.m_query == "");
auto query_percent_encode_set = url.is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
@@ -638,7 +640,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
url.m_fragment = "";
state = State::Fragment;
}
- } else if (code_point != 0) {
+ } else if (code_point != end_of_file) {
if (!is_url_code_point(code_point) && code_point != '%')
report_validation_error();
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
@@ -647,7 +649,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
break;
case State::Fragment:
// NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the fragment on EOF.
- if (code_point) {
+ if (code_point != end_of_file) {
if (!is_url_code_point(code_point) && code_point != '%')
report_validation_error();
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.