summaryrefslogtreecommitdiff
path: root/AK
diff options
context:
space:
mode:
authorMax Wipfli <mail@maxwipfli.ch>2021-05-27 21:05:07 +0200
committerAndreas Kling <kling@serenityos.org>2021-06-01 09:28:05 +0200
commit81f03e7a5d2b78f750d525c27b964ffe9bad6179 (patch)
treeace91d7885f4d4fab311b217291286b055e30411 /AK
parent1697f3c35b2fdf3312c79452029e03949ae5cbef (diff)
downloadserenity-81f03e7a5d2b78f750d525c27b964ffe9bad6179.zip
AK: Replace old URL parser with new URLParser::parse()
This replaces the old URL::parse() and URL::complete_url() parsing mechanisms with the new spec-compliant URLParser::parse().
Diffstat (limited to 'AK')
-rw-r--r--AK/URL.cpp286
-rw-r--r--AK/URL.h1
2 files changed, 11 insertions, 276 deletions
diff --git a/AK/URL.cpp b/AK/URL.cpp
index 22d4f189fe..d68b372b66 100644
--- a/AK/URL.cpp
+++ b/AK/URL.cpp
@@ -5,9 +5,11 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
+#include <AK/Debug.h>
#include <AK/LexicalPath.h>
#include <AK/StringBuilder.h>
#include <AK/URL.h>
+#include <AK/URLParser.h>
#include <AK/Utf8View.h>
namespace AK {
@@ -32,231 +34,16 @@ constexpr bool is_ascii_hex_digit(u32 code_point)
return is_ascii_digit(code_point) || (code_point >= 'a' && code_point <= 'f') || (code_point >= 'A' && code_point <= 'F');
}
-static inline bool is_valid_scheme_character(char ch)
-{
- return ch >= 'a' && ch <= 'z';
-}
-
-static inline bool is_valid_hostname_character(char ch)
-{
- return ch && ch != '/' && ch != ':';
-}
-
-static inline bool is_digit(char ch)
-{
- return ch >= '0' && ch <= '9';
-}
-
-bool URL::parse(const StringView& string)
-{
- if (string.is_null())
- return false;
-
- enum class State {
- InScheme,
- InHostname,
- InPort,
- InPath,
- InQuery,
- InFragment,
- InDataMimeType,
- InDataPayload,
- };
-
- Vector<char, 256> buffer;
- State state { State::InScheme };
-
- size_t index = 0;
-
- auto peek = [&] {
- if (index >= string.length())
- return '\0';
- return string[index];
- };
-
- auto consume = [&] {
- if (index >= string.length())
- return '\0';
- return string[index++];
- };
-
- while (index < string.length()) {
- switch (state) {
- case State::InScheme: {
- if (is_valid_scheme_character(peek())) {
- buffer.append(consume());
- continue;
- }
- if (consume() != ':')
- return false;
-
- m_scheme = String::copy(buffer);
-
- if (m_scheme == "data") {
- buffer.clear();
- m_host = "";
- state = State::InDataMimeType;
- continue;
- }
-
- if (m_scheme == "about") {
- buffer.clear();
- m_host = "";
- state = State::InPath;
- continue;
- }
-
- if (consume() != '/')
- return false;
- if (consume() != '/')
- return false;
- if (buffer.is_empty())
- return false;
- state = State::InHostname;
- buffer.clear();
- continue;
- }
- case State::InHostname:
- if (is_valid_hostname_character(peek())) {
- buffer.append(consume());
- continue;
- }
- if (buffer.is_empty()) {
- if (m_scheme == "file") {
- m_host = "";
- state = State::InPath;
- continue;
- }
- return false;
- }
- m_host = String::copy(buffer);
- buffer.clear();
- if (peek() == ':') {
- consume();
- state = State::InPort;
- continue;
- }
- if (peek() == '/') {
- state = State::InPath;
- continue;
- }
- return false;
- case State::InPort:
- if (is_digit(peek())) {
- buffer.append(consume());
- continue;
- }
- if (buffer.is_empty())
- return false;
- {
- auto port_opt = String::copy(buffer).to_uint();
- buffer.clear();
- if (!port_opt.has_value())
- return false;
- m_port = port_opt.value();
- }
- if (peek() == '/') {
- state = State::InPath;
- continue;
- }
- return false;
- case State::InPath:
- if (peek() == '?' || peek() == '#') {
- m_path = String::copy(buffer);
- buffer.clear();
- state = peek() == '?' ? State::InQuery : State::InFragment;
- consume();
- continue;
- }
- buffer.append(consume());
- continue;
- case State::InQuery:
- if (peek() == '#') {
- m_query = String::copy(buffer);
- buffer.clear();
- consume();
- state = State::InFragment;
- continue;
- }
- buffer.append(consume());
- continue;
- case State::InFragment:
- buffer.append(consume());
- continue;
- case State::InDataMimeType: {
- if (peek() != ';' && peek() != ',') {
- buffer.append(consume());
- continue;
- }
-
- m_data_mime_type = String::copy(buffer);
- buffer.clear();
-
- if (peek() == ';') {
- consume();
- if (consume() != 'b')
- return false;
- if (consume() != 'a')
- return false;
- if (consume() != 's')
- return false;
- if (consume() != 'e')
- return false;
- if (consume() != '6')
- return false;
- if (consume() != '4')
- return false;
- m_data_payload_is_base64 = true;
- }
-
- if (consume() != ',')
- return false;
-
- state = State::InDataPayload;
- break;
- }
- case State::InDataPayload:
- buffer.append(consume());
- break;
- }
- }
- if (state == State::InHostname) {
- // We're still in the hostname, so e.g "http://serenityos.org"
- if (buffer.is_empty())
- return false;
- m_host = String::copy(buffer);
- m_path = "/";
- }
- if (state == State::InScheme)
- return false;
- if (state == State::InPath)
- m_path = String::copy(buffer);
- if (state == State::InQuery)
- m_query = String::copy(buffer);
- if (state == State::InFragment)
- m_fragment = String::copy(buffer);
- if (state == State::InDataPayload)
- m_data_payload = URL::percent_decode(String::copy(buffer));
- if (state == State::InPort) {
- auto port_opt = String::copy(buffer).to_uint();
- if (port_opt.has_value())
- m_port = port_opt.value();
- }
-
- if (m_query.is_null())
- m_query = "";
- if (m_fragment.is_null())
- m_fragment = "";
-
- if (!m_port && scheme_requires_port(m_scheme))
- set_port(default_port_for_scheme(m_scheme));
-
- return compute_validity();
-}
-
+// FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor.
URL::URL(const StringView& string)
+ : URL(URLParser::parse({}, string))
{
- m_valid = parse(string);
+ if constexpr (URL_PARSER_DEBUG) {
+ if (m_valid)
+ dbgln("URL constructor: Parsed URL to be '{}'.", serialize());
+ else
+ dbgln("URL constructor: Parsed URL to be invalid.");
+ }
}
String URL::path() const
@@ -318,58 +105,7 @@ URL URL::complete_url(const String& string) const
if (!is_valid())
return {};
- URL url(string);
- if (url.is_valid())
- return url;
-
- if (scheme() == "data")
- return {};
-
- if (string.starts_with("//")) {
- URL url(String::formatted("{}:{}", m_scheme, string));
- if (url.is_valid())
- return url;
- }
-
- if (string.starts_with("/")) {
- url = *this;
- url.set_path(string);
- return url;
- }
-
- if (string.starts_with("#")) {
- url = *this;
- url.set_fragment(string.substring(1, string.length() - 1));
- return url;
- }
-
- StringBuilder builder;
- LexicalPath lexical_path(path());
- builder.append('/');
-
- bool document_url_ends_in_slash = path()[path().length() - 1] == '/';
-
- for (size_t i = 0; i < lexical_path.parts().size(); ++i) {
- if (i == lexical_path.parts().size() - 1 && !document_url_ends_in_slash)
- break;
- builder.append(lexical_path.parts()[i]);
- builder.append('/');
- }
- builder.append(string);
- auto built = builder.to_string();
- lexical_path = LexicalPath(built);
-
- built = lexical_path.string();
- if (string.ends_with('/') && !built.ends_with('/')) {
- builder.clear();
- builder.append(built);
- builder.append('/');
- built = builder.to_string();
- }
-
- url = *this;
- url.set_path(built);
- return url;
+ return URLParser::parse({}, string, this);
}
void URL::set_scheme(const String& scheme)
diff --git a/AK/URL.h b/AK/URL.h
index c83b87f1ba..eedf0ce4e2 100644
--- a/AK/URL.h
+++ b/AK/URL.h
@@ -121,7 +121,6 @@ private:
{
}
- bool parse(const StringView&);
bool compute_validity() const;
String serialize_data_url() const;