diff options
Diffstat (limited to 'AK')
-rw-r--r-- | AK/URL.cpp | 286 | ||||
-rw-r--r-- | AK/URL.h | 1 |
2 files changed, 11 insertions, 276 deletions
diff --git a/AK/URL.cpp b/AK/URL.cpp index 22d4f189fe..d68b372b66 100644 --- a/AK/URL.cpp +++ b/AK/URL.cpp @@ -5,9 +5,11 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include <AK/Debug.h> #include <AK/LexicalPath.h> #include <AK/StringBuilder.h> #include <AK/URL.h> +#include <AK/URLParser.h> #include <AK/Utf8View.h> namespace AK { @@ -32,231 +34,16 @@ constexpr bool is_ascii_hex_digit(u32 code_point) return is_ascii_digit(code_point) || (code_point >= 'a' && code_point <= 'f') || (code_point >= 'A' && code_point <= 'F'); } -static inline bool is_valid_scheme_character(char ch) -{ - return ch >= 'a' && ch <= 'z'; -} - -static inline bool is_valid_hostname_character(char ch) -{ - return ch && ch != '/' && ch != ':'; -} - -static inline bool is_digit(char ch) -{ - return ch >= '0' && ch <= '9'; -} - -bool URL::parse(const StringView& string) -{ - if (string.is_null()) - return false; - - enum class State { - InScheme, - InHostname, - InPort, - InPath, - InQuery, - InFragment, - InDataMimeType, - InDataPayload, - }; - - Vector<char, 256> buffer; - State state { State::InScheme }; - - size_t index = 0; - - auto peek = [&] { - if (index >= string.length()) - return '\0'; - return string[index]; - }; - - auto consume = [&] { - if (index >= string.length()) - return '\0'; - return string[index++]; - }; - - while (index < string.length()) { - switch (state) { - case State::InScheme: { - if (is_valid_scheme_character(peek())) { - buffer.append(consume()); - continue; - } - if (consume() != ':') - return false; - - m_scheme = String::copy(buffer); - - if (m_scheme == "data") { - buffer.clear(); - m_host = ""; - state = State::InDataMimeType; - continue; - } - - if (m_scheme == "about") { - buffer.clear(); - m_host = ""; - state = State::InPath; - continue; - } - - if (consume() != '/') - return false; - if (consume() != '/') - return false; - if (buffer.is_empty()) - return false; - state = State::InHostname; - buffer.clear(); - continue; - } - case State::InHostname: - if (is_valid_hostname_character(peek())) { - buffer.append(consume()); - continue; - } - if (buffer.is_empty()) { - if (m_scheme == "file") { - m_host = ""; - state = State::InPath; - continue; - } - return false; - } - m_host = String::copy(buffer); - buffer.clear(); - if (peek() == ':') { - consume(); - state = State::InPort; - continue; - } - if (peek() == '/') { - state = State::InPath; - continue; - } - return false; - case State::InPort: - if (is_digit(peek())) { - buffer.append(consume()); - continue; - } - if (buffer.is_empty()) - return false; - { - auto port_opt = String::copy(buffer).to_uint(); - buffer.clear(); - if (!port_opt.has_value()) - return false; - m_port = port_opt.value(); - } - if (peek() == '/') { - state = State::InPath; - continue; - } - return false; - case State::InPath: - if (peek() == '?' || peek() == '#') { - m_path = String::copy(buffer); - buffer.clear(); - state = peek() == '?' ? State::InQuery : State::InFragment; - consume(); - continue; - } - buffer.append(consume()); - continue; - case State::InQuery: - if (peek() == '#') { - m_query = String::copy(buffer); - buffer.clear(); - consume(); - state = State::InFragment; - continue; - } - buffer.append(consume()); - continue; - case State::InFragment: - buffer.append(consume()); - continue; - case State::InDataMimeType: { - if (peek() != ';' && peek() != ',') { - buffer.append(consume()); - continue; - } - - m_data_mime_type = String::copy(buffer); - buffer.clear(); - - if (peek() == ';') { - consume(); - if (consume() != 'b') - return false; - if (consume() != 'a') - return false; - if (consume() != 's') - return false; - if (consume() != 'e') - return false; - if (consume() != '6') - return false; - if (consume() != '4') - return false; - m_data_payload_is_base64 = true; - } - - if (consume() != ',') - return false; - - state = State::InDataPayload; - break; - } - case State::InDataPayload: - buffer.append(consume()); - break; - } - } - if (state == State::InHostname) { - // We're still in the hostname, so e.g "http://serenityos.org" - if (buffer.is_empty()) - return false; - m_host = String::copy(buffer); - m_path = "/"; - } - if (state == State::InScheme) - return false; - if (state == State::InPath) - m_path = String::copy(buffer); - if (state == State::InQuery) - m_query = String::copy(buffer); - if (state == State::InFragment) - m_fragment = String::copy(buffer); - if (state == State::InDataPayload) - m_data_payload = URL::percent_decode(String::copy(buffer)); - if (state == State::InPort) { - auto port_opt = String::copy(buffer).to_uint(); - if (port_opt.has_value()) - m_port = port_opt.value(); - } - - if (m_query.is_null()) - m_query = ""; - if (m_fragment.is_null()) - m_fragment = ""; - - if (!m_port && scheme_requires_port(m_scheme)) - set_port(default_port_for_scheme(m_scheme)); - - return compute_validity(); -} - +// FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor. URL::URL(const StringView& string) + : URL(URLParser::parse({}, string)) { - m_valid = parse(string); + if constexpr (URL_PARSER_DEBUG) { + if (m_valid) + dbgln("URL constructor: Parsed URL to be '{}'.", serialize()); + else + dbgln("URL constructor: Parsed URL to be invalid."); + } } String URL::path() const @@ -318,58 +105,7 @@ URL URL::complete_url(const String& string) const if (!is_valid()) return {}; - URL url(string); - if (url.is_valid()) - return url; - - if (scheme() == "data") - return {}; - - if (string.starts_with("//")) { - URL url(String::formatted("{}:{}", m_scheme, string)); - if (url.is_valid()) - return url; - } - - if (string.starts_with("/")) { - url = *this; - url.set_path(string); - return url; - } - - if (string.starts_with("#")) { - url = *this; - url.set_fragment(string.substring(1, string.length() - 1)); - return url; - } - - StringBuilder builder; - LexicalPath lexical_path(path()); - builder.append('/'); - - bool document_url_ends_in_slash = path()[path().length() - 1] == '/'; - - for (size_t i = 0; i < lexical_path.parts().size(); ++i) { - if (i == lexical_path.parts().size() - 1 && !document_url_ends_in_slash) - break; - builder.append(lexical_path.parts()[i]); - builder.append('/'); - } - builder.append(string); - auto built = builder.to_string(); - lexical_path = LexicalPath(built); - - built = lexical_path.string(); - if (string.ends_with('/') && !built.ends_with('/')) { - builder.clear(); - builder.append(built); - builder.append('/'); - built = builder.to_string(); - } - - url = *this; - url.set_path(built); - return url; + return URLParser::parse({}, string, this); } void URL::set_scheme(const String& scheme) @@ -121,7 +121,6 @@ private: { } - bool parse(const StringView&); bool compute_validity() const; String serialize_data_url() const; |