From 3685a8813a20cb6ef93375206d75cf55a81a0da9 Mon Sep 17 00:00:00 2001 From: Sam Atkins Date: Wed, 15 Feb 2023 11:24:38 +0000 Subject: LibWeb: Port CSS Tokenizer to new Strings Specifically, this uses FlyString, because the data gets held long-term as a FlyString anyway. --- Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 46 +++++++++++----------- Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h | 8 ++-- 2 files changed, 27 insertions(+), 27 deletions(-) (limited to 'Userland/Libraries/LibWeb') diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index c11732687f..baa16d709a 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include namespace Web::CSS::Parser { @@ -198,7 +198,7 @@ static inline bool is_E(u32 code_point) Tokenizer::Tokenizer(StringView input, StringView encoding) { // https://www.w3.org/TR/css-syntax-3/#css-filter-code-points - auto filter_code_points = [](StringView input, auto encoding) -> DeprecatedString { + auto filter_code_points = [](StringView input, auto encoding) -> ErrorOr { auto* decoder = TextCodec::decoder_for(encoding); VERIFY(decoder); @@ -237,10 +237,10 @@ Tokenizer::Tokenizer(StringView input, StringView encoding) last_was_carriage_return = false; } }); - return builder.to_deprecated_string(); + return builder.to_string(); }; - m_decoded_input = filter_code_points(input, encoding); + m_decoded_input = filter_code_points(input, encoding).release_value_but_fixme_should_propagate_errors(); m_utf8_view = Utf8View(m_decoded_input); m_utf8_iterator = m_utf8_view.begin(); } @@ -352,11 +352,11 @@ Token Tokenizer::create_eof_token() return create_new_token(Token::Type::EndOfFile); } -Token Tokenizer::create_value_token(Token::Type type, DeprecatedString value) +Token Tokenizer::create_value_token(Token::Type type, FlyString&& value) { Token token; token.m_type = type; - token.m_value = FlyString::from_utf8(value.view()).release_value_but_fixme_should_propagate_errors(); + token.m_value = move(value); return token; } @@ -364,7 +364,7 @@ Token Tokenizer::create_value_token(Token::Type type, u32 value) { Token token = {}; token.m_type = type; - token.m_value = FlyString(String::from_code_point(value)); + token.m_value = String::from_code_point(value); return token; } @@ -397,7 +397,7 @@ u32 Tokenizer::consume_escaped_code_point() } // Interpret the hex digits as a hexadecimal number. - auto unhexed = strtoul(builder.to_deprecated_string().characters(), nullptr, 16); + auto unhexed = AK::StringUtils::convert_to_uint_from_hex(builder.string_view()).value_or(0); // If this number is zero, or is for a surrogate, or is greater than the maximum allowed // code point, return U+FFFD REPLACEMENT CHARACTER (�). if (unhexed == 0 || is_unicode_surrogate(unhexed) || is_greater_than_maximum_allowed_code_point(unhexed)) { @@ -427,11 +427,11 @@ Token Tokenizer::consume_an_ident_like_token() // It returns an , , , or . // Consume an ident sequence, and let string be the result. - auto string = consume_an_ident_sequence(); + auto string = consume_an_ident_sequence().release_value_but_fixme_should_propagate_errors(); // If string’s value is an ASCII case-insensitive match for "url", and the next input code // point is U+0028 LEFT PARENTHESIS ((), consume it. - if (string.equals_ignoring_case("url"sv) && is_left_paren(peek_code_point())) { + if (Infra::is_ascii_case_insensitive_match(string, "url"sv) && is_left_paren(peek_code_point())) { (void)next_code_point(); // While the next two input code points are whitespace, consume the next input code point. @@ -449,7 +449,7 @@ Token Tokenizer::consume_an_ident_like_token() // with its value set to string and return it. auto next_two = peek_twin(); if (is_quotation_mark(next_two.first) || is_apostrophe(next_two.first) || (is_whitespace(next_two.first) && (is_quotation_mark(next_two.second) || is_apostrophe(next_two.second)))) { - return create_value_token(Token::Type::Function, string); + return create_value_token(Token::Type::Function, move(string)); } // Otherwise, consume a url token, and return it. @@ -461,11 +461,11 @@ Token Tokenizer::consume_an_ident_like_token() (void)next_code_point(); // Create a with its value set to string and return it. - return create_value_token(Token::Type::Function, string); + return create_value_token(Token::Type::Function, move(string)); } // Otherwise, create an with its value set to string and return it. - return create_value_token(Token::Type::Ident, string); + return create_value_token(Token::Type::Ident, move(string)); } // https://www.w3.org/TR/css-syntax-3/#consume-number @@ -573,7 +573,7 @@ float Tokenizer::convert_a_string_to_a_number(StringView string) } // https://www.w3.org/TR/css-syntax-3/#consume-name -DeprecatedString Tokenizer::consume_an_ident_sequence() +ErrorOr Tokenizer::consume_an_ident_sequence() { // This section describes how to consume an ident sequence from a stream of code points. // It returns a string containing the largest name that can be formed from adjacent @@ -597,14 +597,14 @@ DeprecatedString Tokenizer::consume_an_ident_sequence() // name code point if (is_ident_code_point(input)) { // Append the code point to result. - result.append_code_point(input); + TRY(result.try_append_code_point(input)); continue; } // the stream starts with a valid escape if (is_valid_escape_sequence(start_of_input_stream_twin())) { // Consume an escaped code point. Append the returned code point to result. - result.append_code_point(consume_escaped_code_point()); + TRY(result.try_append_code_point(consume_escaped_code_point())); continue; } @@ -614,7 +614,7 @@ DeprecatedString Tokenizer::consume_an_ident_sequence() break; } - return result.to_deprecated_string(); + return result.to_fly_string(); } // https://www.w3.org/TR/css-syntax-3/#consume-url-token @@ -778,10 +778,10 @@ Token Tokenizer::consume_a_numeric_token() token.m_number_value = number; // 2. Consume an ident sequence. Set the ’s unit to the returned value. - auto unit = consume_an_ident_sequence(); + auto unit = consume_an_ident_sequence().release_value_but_fixme_should_propagate_errors(); VERIFY(!unit.is_empty()); // NOTE: We intentionally store this in the `value`, to save space. - token.m_value = FlyString::from_utf8(unit.view()).release_value_but_fixme_should_propagate_errors(); + token.m_value = move(unit); // 3. Return the . return token; @@ -1065,8 +1065,8 @@ Token Tokenizer::consume_a_token() token.m_hash_type = Token::HashType::Id; // 3. Consume an ident sequence, and set the ’s value to the returned string. - auto name = consume_an_ident_sequence(); - token.m_value = FlyString::from_utf8(name.view()).release_value_but_fixme_should_propagate_errors(); + auto name = consume_an_ident_sequence().release_value_but_fixme_should_propagate_errors(); + token.m_value = move(name); // 4. Return the . return token; @@ -1201,8 +1201,8 @@ Token Tokenizer::consume_a_token() // If the next 3 input code points would start an ident sequence, consume an ident sequence, create // an with its value set to the returned value, and return it. if (would_start_an_ident_sequence(peek_triplet())) { - auto name = consume_an_ident_sequence(); - return create_value_token(Token::Type::AtKeyword, name); + auto name = consume_an_ident_sequence().release_value_but_fixme_should_propagate_errors(); + return create_value_token(Token::Type::AtKeyword, move(name)); } // Otherwise, return a with its value set to the current input code point. diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h index 83b24d11ea..c7d846c8ba 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h @@ -1,12 +1,12 @@ /* * Copyright (c) 2020-2021, the SerenityOS developers. + * Copyright (c) 2021-2023, Sam Atkins * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once -#include #include #include #include @@ -76,7 +76,7 @@ private: [[nodiscard]] U32Triplet start_of_input_stream_triplet(); [[nodiscard]] static Token create_new_token(Token::Type); - [[nodiscard]] static Token create_value_token(Token::Type, DeprecatedString value); + [[nodiscard]] static Token create_value_token(Token::Type, FlyString&& value); [[nodiscard]] static Token create_value_token(Token::Type, u32 value); [[nodiscard]] Token consume_a_token(); [[nodiscard]] Token consume_string_token(u32 ending_code_point); @@ -84,7 +84,7 @@ private: [[nodiscard]] Token consume_an_ident_like_token(); [[nodiscard]] Number consume_a_number(); [[nodiscard]] float convert_a_string_to_a_number(StringView); - [[nodiscard]] DeprecatedString consume_an_ident_sequence(); + [[nodiscard]] ErrorOr consume_an_ident_sequence(); [[nodiscard]] u32 consume_escaped_code_point(); [[nodiscard]] Token consume_a_url_token(); void consume_the_remnants_of_a_bad_url(); @@ -95,7 +95,7 @@ private: [[nodiscard]] static bool would_start_an_ident_sequence(U32Triplet); [[nodiscard]] static bool would_start_a_number(U32Triplet); - DeprecatedString m_decoded_input; + String m_decoded_input; Utf8View m_utf8_view; AK::Utf8CodePointIterator m_utf8_iterator; AK::Utf8CodePointIterator m_prev_utf8_iterator; -- cgit v1.2.3