diff options
author | Sam Atkins <atkinssj@serenityos.org> | 2023-03-06 14:19:39 +0000 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2023-03-07 00:43:36 +0100 |
commit | 17618989a39c01656ac1e23c015c5803a9542314 (patch) | |
tree | 7f85e8ed3c05b226e877719cde709d72389829ef /Userland | |
parent | 98ee2fcd1beddbfd58fd1f17b3595a8e4d0ae414 (diff) | |
download | serenity-17618989a39c01656ac1e23c015c5803a9542314.zip |
LibWeb: Propagate errors from CSS Tokenizer construction
Instead of constructing a Tokenizer and then calling parse() on it, we
now call `Tokenizer::tokenize(...)` directly. (Renamed from `parse()`
because this is a Tokenizer, not a Parser.)
Diffstat (limited to 'Userland')
5 files changed, 50 insertions, 47 deletions
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp index 1021994fe0..16258b3f9e 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp @@ -84,8 +84,7 @@ AK::URL ParsingContext::complete_url(StringView relative_url) const Parser::Parser(ParsingContext const& context, StringView input, StringView encoding) : m_context(context) - , m_tokenizer(input, encoding) - , m_tokens(m_tokenizer.parse()) + , m_tokens(Tokenizer::tokenize(input, encoding).release_value_but_fixme_should_propagate_errors()) , m_token_stream(TokenStream(m_tokens)) { } diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h index c518f8eda3..e4f5d71f88 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h @@ -360,7 +360,6 @@ private: ParsingContext m_context; - Tokenizer m_tokenizer; Vector<Token> m_tokens; TokenStream<Token> m_token_stream; }; diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index f808bbaea7..d739e48166 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -195,7 +195,7 @@ static inline bool is_E(u32 code_point) return code_point == 0x45; } -Tokenizer::Tokenizer(StringView input, StringView encoding) +ErrorOr<Vector<Token>> Tokenizer::tokenize(StringView input, StringView encoding) { // https://www.w3.org/TR/css-syntax-3/#css-filter-code-points auto filter_code_points = [](StringView input, auto encoding) -> ErrorOr<String> { @@ -206,48 +206,53 @@ Tokenizer::Tokenizer(StringView input, StringView encoding) bool last_was_carriage_return = false; // To filter code points from a stream of (unfiltered) code points input: - decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> { - // Replace any U+000D CARRIAGE RETURN (CR) code points, - // U+000C FORM FEED (FF) code points, - // or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) - // in input by a single U+000A LINE FEED (LF) code point. - if (code_point == '\r') { - if (last_was_carriage_return) { - TRY(builder.try_append('\n')); - } else { - last_was_carriage_return = true; - } - } else { - if (last_was_carriage_return) - TRY(builder.try_append('\n')); - - if (code_point == '\n') { - if (!last_was_carriage_return) - TRY(builder.try_append('\n')); - - } else if (code_point == '\f') { - TRY(builder.try_append('\n')); - // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). - } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) { - TRY(builder.try_append_code_point(REPLACEMENT_CHARACTER)); - } else { - TRY(builder.try_append_code_point(code_point)); - } - - last_was_carriage_return = false; - } - return {}; - }) - .release_value_but_fixme_should_propagate_errors(); + TRY(decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> { + // Replace any U+000D CARRIAGE RETURN (CR) code points, + // U+000C FORM FEED (FF) code points, + // or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) + // in input by a single U+000A LINE FEED (LF) code point. + if (code_point == '\r') { + if (last_was_carriage_return) { + TRY(builder.try_append('\n')); + } else { + last_was_carriage_return = true; + } + } else { + if (last_was_carriage_return) + TRY(builder.try_append('\n')); + + if (code_point == '\n') { + if (!last_was_carriage_return) + TRY(builder.try_append('\n')); + + } else if (code_point == '\f') { + TRY(builder.try_append('\n')); + // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). + } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) { + TRY(builder.try_append_code_point(REPLACEMENT_CHARACTER)); + } else { + TRY(builder.try_append_code_point(code_point)); + } + + last_was_carriage_return = false; + } + return {}; + })); return builder.to_string(); }; - m_decoded_input = filter_code_points(input, encoding).release_value_but_fixme_should_propagate_errors(); - m_utf8_view = Utf8View(m_decoded_input); - m_utf8_iterator = m_utf8_view.begin(); + Tokenizer tokenizer { TRY(filter_code_points(input, encoding)) }; + return tokenizer.tokenize(); +} + +Tokenizer::Tokenizer(String decoded_input) + : m_decoded_input(move(decoded_input)) + , m_utf8_view(m_decoded_input) + , m_utf8_iterator(m_utf8_view.begin()) +{ } -Vector<Token> Tokenizer::parse() +Vector<Token> Tokenizer::tokenize() { Vector<Token> tokens; for (;;) { diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h index c7d846c8ba..a204c02178 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h @@ -58,15 +58,16 @@ public: }; class Tokenizer { - public: - explicit Tokenizer(StringView input, StringView encoding); - - [[nodiscard]] Vector<Token> parse(); + static ErrorOr<Vector<Token>> tokenize(StringView input, StringView encoding); [[nodiscard]] static Token create_eof_token(); private: + explicit Tokenizer(String decoded_input); + + [[nodiscard]] Vector<Token> tokenize(); + [[nodiscard]] u32 next_code_point(); [[nodiscard]] u32 peek_code_point(size_t offset = 0) const; [[nodiscard]] U32Twin peek_twin() const; diff --git a/Userland/Libraries/LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.cpp b/Userland/Libraries/LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.cpp index 39f207f61f..fc7950edf3 100644 --- a/Userland/Libraries/LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.cpp +++ b/Userland/Libraries/LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.cpp @@ -45,8 +45,7 @@ void SyntaxHighlighter::rehighlight(Palette const& palette) false); }; - CSS::Parser::Tokenizer tokenizer { text, "utf-8"sv }; - auto tokens = tokenizer.parse(); + auto tokens = CSS::Parser::Tokenizer::tokenize(text, "utf-8"sv).release_value_but_fixme_should_propagate_errors(); for (auto const& token : tokens) { if (token.is(Parser::Token::Type::EndOfFile)) break; |