diff options
author | Sam Atkins <atkinssj@gmail.com> | 2021-07-03 14:00:41 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-07-11 23:19:56 +0200 |
commit | b7116711bfa46958227fbc2d299b9e4a3bda63c0 (patch) | |
tree | e6270bc64dcada943410aee7402f3ebc85402692 /Userland/Libraries/LibWeb/CSS | |
parent | 6c03123b2de436d1b3a6a935c2d3c231da865212 (diff) | |
download | serenity-b7116711bfa46958227fbc2d299b9e4a3bda63c0.zip |
LibWeb: Add TokenStream class to CSS Parser
The entry points for CSS parsing in the spec are defined as accepting
any of: a stream of Tokens, a stream of ComponentValues, or a String.
TokenStream is an attempt to reduce the duplication of code for that.
Diffstat (limited to 'Userland/Libraries/LibWeb/CSS')
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp | 103 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Parser.h | 36 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 5 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h | 2 |
4 files changed, 109 insertions, 37 deletions
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp index 174b63c4be..8aa5dcdee3 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp @@ -53,44 +53,99 @@ URL ParsingContext::complete_url(String const& addr) const return m_document ? m_document->url().complete_url(addr) : URL::create_with_url_or_path(addr); } -Parser::Parser(ParsingContext const& context, StringView const& input, String const& encoding) - : m_context(context) - , m_tokenizer(input, encoding) +template<typename T> +TokenStream<T>::TokenStream(Vector<T> const& tokens) + : m_tokens(tokens) + , m_eof(make_eof()) { - m_tokens = m_tokenizer.parse(); } -Parser::~Parser() +template<typename T> +TokenStream<T>::~TokenStream() { } -Token Parser::peek_token() +template<typename T> +bool TokenStream<T>::has_next_token() { - size_t next_offset = m_iterator_offset + 1; + return (size_t)(m_iterator_offset + 1) < m_tokens.size(); +} - if (next_offset < m_tokens.size()) { - return m_tokens.at(next_offset); - } +template<typename T> +T const& TokenStream<T>::peek_token() +{ + if (!has_next_token()) + return m_eof; - return m_tokens.at(m_iterator_offset); + return m_tokens.at(m_iterator_offset + 1); } -Token Parser::next_token() +template<typename T> +T const& TokenStream<T>::next_token() { - if (m_iterator_offset < (int)m_tokens.size() - 1) { - ++m_iterator_offset; - } + if (!has_next_token()) + return m_eof; - auto token = m_tokens.at(m_iterator_offset); + ++m_iterator_offset; - return token; + return m_tokens.at(m_iterator_offset); } -Token Parser::current_token() +template<typename T> +T const& TokenStream<T>::current_token() { + if ((size_t)m_iterator_offset >= m_tokens.size()) + return m_eof; + return m_tokens.at(m_iterator_offset); } +template<typename T> +void TokenStream<T>::reconsume_current_input_token() +{ + VERIFY(m_iterator_offset >= 0); + --m_iterator_offset; +} + +template<typename 
T> +void TokenStream<T>::skip_whitespace() +{ + while (peek_token().is(Token::Type::Whitespace)) + next_token(); +} + +template<> +Token TokenStream<Token>::make_eof() +{ + return Tokenizer::create_eof_token(); +} + +template<> +StyleComponentValueRule TokenStream<StyleComponentValueRule>::make_eof() +{ + return StyleComponentValueRule(Tokenizer::create_eof_token()); +} + +template<typename T> +void TokenStream<T>::dump_all_tokens() +{ + dbgln("Dumping all tokens:"); + for (auto& token : m_tokens) + dbgln("{}", token.to_string()); +} + +Parser::Parser(ParsingContext const& context, StringView const& input, String const& encoding) + : m_context(context) + , m_tokenizer(input, encoding) + , m_tokens(m_tokenizer.parse()) + , m_token_stream(TokenStream(m_tokens)) +{ +} + +Parser::~Parser() +{ +} + NonnullRefPtr<CSSStyleSheet> Parser::parse_as_stylesheet() { auto parser_rules = consume_a_list_of_rules(true); @@ -440,18 +495,6 @@ Optional<Selector> Parser::parse_single_selector(Vector<StyleComponentValueRule> return Selector(move(selectors)); } -void Parser::dump_all_tokens() -{ - dbgln("Dumping all tokens:"); - for (auto& token : m_tokens) - dbgln("{}", token.to_string()); -} - -void Parser::reconsume_current_input_token() -{ - --m_iterator_offset; -} - NonnullRefPtrVector<StyleRule> Parser::consume_a_list_of_rules(bool top_level) { NonnullRefPtrVector<StyleRule> rules; diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h index d4257b479d..63764b252b 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h @@ -39,6 +39,30 @@ private: const DOM::Document* m_document { nullptr }; }; +template<typename T> +class TokenStream { +public: + explicit TokenStream(Vector<T> const&); + ~TokenStream(); + + bool has_next_token(); + T const& next_token(); + T const& peek_token(); + T const& current_token(); + void reconsume_current_input_token(); + + void skip_whitespace(); + + 
void dump_all_tokens(); + +private: + Vector<T> const& m_tokens; + int m_iterator_offset { -1 }; + + T make_eof(); + T m_eof; +}; + class Parser { public: Parser(ParsingContext const&, StringView const& input, String const& encoding = "utf-8"); @@ -82,13 +106,11 @@ public: static Optional<String> as_valid_border_style(String input) { return input; } static Optional<String> as_valid_border_image_repeat(String input) { return input; } - void dump_all_tokens(); - private: - Token next_token(); - Token peek_token(); - Token current_token(); - void reconsume_current_input_token(); + Token next_token() { return m_token_stream.next_token(); } + Token peek_token() { return m_token_stream.peek_token(); } + Token current_token() { return m_token_stream.current_token(); } + void reconsume_current_input_token() { m_token_stream.reconsume_current_input_token(); } NonnullRefPtrVector<StyleRule> consume_a_list_of_rules(bool top_level); NonnullRefPtr<StyleRule> consume_an_at_rule(); @@ -106,7 +128,7 @@ private: Tokenizer m_tokenizer; Vector<Token> m_tokens; - int m_iterator_offset { -1 }; + TokenStream<Token> m_token_stream; }; } diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index 162720f260..c01dd8bad7 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -269,6 +269,11 @@ Token Tokenizer::create_new_token(Token::Type type) return token; } +Token Tokenizer::create_eof_token() +{ + return create_new_token(Token::Type::EndOfFile); +} + Token Tokenizer::create_value_token(Token::Type type, String value) { Token token; diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h index 8baec2bc04..41872a64af 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h @@ -70,6 +70,8 @@ public: [[nodiscard]] Vector<Token> parse(); + 
[[nodiscard]] static Token create_eof_token(); + private: [[nodiscard]] u32 next_code_point(); [[nodiscard]] u32 peek_code_point(size_t offset = 0) const; |