From 036b5c28049e56a83ed4a558d1da8e698e13fda3 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Mon, 22 Mar 2021 17:41:47 +0100 Subject: LibWeb: Import new CSS parser It's not finished or hooked up to anything yet, but let's get it into the tree so we can continue working on it. Original work by @stelar7. --- Userland/Libraries/LibWeb/CMakeLists.txt | 1 + Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp | 568 ++++++++++++++++++++++++ Userland/Libraries/LibWeb/CSS/Parser/Parser.h | 107 +++++ 3 files changed, 676 insertions(+) create mode 100644 Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp create mode 100644 Userland/Libraries/LibWeb/CSS/Parser/Parser.h diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt index 4e941ddbe4..4f57a2c46b 100644 --- a/Userland/Libraries/LibWeb/CMakeLists.txt +++ b/Userland/Libraries/LibWeb/CMakeLists.txt @@ -19,6 +19,7 @@ set(SOURCES CSS/DefaultStyleSheetSource.cpp CSS/Length.cpp CSS/Parser/DeprecatedCSSParser.cpp + CSS/Parser/Parser.cpp CSS/Parser/StyleRules.cpp CSS/Parser/Token.cpp CSS/Parser/Tokenizer.cpp diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp new file mode 100644 index 0000000000..2c9b51785d --- /dev/null +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2020-2021, SerenityOS developers + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CSS_PARSER_TRACE 1 + +#define PARSE_ERROR() \ + do { \ + dbgln_if(CSS_PARSER_TRACE, "Parse error (CSS) {} @ {}", __PRETTY_FUNCTION__, __LINE__); \ + } while (0) + +namespace Web::CSS { + +Parser::Parser(const StringView& input, const String& encoding) + : m_tokenizer(input, encoding) +{ + m_tokens = m_tokenizer.parse(); +} + +Parser::~Parser() +{ +} + +Token Parser::peek_token() +{ + size_t next_offset = m_iterator_offset + 1; + + if (next_offset < m_tokens.size()) { + return m_tokens.at(next_offset); + } + + return m_tokens.at(m_iterator_offset); +} + +Token Parser::next_token() +{ + if (m_iterator_offset < (int)m_tokens.size()) { + ++m_iterator_offset; + } + + auto token = m_tokens.at(m_iterator_offset); + + return token; +} + +Token Parser::current_token() +{ + return m_tokens.at(m_iterator_offset); +} + +Vector Parser::parse_as_stylesheet() +{ + auto rules = consume_a_list_of_rules(true); + + dbgln("Printing rules:"); + + for (auto& rule : rules) { + dbgln("PRE:"); + for (auto& pre : rule.m_prelude) { + dbgln("{}", pre); + } + dbgln("BLOCK:"); + 
dbgln("{}", rule.m_block.to_string()); + dbgln(""); + + auto selectors = parse_selectors(rule.m_prelude); + } + + return rules; +} + +Vector Parser::parse_selectors(Vector parts) +{ + (void)parts; + Vector selectors; + return selectors; +} + +void Parser::dump_all_tokens() +{ + dbgln("Dumping all tokens:"); + for (auto& token : m_tokens) + dbgln("{}", token.to_string()); +} + +void Parser::reconsume_current_input_token() +{ + --m_iterator_offset; +} + +Vector Parser::consume_a_list_of_rules(bool top_level) +{ + Vector rules; + + for (;;) { + auto token = next_token(); + + if (token.is_whitespace()) { + continue; + } + + if (token.is_eof()) { + break; + } + + if (token.is_cdo() || token.is_cdc()) { + if (top_level) { + continue; + } + + reconsume_current_input_token(); + auto maybe_qualified = consume_a_qualified_rule(); + if (maybe_qualified.has_value()) { + rules.append(maybe_qualified.value()); + } + + continue; + } + + if (token.is_at()) { + reconsume_current_input_token(); + rules.append(consume_an_at_rule()); + continue; + } + + reconsume_current_input_token(); + auto maybe_qualified = consume_a_qualified_rule(); + if (maybe_qualified.has_value()) { + rules.append(maybe_qualified.value()); + } + } + + return rules; +} + +AtStyleRule Parser::consume_an_at_rule() +{ + auto initial = next_token(); + + AtStyleRule rule; + rule.m_name = initial.m_value.to_string(); + + for (;;) { + auto token = next_token(); + if (token.is_semicolon()) { + return rule; + } + + if (token.is_eof()) { + PARSE_ERROR(); + return rule; + } + + if (token.is_open_curly()) { + rule.m_block = consume_a_simple_block(); + return rule; + } + + // how is "simple block with an associated token of <{-token>" a valid token? 
+ + reconsume_current_input_token(); + auto value = consume_a_component_value(); + if (value.m_type == StyleComponentValueRule::ComponentType::Token) { + if (value.m_token.is_whitespace()) { + continue; + } + } + rule.m_prelude.append(value.to_string()); + } +} + +Optional Parser::consume_a_qualified_rule() +{ + QualifiedStyleRule rule; + + for (;;) { + auto token = next_token(); + + if (token.is_eof()) { + PARSE_ERROR(); + return {}; + } + + if (token.is_open_curly()) { + rule.m_block = consume_a_simple_block(); + return rule; + } + + // how is "simple block with an associated token of <{-token>" a valid token? + + reconsume_current_input_token(); + auto value = consume_a_component_value(); + if (value.m_type == StyleComponentValueRule::ComponentType::Token) { + if (value.m_token.is_whitespace()) { + continue; + } + } + rule.m_prelude.append(value.to_string()); + } + + return rule; +} + +StyleComponentValueRule Parser::consume_a_component_value() +{ + auto token = next_token(); + + if (token.is_open_curly() || token.is_open_square() || token.is_open_paren()) { + auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Block); + component.m_block = consume_a_simple_block(); + return component; + } + + if (token.is_function()) { + auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Function); + component.m_function = consume_a_function(); + return component; + } + + auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token); + component.m_token = token; + return component; +} + +StyleBlockRule Parser::consume_a_simple_block() +{ + auto ending_token = current_token().mirror_variant(); + + StyleBlockRule block; + block.m_token = current_token(); + + for (;;) { + auto token = next_token(); + + if (token.m_type == ending_token) { + return block; + } + + if (token.is_eof()) { + PARSE_ERROR(); + return block; + } + + reconsume_current_input_token(); + auto value = 
consume_a_component_value(); + if (value.m_type == StyleComponentValueRule::ComponentType::Token) { + if (value.m_token.is_whitespace()) { + continue; + } + } + block.m_values.append(value.to_string()); + } +} + +StyleFunctionRule Parser::consume_a_function() +{ + StyleFunctionRule function; + function.m_name = current_token().m_value.to_string(); + + for (;;) { + auto token = next_token(); + if (token.is_close_paren()) { + return function; + } + + if (token.is_eof()) { + PARSE_ERROR(); + return function; + } + + reconsume_current_input_token(); + auto value = consume_a_component_value(); + if (value.m_type == StyleComponentValueRule::ComponentType::Token) { + if (value.m_token.is_whitespace()) { + continue; + } + } + function.m_values.append(value.to_string()); + } + + return function; +} +Optional Parser::consume_a_declaration(Vector) +{ + TODO(); +} + +Optional Parser::consume_a_declaration() +{ + auto token = next_token(); + + StyleDeclarationRule declaration; + declaration.m_name = token.m_value.to_string(); + + for (;;) { + if (!peek_token().is_whitespace()) { + break; + } + next_token(); + } + + auto colon = next_token(); + + if (!colon.is_colon()) { + PARSE_ERROR(); + return {}; + } + + for (;;) { + if (!peek_token().is_whitespace()) { + break; + } + next_token(); + } + + for (;;) { + if (peek_token().is_eof()) { + break; + } + declaration.m_values.append(consume_a_component_value()); + } + + auto second_last = declaration.m_values.at(declaration.m_values.size() - 2); + auto last = declaration.m_values.at(declaration.m_values.size() - 1); + + if (second_last.m_type == StyleComponentValueRule::ComponentType::Token && last.m_type == StyleComponentValueRule::ComponentType::Token) { + auto last_token = last.m_token; + auto second_last_token = second_last.m_token; + + if (second_last_token.is_delim() && second_last_token.m_value.to_string().equals_ignoring_case("!")) { + if (last_token.is_ident() && 
last_token.m_value.to_string().equals_ignoring_case("important")) { + declaration.m_values.remove(declaration.m_values.size() - 2); + declaration.m_values.remove(declaration.m_values.size() - 1); + declaration.m_important = true; + } + } + } + + for (;;) { + auto maybe_whitespace = declaration.m_values.at(declaration.m_values.size() - 1); + if (!(maybe_whitespace.m_type == StyleComponentValueRule::ComponentType::Token && maybe_whitespace.m_token.is_whitespace())) { + break; + } + declaration.m_values.remove(declaration.m_values.size() - 1); + } + + return declaration; +} + +Vector Parser::consume_a_list_of_declarations() +{ + Vector list; + + for (;;) { + auto token = next_token(); + if (token.is_whitespace() || token.is_semicolon()) { + continue; + } + + if (token.is_eof()) { + return list; + } + + if (token.is_at()) { + reconsume_current_input_token(); + list.append(DeclarationOrAtRule(consume_an_at_rule())); + continue; + } + + if (token.is_ident()) { + Vector temp; + + auto component = StyleComponentValueRule(StyleComponentValueRule::ComponentType::Token); + component.m_token = token; + temp.append(component); + + for (;;) { + auto peek = peek_token(); + if (peek.is_semicolon() || peek.is_eof()) { + break; + } + temp.append(consume_a_component_value()); + } + + auto maybe_declaration = consume_a_declaration(temp); + if (maybe_declaration.has_value()) { + list.append(DeclarationOrAtRule(maybe_declaration.value())); + } + } + + PARSE_ERROR(); + reconsume_current_input_token(); + auto peek = peek_token(); + if (!(peek.is_semicolon() || peek.is_eof())) { + consume_a_component_value(); + } + } + + return list; +} + +Optional Parser::parse_as_rule() +{ + Optional rule; + + for (;;) { + auto maybe_whitespace = peek_token(); + if (!maybe_whitespace.is_whitespace()) { + break; + } + next_token(); + } + + auto token = peek_token(); + + if (token.is_eof()) { + return {}; + } + + if (token.is_at()) { + rule = consume_an_at_rule(); + } else { + rule = 
consume_a_qualified_rule(); + } + + for (;;) { + auto maybe_whitespace = peek_token(); + if (!token.is_whitespace()) { + break; + } + next_token(); + } + + auto maybe_eof = peek_token(); + if (maybe_eof.is_eof()) { + return rule; + } + + return {}; +} + +Vector Parser::parse_as_list_of_rules() +{ + return consume_a_list_of_rules(false); +} + +Optional Parser::parse_as_declaration() +{ + for (;;) { + auto maybe_whitespace = peek_token(); + if (!maybe_whitespace.is_whitespace()) { + break; + } + next_token(); + } + + auto token = peek_token(); + + if (!token.is_ident()) { + return {}; + } + + return consume_a_declaration(); +} +Vector Parser::parse_as_list_of_declarations() +{ + return consume_a_list_of_declarations(); +} + +Optional Parser::parse_as_component_value() +{ + for (;;) { + auto maybe_whitespace = peek_token(); + if (!maybe_whitespace.is_whitespace()) { + break; + } + next_token(); + } + + auto token = peek_token(); + + if (token.is_eof()) { + return {}; + } + + auto value = consume_a_component_value(); + + for (;;) { + auto maybe_whitespace = peek_token(); + if (!token.is_whitespace()) { + break; + } + next_token(); + } + + auto maybe_eof = peek_token(); + if (maybe_eof.is_eof()) { + return value; + } + + return {}; +} +Vector Parser::parse_as_list_of_component_values() +{ + Vector rules; + + for (;;) { + if (peek_token().is_eof()) { + break; + } + + rules.append(consume_a_component_value()); + } + + return rules; +} + +Vector Parser::parse_as_list_of_comma_separated_component_values() +{ + Vector rules; + + for (;;) { + rules.append(consume_a_component_value()); + + if (peek_token().is_comma()) + continue; + if (peek_token().is_eof()) + break; + } + + return rules; +} + +} diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h new file mode 100644 index 0000000000..402d9236fe --- /dev/null +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2020-2021, SerenityOS 
developers
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// NOTE(review): the include targets above and the template arguments on
+// Vector/Optional below appear to have been stripped from this copy of the
+// patch; restore them from the original commit before building.
+
+namespace Web::CSS {
+
+// CSS parser working on the token list produced by Tokenizer.
+// The whole input is tokenized in the constructor; the parse_as_* entry points
+// then walk that list with a single cursor (m_iterator_offset).
+class Parser {
+public:
+    Parser(const StringView& input, const String& encoding = "utf-8");
+    ~Parser();
+
+    // The normal parser entry point, for parsing stylesheets.
+    Vector parse_as_stylesheet();
+    // For the content of at-rules such as @media. It differs from "Parse a stylesheet" in the handling of <CDO-token> and <CDC-token>.
+    Vector parse_as_list_of_rules();
+    // For use by the CSSStyleSheet#insertRule method, and similar functions which might exist, which parse text into a single rule.
+    Optional parse_as_rule();
+    // Used in @supports conditions. [CSS3-CONDITIONAL]
+    Optional parse_as_declaration();
+    // For the contents of a style attribute, which parses text into the contents of a single style rule.
+    Vector parse_as_list_of_declarations();
+    // For things that need to consume a single value, like the parsing rules for attr().
+    Optional parse_as_component_value();
+    // For the contents of presentational attributes, which parse text into a single declaration’s value, or for parsing a stand-alone selector [SELECT] or list of Media Queries [MEDIAQ], as in Selectors API or the media HTML attribute.
+    Vector parse_as_list_of_component_values();
+
+    // Like parse_as_list_of_component_values(), intended for comma-separated input.
+    Vector parse_as_list_of_comma_separated_component_values();
+
+    // Turns a rule prelude into selectors.
+    Vector parse_selectors(Vector parts);
+
+    // FIXME: https://www.w3.org/TR/selectors-4/
+    // Placeholders: each returns an empty Optional or false.
+    Optional parse_a_selector() { return {}; }
+    Optional parse_a_relative_selector() { return {}; }
+    bool match_a_selector_against_an_element() { return false; }
+    bool match_a_selector_against_a_pseudo_element() { return false; }
+    bool match_a_selector_against_a_tree() { return false; }
+
+    // FIXME: https://drafts.csswg.org/css-backgrounds-3/
+    // Placeholders: each returns its input unchanged, without validating it.
+    static Optional as_valid_background_repeat(String input) { return input; }
+    static Optional as_valid_background_attachment(String input) { return input; }
+    static Optional as_valid_background_position(String input) { return input; }
+    static Optional as_valid_background_clip(String input) { return input; }
+    static Optional as_valid_background_origin(String input) { return input; }
+    static Optional as_valid_background_size(String input) { return input; }
+    static Optional as_valid_border_style(String input) { return input; }
+    static Optional as_valid_border_image_repeat(String input) { return input; }
+
+    // Writes every token to the debug log.
+    void dump_all_tokens();
+
+private:
+    // Cursor primitives over m_tokens.
+    // next_token() advances the cursor and returns the token it then points at.
+    Token next_token();
+    // peek_token() returns the following token without moving the cursor.
+    Token peek_token();
+    // current_token() returns the token under the cursor.
+    Token current_token();
+    // Steps the cursor back by one so the current token is returned again by
+    // the next next_token() call.
+    void reconsume_current_input_token();
+
+    // Building blocks used by the parse_as_* entry points.
+    Vector consume_a_list_of_rules(bool top_level);
+    AtStyleRule consume_an_at_rule();
+    Optional consume_a_qualified_rule();
+    Vector consume_a_list_of_declarations();
+    Optional consume_a_declaration(Vector);
+    Optional consume_a_declaration();
+    StyleComponentValueRule consume_a_component_value();
+    StyleBlockRule consume_a_simple_block();
+    StyleFunctionRule consume_a_function();
+
+    Tokenizer m_tokenizer;
+    // All tokens of the input, filled once by the constructor.
+    Vector m_tokens;
+    // Index of the current token in m_tokens; -1 means nothing consumed yet.
+    int m_iterator_offset { -1 };
+};
+
+}
--
cgit v1.2.3