diff options
author | Sam Atkins <atkinssj@gmail.com> | 2021-07-24 21:22:44 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-07-31 00:18:11 +0200 |
commit | 6034fc0ee68ef23e4b45e22120b7e9694e189591 (patch) | |
tree | b7b259f5b7a242d6adc96cdabf3e9aa0af34694b /Userland/Libraries/LibWeb | |
parent | 8d0ff98efffe0a31f1787303b6935c7e52f1746c (diff) | |
download | serenity-6034fc0ee68ef23e4b45e22120b7e9694e189591.zip |
LibWeb: Make CSS 'An+B' parsing spec-compliant
Parsing this pattern from CSS tokens turns out to be slightly crazy, but
thankfully well documented in the spec.
The spec lists the cases in order of simple -> complicated, but this
would cause problems in code, since `<n-dimension> <signed-.integer>`
would never by reached, as `<n-dimension>` comes before. Instead, I
have grouped them by their first token.
Also renamed the NthChildPattern class to ANPlusBPattern, to match spec
terminology.
Diffstat (limited to 'Userland/Libraries/LibWeb')
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/DeprecatedCSSParser.cpp | 4 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp | 348 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Parser.h | 2 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Token.cpp | 2 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Token.h | 29 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Selector.cpp | 4 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Selector.h | 16 |
7 files changed, 323 insertions, 82 deletions
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/DeprecatedCSSParser.cpp b/Userland/Libraries/LibWeb/CSS/Parser/DeprecatedCSSParser.cpp index 1813f6df62..90e62a27f2 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/DeprecatedCSSParser.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/DeprecatedCSSParser.cpp @@ -1046,10 +1046,10 @@ public: pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::LastOfType; } else if (pseudo_name.starts_with("nth-child", CaseSensitivity::CaseInsensitive)) { pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::NthChild; - pseudo_class.nth_child_pattern = CSS::Selector::SimpleSelector::NthChildPattern::parse(capture_selector_args(pseudo_name)); + pseudo_class.nth_child_pattern = CSS::Selector::SimpleSelector::ANPlusBPattern::parse(capture_selector_args(pseudo_name)); } else if (pseudo_name.starts_with("nth-last-child", CaseSensitivity::CaseInsensitive)) { pseudo_class.type = CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastChild; - pseudo_class.nth_child_pattern = CSS::Selector::SimpleSelector::NthChildPattern::parse(capture_selector_args(pseudo_name)); + pseudo_class.nth_child_pattern = CSS::Selector::SimpleSelector::ANPlusBPattern::parse(capture_selector_args(pseudo_name)); } else if (pseudo_name.equals_ignoring_case("before")) { simple_selector.pseudo_element = CSS::Selector::SimpleSelector::PseudoElement::Before; } else if (pseudo_name.equals_ignoring_case("after")) { diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp index a19dcc0231..6d787eeaef 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp @@ -6,6 +6,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include <AK/CharacterTypes.h> #include <AK/Debug.h> #include <AK/NonnullRefPtrVector.h> #include <AK/SourceLocation.h> @@ -603,7 +604,6 @@ Result<Selector::SimpleSelector, Parser::SelectorParsingResult> Parser::parse_si simple_selector.pseudo_class.nth_child_pattern = nth_child_pattern.value(); } else { dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid nth-child format"); - function_values.dump_all_tokens(); return SelectorParsingResult::SyntaxError; } } else if (pseudo_function.name().equals_ignoring_case("nth-last-child")) { @@ -611,11 +611,9 @@ Result<Selector::SimpleSelector, Parser::SelectorParsingResult> Parser::parse_si auto function_values = TokenStream<StyleComponentValueRule>(pseudo_function.values()); auto nth_child_pattern = parse_a_n_plus_b_pattern(function_values); if (nth_child_pattern.has_value()) { - dbgln("+++ Got An+B pattern: '{}'", nth_child_pattern.value().to_string()); simple_selector.pseudo_class.nth_child_pattern = nth_child_pattern.value(); } else { dbgln_if(CSS_PARSER_DEBUG, "!!! Invalid nth-child format"); - function_values.dump_all_tokens(); return SelectorParsingResult::SyntaxError; } } else { @@ -1491,7 +1489,7 @@ RefPtr<StyleValue> Parser::parse_numeric_value(ParsingContext const&, StyleCompo if (component_value.is(Token::Type::Number)) { auto number = component_value.token(); if (number.m_number_type == Token::NumberType::Integer) { - return NumericStyleValue::create(number.integer()); + return NumericStyleValue::create(number.to_integer()); } else { auto float_value = try_parse_float(number.m_value.string_view()); if (float_value.has_value()) @@ -1748,7 +1746,7 @@ RefPtr<StyleValue> Parser::parse_css_value(ParsingContext const& context, Proper if (takes_integer_value(property_id) && component_value.is(Token::Type::Number)) { auto number = component_value.token(); if (number.m_number_type == Token::NumberType::Integer) { - return LengthStyleValue::create(Length::make_px(number.integer())); + return LengthStyleValue::create(Length::make_px(number.to_integer())); } } @@ -1776,90 +1774,306 @@ RefPtr<StyleValue> Parser::parse_css_value(ParsingContext const& context, Proper return {}; } -Optional<Selector::SimpleSelector::NthChildPattern> Parser::parse_nth_child_pattern(TokenStream<StyleComponentValueRule>& values) +Optional<Selector::SimpleSelector::ANPlusBPattern> Parser::parse_a_n_plus_b_pattern(TokenStream<StyleComponentValueRule>& values) { - dbgln_if(CSS_PARSER_DEBUG, "Parser::parse_nth_child_pattern"); + dbgln_if(CSS_PARSER_DEBUG, "Parser::parse_a_n_plus_b_pattern"); - Selector::SimpleSelector::NthChildPattern pattern; + int a = 0; + int b = 0; - auto current_value = values.next_token(); - if (current_value.is(Token::Type::Ident)) { - auto ident = current_value.token().ident(); - if (ident.equals_ignoring_case("odd")) { - pattern.step_size = 2; - pattern.offset = 1; - return pattern; + auto syntax_error = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> { + if constexpr (CSS_PARSER_DEBUG) { + dbgln("Invalid An+B value:"); + values.dump_all_tokens(); + } + return {}; + }; + + auto make_return_value = [&]() -> Optional<Selector::SimpleSelector::ANPlusBPattern> { + // When we think we are done, but there are more non-whitespace tokens, then it's a parse error. + values.skip_whitespace(); + if (values.has_next_token()) { + if constexpr (CSS_PARSER_DEBUG) { + dbgln("Extra tokens at end of An+B value:"); + values.dump_all_tokens(); + } + return syntax_error(); + } else { + return Selector::SimpleSelector::ANPlusBPattern { a, b }; + } + }; + + auto is_n = [](StyleComponentValueRule const& value) -> bool { + return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("n"sv); + }; + auto is_ndash = [](StyleComponentValueRule const& value) -> bool { + return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("n-"sv); + }; + auto is_dashn = [](StyleComponentValueRule const& value) -> bool { + return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("-n"sv); + }; + auto is_dashndash = [](StyleComponentValueRule const& value) -> bool { + return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("-n-"sv); + }; + auto is_delim = [](StyleComponentValueRule const& value, StringView const& delim) -> bool { + return value.is(Token::Type::Delim) && value.token().delim().equals_ignoring_case(delim); + }; + auto is_n_dimension = [](StyleComponentValueRule const& value) -> bool { + if (!value.is(Token::Type::Dimension)) + return false; + if (value.token().number_type() != Token::NumberType::Integer) + return false; + if (!value.token().dimension_unit().equals_ignoring_case("n"sv)) + return false; + return true; + }; + auto is_ndash_dimension = [](StyleComponentValueRule const& value) -> bool { + if (!value.is(Token::Type::Dimension)) + return false; + if (value.token().number_type() != Token::NumberType::Integer) + return false; + if (!value.token().dimension_unit().equals_ignoring_case("n-"sv)) + return false; + return true; + }; + auto is_ndashdigit_dimension = [](StyleComponentValueRule const& value) -> bool { + if (!value.is(Token::Type::Dimension)) + return false; + if (value.token().number_type() != Token::NumberType::Integer) + return false; + auto dimension_unit = value.token().dimension_unit(); + if (!dimension_unit.starts_with("n-"sv, CaseSensitivity::CaseInsensitive)) + return false; + for (size_t i = 2; i < dimension_unit.length(); ++i) { + if (!is_ascii_digit(dimension_unit[i])) + return false; + } + return true; + }; + auto is_ndashdigit_ident = [](StyleComponentValueRule const& value) -> bool { + if (!value.is(Token::Type::Ident)) + return false; + auto ident = value.token().ident(); + if (!ident.starts_with("n-"sv, CaseSensitivity::CaseInsensitive)) + return false; + for (size_t i = 2; i < ident.length(); ++i) { + if (!is_ascii_digit(ident[i])) + return false; + } + return true; + }; + auto is_dashndashdigit_ident = [](StyleComponentValueRule const& value) -> bool { + if (!value.is(Token::Type::Ident)) + return false; + auto ident = value.token().ident(); + if (!ident.starts_with("-n-"sv, CaseSensitivity::CaseInsensitive)) + return false; + for (size_t i = 3; i < ident.length(); ++i) { + if (!is_ascii_digit(ident[i])) + return false; + } + return true; + }; + auto is_integer = [](StyleComponentValueRule const& value) -> bool { + return value.is(Token::Type::Number) && value.token().is(Token::NumberType::Integer); + }; + auto is_signed_integer = [is_integer](StyleComponentValueRule const& value) -> bool { + return is_integer(value) && value.token().is_integer_value_signed(); + }; + auto is_signless_integer = [is_integer](StyleComponentValueRule const& value) -> bool { + return is_integer(value) && !value.token().is_integer_value_signed(); + }; + + // https://www.w3.org/TR/css-syntax-3/#the-anb-type + // Unfortunately these can't be in the same order as in the spec. + values.skip_whitespace(); + auto& first_value = values.next_token(); + + // odd | even + if (first_value.is(Token::Type::Ident)) { + auto ident = first_value.token().ident(); + if (ident.equals_ignoring_case("odd")) { + a = 2; + b = 1; + return make_return_value(); } else if (ident.equals_ignoring_case("even")) { - pattern.step_size = 2; - return pattern; + a = 2; + return make_return_value(); } } + // <integer> + if (is_integer(first_value)) { + b = first_value.token().to_integer(); + return make_return_value(); + } + // <n-dimension> + // <n-dimension> <signed-integer> + // <n-dimension> ['+' | '-'] <signless-integer> + if (is_n_dimension(first_value)) { + a = first_value.token().dimension_value_int(); - // Try to match any of following patterns: - // 1. An+B - // 2. An - // 3. B - // ...where "A" is "step_size", "B" is "offset" and rest are literals. - // "A" can be omitted, in that case "A" = 1. - // "A" may have "+" or "-" sign, "B" always must be predated by sign for pattern (1). + values.skip_whitespace(); + auto& second_value = values.next_token(); + if (second_value.is(Token::Type::EndOfFile)) { + // <n-dimension> + return make_return_value(); + } else if (is_signed_integer(second_value)) { + // <n-dimension> <signed-integer> + b = second_value.token().to_integer(); + return make_return_value(); + } - auto is_n = [](StyleComponentValueRule value) -> bool { - return value.is(Token::Type::Ident) && value.token().ident().equals_ignoring_case("n"); - }; + values.skip_whitespace(); + auto& third_value = values.next_token(); + if ((is_delim(second_value, "+"sv) || is_delim(second_value, "-"sv)) && is_signless_integer(third_value)) { + // <n-dimension> ['+' | '-'] <signless-integer> + b = third_value.token().to_integer() * (is_delim(second_value, "+"sv) ? 1 : -1); + return make_return_value(); + } - auto is_delim = [](StyleComponentValueRule value, StringView delim) -> bool { - return value.is(Token::Type::Delim) && value.token().delim().equals_ignoring_case(delim); - }; + return syntax_error(); + } + // <ndash-dimension> <signless-integer> + if (is_ndash_dimension(first_value)) { + values.skip_whitespace(); + auto& second_value = values.next_token(); + if (is_signless_integer(second_value)) { + a = first_value.token().dimension_value_int(); + b = -second_value.token().to_integer(); + return make_return_value(); + } - int step_size_or_offset = 0; - - // "When a=1, or a=-1, the 1 may be omitted from the rule." - if (is_n(current_value)) { - step_size_or_offset = +1; - } else if (is_delim(current_value, "+"sv) && is_n(values.peek_token())) { - step_size_or_offset = +1; - values.next_token(); - } else if (is_delim(current_value, "-"sv) && is_n(values.peek_token())) { - step_size_or_offset = -1; - values.next_token(); - } else if (current_value.is(Token::Type::Number)) { - step_size_or_offset = current_value.token().integer(); - } else { - values.reconsume_current_input_token(); + return syntax_error(); } + // <ndashdigit-dimension> + if (is_ndashdigit_dimension(first_value)) { + auto& dimension = first_value.token(); + a = dimension.dimension_value_int(); + auto maybe_b = dimension.dimension_unit().substring_view(1).to_int(); + if (maybe_b.has_value()) { + b = maybe_b.value(); + return make_return_value(); + } - current_value = values.next_token(); + return syntax_error(); + } + // <dashndashdigit-ident> + if (is_dashndashdigit_ident(first_value)) { + a = -1; + auto maybe_b = first_value.token().ident().substring_view(2).to_int(); + if (maybe_b.has_value()) { + b = maybe_b.value(); + return make_return_value(); + } - if (is_n(current_value)) { + return syntax_error(); + } + // -n + // -n <signed-integer> + // -n ['+' | '-'] <signless-integer> + if (is_dashn(first_value)) { + a = -1; values.skip_whitespace(); + auto& second_value = values.next_token(); + if (second_value.is(Token::Type::EndOfFile)) { + // -n + return make_return_value(); + } else if (is_signed_integer(second_value)) { + // -n <signed-integer> + b = second_value.token().to_integer(); + return make_return_value(); + } - auto next_value = values.peek_token(); - if (is_delim(next_value, "+") || is_delim(next_value, "-")) { - const auto sign = is_delim(next_value, "+") ? 1 : -1; - values.next_token(); + values.skip_whitespace(); + auto& third_value = values.next_token(); + if ((is_delim(second_value, "+"sv) || is_delim(second_value, "-"sv)) && is_signless_integer(third_value)) { + // -n ['+' | '-'] <signless-integer> + b = third_value.token().to_integer() * (is_delim(second_value, "+"sv) ? 1 : -1); + return make_return_value(); + } - values.skip_whitespace(); + return syntax_error(); + } + // -n- <signless-integer> + if (is_dashndash(first_value)) { + values.skip_whitespace(); + auto& second_value = values.next_token(); + if (is_signless_integer(second_value)) { + a = -1; + b = -second_value.token().to_integer(); + return make_return_value(); + } - // "An+B" pattern - auto number = values.next_token(); - if (!number.is(Token::Type::Number)) - return {}; + return syntax_error(); + } - pattern.step_size = step_size_or_offset; - pattern.offset = sign * number.token().integer(); - } else { - // "An" pattern - pattern.step_size = step_size_or_offset; + // All that's left now are these: + // '+'?† n + // '+'?† n <signed-integer> + // '+'?† n ['+' | '-'] <signless-integer> + // '+'?† n- <signless-integer> + // '+'?† <ndashdigit-ident> + // In all of these cases, the + is optional, and has no effect. + // So, we just skip the +, and carry on. + if (!is_delim(first_value, "+"sv)) { + values.reconsume_current_input_token(); + // We do *not* skip whitespace here. + } + + auto& first_after_plus = values.next_token(); + // '+'?† n + // '+'?† n <signed-integer> + // '+'?† n ['+' | '-'] <signless-integer> + if (is_n(first_after_plus)) { + a = 1; + values.skip_whitespace(); + auto& second_value = values.next_token(); + if (second_value.is(Token::Type::EndOfFile)) { + // '+'?† n + return make_return_value(); + } else if (is_signed_integer(second_value)) { + // '+'?† n <signed-integer> + b = second_value.token().to_integer(); + return make_return_value(); } - } else { - // "B" pattern - pattern.offset = step_size_or_offset; + + values.skip_whitespace(); + auto& third_value = values.next_token(); + if ((is_delim(second_value, "+"sv) || is_delim(second_value, "-"sv)) && is_signless_integer(third_value)) { + // '+'?† n ['+' | '-'] <signless-integer> + b = third_value.token().to_integer() * (is_delim(second_value, "+"sv) ? 1 : -1); + return make_return_value(); + } + + return syntax_error(); } - if (values.has_next_token()) - return {}; + // '+'?† n- <signless-integer> + if (is_ndash(first_after_plus)) { + values.skip_whitespace(); + auto& second_value = values.next_token(); + if (is_signless_integer(second_value)) { + a = 1; + b = -second_value.token().to_integer(); + return make_return_value(); + } + + return syntax_error(); + } + + // '+'?† <ndashdigit-ident> + if (is_ndashdigit_ident(first_after_plus)) { + a = 1; + auto maybe_b = first_after_plus.token().ident().substring_view(1).to_int(); + if (maybe_b.has_value()) { + b = maybe_b.value(); + return make_return_value(); + } + + return syntax_error(); + } - return pattern; + return syntax_error(); } } diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h index 1e0753fac8..165da97c5e 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h @@ -112,7 +112,7 @@ public: template<typename T> Vector<Vector<StyleComponentValueRule>> parse_as_comma_separated_list_of_component_values(TokenStream<T>&); - Optional<Selector::SimpleSelector::NthChildPattern> parse_nth_child_pattern(TokenStream<StyleComponentValueRule>&); + Optional<Selector::SimpleSelector::ANPlusBPattern> parse_a_n_plus_b_pattern(TokenStream<StyleComponentValueRule>&); // FIXME: https://www.w3.org/TR/selectors-4/ // Contrary to the name, these parse a comma-separated list of selectors, according to the spec. diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Token.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Token.cpp index a83e79e019..8ec29257dd 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Token.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Token.cpp @@ -54,7 +54,7 @@ String Token::to_debug_string() const case Type::Number: builder.append("Number: "); builder.append(m_value.to_string()); - builder.append(m_unit.to_string()); + builder.append(m_number_type == NumberType::Integer ? " (int)" : " (float)"); return builder.to_string(); case Type::Percentage: builder.append("Percentage: "); diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Token.h b/Userland/Libraries/LibWeb/CSS/Parser/Token.h index ca426653ef..c34427b622 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Token.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Token.h @@ -100,11 +100,34 @@ public: } bool is(NumberType number_type) const { return is(Token::Type::Number) && m_number_type == number_type; } - - int integer() const + StringView number_string_value() const + { + VERIFY(m_type == Type::Number); + return m_value.string_view(); + } + int to_integer() const { VERIFY(m_type == Type::Number && m_number_type == NumberType::Integer); - return m_value.string_view().to_int().value(); + return number_string_value().to_int().value(); + } + bool is_integer_value_signed() const { return number_string_value().starts_with('-') || number_string_value().starts_with('+'); } + + StringView dimension_unit() const + { + VERIFY(m_type == Type::Dimension); + return m_unit.string_view(); + } + StringView dimension_value() const + { + VERIFY(m_type == Type::Dimension); + return m_value.string_view(); + } + int dimension_value_int() const { return dimension_value().to_int().value(); } + + NumberType number_type() const + { + VERIFY((m_type == Type::Number) || (m_type == Type::Dimension)); + return m_number_type; } Type mirror_variant() const; diff --git a/Userland/Libraries/LibWeb/CSS/Selector.cpp b/Userland/Libraries/LibWeb/CSS/Selector.cpp index 0ea5a6834c..1eb9ae4c69 100644 --- a/Userland/Libraries/LibWeb/CSS/Selector.cpp +++ b/Userland/Libraries/LibWeb/CSS/Selector.cpp @@ -47,11 +47,11 @@ u32 Selector::specificity() const return ids * 0x10000 + classes * 0x100 + tag_names; } -Selector::SimpleSelector::NthChildPattern Selector::SimpleSelector::NthChildPattern::parse(StringView const& args) +Selector::SimpleSelector::ANPlusBPattern Selector::SimpleSelector::ANPlusBPattern::parse(StringView const& args) { // FIXME: Remove this when the DeprecatedCSSParser is gone. // The new Parser::parse_nth_child_pattern() does the same as this, using Tokens. - CSS::Selector::SimpleSelector::NthChildPattern pattern; + CSS::Selector::SimpleSelector::ANPlusBPattern pattern; if (args.equals_ignoring_case("odd")) { pattern.step_size = 2; pattern.offset = 1; diff --git a/Userland/Libraries/LibWeb/CSS/Selector.h b/Userland/Libraries/LibWeb/CSS/Selector.h index 932289ff27..a7378c0b14 100644 --- a/Userland/Libraries/LibWeb/CSS/Selector.h +++ b/Userland/Libraries/LibWeb/CSS/Selector.h @@ -33,11 +33,15 @@ public: }; Type type { Type::Invalid }; - struct NthChildPattern { - int step_size { 0 }; - int offset = { 0 }; - - static NthChildPattern parse(StringView const& args); + struct ANPlusBPattern { + int step_size { 0 }; // "A" + int offset = { 0 }; // "B" + + static ANPlusBPattern parse(StringView const& args); + String to_string() const + { + return String::formatted("{}n{:+}", step_size, offset); + } }; struct PseudoClass { @@ -66,7 +70,7 @@ public: // FIXME: We don't need this field on every single SimpleSelector, but it's also annoying to malloc it somewhere. // Only used when "pseudo_class" is "NthChild" or "NthLastChild". - NthChildPattern nth_child_pattern; + ANPlusBPattern nth_child_pattern; SelectorList not_selector {}; }; |