diff options
author | Sam Atkins <atkinssj@serenityos.org> | 2021-11-17 21:01:51 +0000 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-11-19 22:35:05 +0100 |
commit | f6869797a7a56de4db4d9d8a06bfffe6ca520d81 (patch) | |
tree | 6cb05453febda3b2d8e730bc614e0bbb16522042 /Userland | |
parent | d2ef8b29e8becdec7c0a44e1f9c1f5cb64059d5c (diff) | |
download | serenity-f6869797a7a56de4db4d9d8a06bfffe6ca520d81.zip |
LibWeb: Convert numeric tokens to numbers in CSS Tokenizer
The spec wants us to produce numeric values as the Tokenizer sees them,
rather than waiting until the parse stage. This is a first step towards
that.
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Token.h | 1 |
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 90 |
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h | 4 |
3 files changed, 90 insertions, 5 deletions
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Token.h b/Userland/Libraries/LibWeb/CSS/Parser/Token.h index 1449d9b35b..b7e2a96480 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Token.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Token.h @@ -152,6 +152,7 @@ private: StringBuilder m_unit; HashType m_hash_type { HashType::Unrestricted }; NumberType m_number_type { NumberType::Integer }; + double m_number_value { 0 }; Position m_start_position; Position m_end_position; diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index d7f2808720..c139e9fbf5 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -11,6 +11,7 @@ #include <AK/Vector.h> #include <LibTextCodec/Decoder.h> #include <LibWeb/CSS/Parser/Tokenizer.h> +#include <math.h> // U+FFFD REPLACEMENT CHARACTER (�) #define REPLACEMENT_CHARACTER 0xFFFD @@ -469,7 +470,84 @@ CSSNumber Tokenizer::consume_a_number() } } - return { repr.to_string(), type }; + return { repr.to_string(), convert_a_string_to_a_number(repr.string_view()), type }; +} + +// https://www.w3.org/TR/css-syntax-3/#convert-string-to-number +double Tokenizer::convert_a_string_to_a_number(StringView string) +{ + auto code_point_at = [&](size_t index) -> u32 { + if (index < string.length()) + return string[index]; + return TOKENIZER_EOF; + }; + + // This algorithm does not do any verification to ensure that the string contains only a number. + // Ensure that the string contains only a valid CSS number before calling this algorithm. + + // Divide the string into seven components, in order from left to right: + size_t position = 0; + + // 1. A sign: a single U+002B PLUS SIGN (+) or U+002D HYPHEN-MINUS (-), or the empty string. + // Let s [sign] be the number -1 if the sign is U+002D HYPHEN-MINUS (-); otherwise, let s be the number 1. 
+ int sign = 1; + if (is_plus_sign(code_point_at(position)) || is_hyphen_minus(code_point_at(position))) { + sign = is_hyphen_minus(code_point_at(position)) ? -1 : 1; + position++; + } + + // 2. An integer part: zero or more digits. + // If there is at least one digit, let i [integer_part] be the number formed by interpreting the digits + // as a base-10 integer; otherwise, let i be the number 0. + double integer_part = 0; + while (is_ascii_digit(code_point_at(position))) { + integer_part = (integer_part * 10) + (code_point_at(position) - '0'); + position++; + } + + // 3. A decimal point: a single U+002E FULL STOP (.), or the empty string. + if (is_full_stop(code_point_at(position))) + position++; + + // 4. A fractional part: zero or more digits. + // If there is at least one digit, let f [fractional_part] be the number formed by interpreting the digits + // as a base-10 integer and d [fractional_digits] be the number of digits; otherwise, let f and d be the number 0. + double fractional_part = 0; + int fractional_digits = 0; + while (is_ascii_digit(code_point_at(position))) { + fractional_part = (fractional_part * 10) + (code_point_at(position) - '0'); + position++; + fractional_digits++; + } + + // 5. An exponent indicator: a single U+0045 LATIN CAPITAL LETTER E (E) or U+0065 LATIN SMALL LETTER E (e), + // or the empty string. + if (is_e(code_point_at(position)) || is_E(code_point_at(position))) + position++; + + // 6. An exponent sign: a single U+002B PLUS SIGN (+) or U+002D HYPHEN-MINUS (-), or the empty string. + // Let t [exponent_sign] be the number -1 if the sign is U+002D HYPHEN-MINUS (-); otherwise, let t be the number 1. + int exponent_sign = 1; + if (is_plus_sign(code_point_at(position)) || is_hyphen_minus(code_point_at(position))) { + exponent_sign = is_hyphen_minus(code_point_at(position)) ? -1 : 1; + position++; + } + + // 7. An exponent: zero or more digits. 
+ // If there is at least one digit, let e [exponent] be the number formed by interpreting the digits as a + // base-10 integer; otherwise, let e be the number 0. + double exponent = 0; + while (is_ascii_digit(code_point_at(position))) { + exponent = (exponent * 10) + (code_point_at(position) - '0'); + position++; + } + + // NOTE: We checked before calling this function that the string is a valid number, + // so if there is anything at the end, something has gone wrong! + VERIFY(position == string.length()); + + // Return the number s·(i + f·10^-d)·10^te. + return sign * (integer_part + fractional_part * pow(10, -fractional_digits)) * pow(10, exponent_sign * exponent); } // https://www.w3.org/TR/css-syntax-3/#consume-name @@ -601,8 +679,9 @@ Token Tokenizer::consume_a_numeric_token() auto number = consume_a_number(); if (would_start_an_identifier()) { auto token = create_new_token(Token::Type::Dimension); - token.m_value.append(number.value); + token.m_value.append(number.string); token.m_number_type = number.type; + token.m_number_value = number.value; auto unit = consume_a_name(); VERIFY(!unit.is_empty() && !unit.is_whitespace()); @@ -615,13 +694,16 @@ Token Tokenizer::consume_a_numeric_token() (void)next_code_point(); auto token = create_new_token(Token::Type::Percentage); - token.m_value.append(number.value); + token.m_value.append(number.string); + token.m_number_type = number.type; + token.m_number_value = number.value; return token; } auto token = create_new_token(Token::Type::Number); - token.m_value.append(number.value); + token.m_value.append(number.string); token.m_number_type = number.type; + token.m_number_value = number.value; return token; } diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h index 201ac94ae2..1e60df0a7c 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h @@ -59,7 +59,8 @@ public: class CSSNumber { public: - 
String value; + String string; + double value { 0 }; Token::NumberType type {}; }; @@ -86,6 +87,7 @@ private: [[nodiscard]] Token consume_a_numeric_token(); [[nodiscard]] Token consume_an_ident_like_token(); [[nodiscard]] CSSNumber consume_a_number(); + [[nodiscard]] double convert_a_string_to_a_number(StringView); [[nodiscard]] String consume_a_name(); [[nodiscard]] u32 consume_escaped_code_point(); [[nodiscard]] Token consume_a_url_token(); |