diff options
author | Sam Atkins <atkinssj@serenityos.org> | 2022-10-03 16:05:07 +0100 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2022-10-03 17:09:41 +0100 |
commit | 97e174afcd1990d1aefcf603a0a8f72046486695 (patch) | |
tree | 3f1c30dc21a5d47bfdf267477f107300aaff5559 /Userland/Libraries | |
parent | 7cab86ad2899e528a52c42056f4286be8624e938 (diff) | |
download | serenity-97e174afcd1990d1aefcf603a0a8f72046486695.zip |
LibWeb: Use the term "ident sequence" instead of "name"
This is an editorial change in the December 2021 version of SYNTAX-3:
https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/
Diffstat (limited to 'Userland/Libraries')
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 60 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h | 4 |
2 files changed, 33 insertions, 31 deletions
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index 29131fcea2..2399e0d39a 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2020-2022, the SerenityOS developers. - * Copyright (c) 2021, Sam Atkins <atkinssj@serenityos.org> + * Copyright (c) 2021-2022, Sam Atkins <atkinssj@serenityos.org> * * SPDX-License-Identifier: BSD-2-Clause */ @@ -44,7 +44,8 @@ static inline bool is_low_line(u32 code_point) return code_point == 0x5F; } -static inline bool is_name_start_code_point(u32 code_point) +// https://www.w3.org/TR/css-syntax-3/#ident-start-code-point +static inline bool is_ident_start_code_point(u32 code_point) { // FIXME: We use !is_ascii() for "non-ASCII code point" in the spec, but it's not quite right - // it treats EOF as a valid! The spec also lacks a definition of code point. For now, the @@ -57,9 +58,10 @@ static inline bool is_hyphen_minus(u32 code_point) return code_point == 0x2D; } -static inline bool is_name_code_point(u32 code_point) +// https://www.w3.org/TR/css-syntax-3/#ident-code-point +static inline bool is_ident_code_point(u32 code_point) { - return is_name_start_code_point(code_point) || is_ascii_digit(code_point) || is_hyphen_minus(code_point); + return is_ident_start_code_point(code_point) || is_ascii_digit(code_point) || is_hyphen_minus(code_point); } static inline bool is_non_printable(u32 code_point) @@ -424,8 +426,8 @@ Token Tokenizer::consume_an_ident_like_token() // This section describes how to consume an ident-like token from a stream of code points. // It returns an <ident-token>, <function-token>, <url-token>, or <bad-url-token>. - // Consume a name, and let string be the result. - auto string = consume_a_name(); + // Consume an ident sequence, and let string be the result. + auto string = consume_an_ident_sequence(); // If string’s value is an ASCII case-insensitive match for "url", and the next input code // point is U+0028 LEFT PARENTHESIS ((), consume it. @@ -640,15 +642,15 @@ float Tokenizer::convert_a_string_to_a_number(StringView string) } // https://www.w3.org/TR/css-syntax-3/#consume-name -String Tokenizer::consume_a_name() +String Tokenizer::consume_an_ident_sequence() { - // This section describes how to consume a name from a stream of code points. + // This section describes how to consume an ident sequence from a stream of code points. // It returns a string containing the largest name that can be formed from adjacent // code points in the stream, starting from the first. // // Note: This algorithm does not do the verification of the first few code points that // are necessary to ensure the returned code points would constitute an <ident-token>. - // If that is the intended use, ensure that the stream starts with an identifier before + // If that is the intended use, ensure that the stream starts with an ident sequence before // calling this algorithm. // Let result initially be an empty string. @@ -662,7 +664,7 @@ String Tokenizer::consume_a_name() break; // name code point - if (is_name_code_point(input)) { + if (is_ident_code_point(input)) { // Append the code point to result. result.append_code_point(input); continue; @@ -837,15 +839,15 @@ Token Tokenizer::consume_a_numeric_token() // Consume a number and let number be the result. auto number = consume_a_number(); - // If the next 3 input code points would start an identifier, then: - if (would_start_an_identifier(peek_triplet())) { + // If the next 3 input code points would start an ident sequence, then: + if (would_start_an_ident_sequence(peek_triplet())) { // 1. Create a <dimension-token> with the same value and type flag as number, // and a unit set initially to the empty string. auto token = create_new_token(Token::Type::Dimension); token.m_number_value = number; - // 2. Consume a name. Set the <dimension-token>’s unit to the returned value. - auto unit = consume_a_name(); + // 2. Consume an ident sequence. Set the <dimension-token>’s unit to the returned value. + auto unit = consume_an_ident_sequence(); VERIFY(!unit.is_empty()); // NOTE: We intentionally store this in the `value`, to save space. token.m_value = move(unit); @@ -938,9 +940,9 @@ bool Tokenizer::is_valid_escape_sequence(U32Twin values) } // https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier -bool Tokenizer::would_start_an_identifier(U32Triplet values) +bool Tokenizer::would_start_an_ident_sequence(U32Triplet values) { - // This section describes how to check if three code points would start an identifier. + // This section describes how to check if three code points would start an ident sequence. // The algorithm described here can be called explicitly with three code points, or // can be called with the input stream itself. In the latter case, the three code // points in question are the current input code point and the next two input code @@ -954,14 +956,14 @@ bool Tokenizer::would_start_an_identifier(U32Triplet values) if (is_hyphen_minus(values.first)) { // If the second code point is a name-start code point or a U+002D HYPHEN-MINUS, // or the second and third code points are a valid escape, return true. - if (is_name_start_code_point(values.second) || is_hyphen_minus(values.second) || is_valid_escape_sequence(values.to_twin_23())) + if (is_ident_start_code_point(values.second) || is_hyphen_minus(values.second) || is_valid_escape_sequence(values.to_twin_23())) return true; // Otherwise, return false. return false; } // name-start code point - if (is_name_start_code_point(values.first)) { + if (is_ident_start_code_point(values.first)) { // Return true. return true; } @@ -1117,22 +1119,22 @@ Token Tokenizer::consume_a_token() if (is_number_sign(input)) { dbgln_if(CSS_TOKENIZER_DEBUG, "is number sign"); - // If the next input code point is a name code point or the next two input code points + // If the next input code point is an ident code point or the next two input code points // are a valid escape, then: auto next_input = peek_code_point(); auto maybe_escape = peek_twin(); - if (is_name_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) { + if (is_ident_code_point(next_input) || is_valid_escape_sequence(maybe_escape)) { // 1. Create a <hash-token>. auto token = create_new_token(Token::Type::Hash); - // 2. If the next 3 input code points would start an identifier, set the <hash-token>’s + // 2. If the next 3 input code points would start an ident sequence, set the <hash-token>’s // type flag to "id". - if (would_start_an_identifier(peek_triplet())) + if (would_start_an_ident_sequence(peek_triplet())) token.m_hash_type = Token::HashType::Id; - // 3. Consume a name, and set the <hash-token>’s value to the returned string. - auto name = consume_a_name(); + // 3. Consume an ident sequence, and set the <hash-token>’s value to the returned string. + auto name = consume_an_ident_sequence(); token.m_value = move(name); // 4. Return the <hash-token>. @@ -1207,7 +1209,7 @@ Token Tokenizer::consume_a_token() // Otherwise, if the input stream starts with an identifier, reconsume the current // input code point, consume an ident-like token, and return it. - if (would_start_an_identifier(start_of_input_stream_triplet())) { + if (would_start_an_ident_sequence(start_of_input_stream_triplet())) { reconsume_current_input_code_point(); return consume_an_ident_like_token(); } @@ -1265,10 +1267,10 @@ Token Tokenizer::consume_a_token() // U+0040 COMMERCIAL AT (@) if (is_at(input)) { dbgln_if(CSS_TOKENIZER_DEBUG, "is at"); - // If the next 3 input code points would start an identifier, consume a name, create + // If the next 3 input code points would start an ident sequence, consume an ident sequence, create // an <at-keyword-token> with its value set to the returned value, and return it. - if (would_start_an_identifier(peek_triplet())) { - auto name = consume_a_name(); + if (would_start_an_ident_sequence(peek_triplet())) { + auto name = consume_an_ident_sequence(); return create_value_token(Token::Type::AtKeyword, name); } @@ -1329,7 +1331,7 @@ Token Tokenizer::consume_a_token() } // name-start code point - if (is_name_start_code_point(input)) { + if (is_ident_start_code_point(input)) { dbgln_if(CSS_TOKENIZER_DEBUG, "is name start"); // Reconsume the current input code point, consume an ident-like token, and return it. reconsume_current_input_code_point(); diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h index d70e5b42c4..92f4530e01 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h @@ -84,7 +84,7 @@ private: [[nodiscard]] Token consume_an_ident_like_token(); [[nodiscard]] Number consume_a_number(); [[nodiscard]] float convert_a_string_to_a_number(StringView); - [[nodiscard]] String consume_a_name(); + [[nodiscard]] String consume_an_ident_sequence(); [[nodiscard]] u32 consume_escaped_code_point(); [[nodiscard]] Token consume_a_url_token(); void consume_the_remnants_of_a_bad_url(); @@ -92,7 +92,7 @@ private: void consume_as_much_whitespace_as_possible(); void reconsume_current_input_code_point(); [[nodiscard]] static bool is_valid_escape_sequence(U32Twin); - [[nodiscard]] static bool would_start_an_identifier(U32Triplet); + [[nodiscard]] static bool would_start_an_ident_sequence(U32Triplet); [[nodiscard]] static bool would_start_a_number(U32Triplet); String m_decoded_input; |