diff options
author | Max Wipfli <mail@maxwipfli.ch> | 2021-06-01 21:18:08 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-06-03 13:31:46 +0200 |
commit | bc8d16ad28afb7436bfde1fd0a21faf73d652230 (patch) | |
tree | 7209f27957cc40f9cc1ce27a54f4a670a69a4683 /Userland/Libraries/LibWeb/HTML/Parser | |
parent | 1c9d87c4558304cf2e955df7e4c49f9f60cd55f2 (diff) | |
download | serenity-bc8d16ad28afb7436bfde1fd0a21faf73d652230.zip |
Everywhere: Replace ctype.h to avoid narrowing conversions
This replaces ctype.h with AK/CharacterTypes.h everywhere I could find
issues with narrowing conversions. While using it will probably make
sense almost everywhere in the future, the most critical places should
have been addressed.
Diffstat (limited to 'Userland/Libraries/LibWeb/HTML/Parser')
-rw-r--r-- | Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp | 70 |
1 files changed, 25 insertions, 45 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 7d716a8532..638ba37bea 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -4,13 +4,13 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include <AK/CharacterTypes.h> #include <AK/Debug.h> #include <AK/SourceLocation.h> #include <LibTextCodec/Decoder.h> #include <LibWeb/HTML/Parser/Entities.h> #include <LibWeb/HTML/Parser/HTMLToken.h> #include <LibWeb/HTML/Parser/HTMLTokenizer.h> -#include <ctype.h> #include <string.h> namespace Web::HTML { @@ -93,25 +93,25 @@ namespace Web::HTML { if (!current_input_character.has_value()) #define ON_ASCII_ALPHA \ - if (current_input_character.has_value() && isalpha(current_input_character.value())) + if (current_input_character.has_value() && is_ascii_alpha(current_input_character.value())) #define ON_ASCII_ALPHANUMERIC \ - if (current_input_character.has_value() && isalnum(current_input_character.value())) + if (current_input_character.has_value() && is_ascii_alphanumeric(current_input_character.value())) #define ON_ASCII_UPPER_ALPHA \ - if (current_input_character.has_value() && current_input_character.value() >= 'A' && current_input_character.value() <= 'Z') + if (current_input_character.has_value() && is_ascii_upper_alpha(current_input_character.value())) #define ON_ASCII_LOWER_ALPHA \ - if (current_input_character.has_value() && current_input_character.value() >= 'a' && current_input_character.value() <= 'z') + if (current_input_character.has_value() && is_ascii_lower_alpha(current_input_character.value())) #define ON_ASCII_DIGIT \ - if (current_input_character.has_value() && isdigit(current_input_character.value())) + if (current_input_character.has_value() && is_ascii_digit(current_input_character.value())) #define ON_ASCII_HEX_DIGIT \ - if (current_input_character.has_value() && 
isxdigit(current_input_character.value())) + if (current_input_character.has_value() && is_ascii_hex_digit(current_input_character.value())) #define ON_WHITESPACE \ - if (current_input_character.has_value() && strchr("\t\n\f ", current_input_character.value())) + if (current_input_character.has_value() && is_ascii(current_input_character.value()) && "\t\n\f "sv.contains(current_input_character.value())) #define ANYTHING_ELSE if (1) @@ -172,26 +172,6 @@ static inline void log_parse_error(const SourceLocation& location = SourceLocati dbgln_if(TOKENIZER_TRACE_DEBUG, "Parse error (tokenization) {}", location); } -static inline bool is_surrogate(u32 code_point) -{ - return (code_point & 0xfffff800) == 0xd800; -} - -static inline bool is_noncharacter(u32 code_point) -{ - return code_point >= 0xfdd0 && (code_point <= 0xfdef || (code_point & 0xfffe) == 0xfffe) && code_point <= 0x10ffff; -} - -static inline bool is_c0_control(u32 code_point) -{ - return code_point <= 0x1f; -} - -static inline bool is_control(u32 code_point) -{ - return is_c0_control(code_point) || (code_point >= 0x7f && code_point <= 0x9f); -} - Optional<u32> HTMLTokenizer::next_code_point() { if (m_utf8_iterator == m_utf8_view.end()) @@ -322,7 +302,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); + m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value())); m_current_token.m_end_position = nth_last_position(0); continue; } @@ -458,7 +438,7 @@ _StartOfFunction: ON_ASCII_UPPER_ALPHA { create_new_token(HTMLToken::Type::DOCTYPE); - m_current_token.m_doctype.name.append(tolower(current_input_character.value())); + m_current_token.m_doctype.name.append(to_ascii_lowercase(current_input_character.value())); m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } @@ -507,7 +487,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - 
m_current_token.m_doctype.name.append(tolower(current_input_character.value())); + m_current_token.m_doctype.name.append(to_ascii_lowercase(current_input_character.value())); continue; } ON(0) @@ -550,10 +530,10 @@ _StartOfFunction: } ANYTHING_ELSE { - if (toupper(current_input_character.value()) == 'P' && consume_next_if_match("UBLIC", CaseSensitivity::CaseInsensitive)) { + if (to_ascii_uppercase(current_input_character.value()) == 'P' && consume_next_if_match("UBLIC", CaseSensitivity::CaseInsensitive)) { SWITCH_TO(AfterDOCTYPEPublicKeyword); } - if (toupper(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) { + if (to_ascii_uppercase(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) { SWITCH_TO(AfterDOCTYPESystemKeyword); } log_parse_error(); @@ -1068,7 +1048,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - m_current_token.m_tag.attributes.last().local_name_builder.append_code_point(tolower(current_input_character.value())); + m_current_token.m_tag.attributes.last().local_name_builder.append_code_point(to_ascii_lowercase(current_input_character.value())); continue; } ON(0) @@ -1558,7 +1538,7 @@ _StartOfFunction: if (consumed_as_part_of_an_attribute() && !match.value().entity.ends_with(';')) { auto next_code_point = peek_code_point(0); - if (next_code_point.has_value() && (next_code_point.value() == '=' || isalnum(next_code_point.value()))) { + if (next_code_point.has_value() && (next_code_point.value() == '=' || is_ascii_alphanumeric(next_code_point.value()))) { FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; SWITCH_TO_RETURN_STATE; } @@ -1720,14 +1700,14 @@ _StartOfFunction: log_parse_error(); m_character_reference_code = 0xFFFD; } - if (is_surrogate(m_character_reference_code)) { + if (is_unicode_surrogate(m_character_reference_code)) { log_parse_error(); m_character_reference_code = 0xFFFD; } - if 
(is_noncharacter(m_character_reference_code)) { + if (is_unicode_noncharacter(m_character_reference_code)) { log_parse_error(); } - if (m_character_reference_code == 0xd || (is_control(m_character_reference_code) && !isspace(m_character_reference_code))) { + if (m_character_reference_code == 0xd || (is_unicode_control(m_character_reference_code) && !is_ascii_space(m_character_reference_code))) { log_parse_error(); constexpr struct { u32 number; @@ -1870,7 +1850,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); + m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value())); m_temporary_buffer.append(current_input_character.value()); continue; } @@ -1980,7 +1960,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); + m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value())); m_temporary_buffer.append(current_input_character.value()); continue; } @@ -2193,7 +2173,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); + m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value())); m_temporary_buffer.append(current_input_character.value()); continue; } @@ -2247,7 +2227,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - m_temporary_buffer.append(tolower(current_input_character.value())); + m_temporary_buffer.append(to_ascii_lowercase(current_input_character.value())); EMIT_CURRENT_CHARACTER; } ON_ASCII_LOWER_ALPHA @@ -2393,7 +2373,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - m_temporary_buffer.append(tolower(current_input_character.value())); + m_temporary_buffer.append(to_ascii_lowercase(current_input_character.value())); EMIT_CURRENT_CHARACTER; } ON_ASCII_LOWER_ALPHA @@ -2512,7 +2492,7 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - 
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); + m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value())); m_temporary_buffer.append(current_input_character.value()); continue; } @@ -2598,7 +2578,7 @@ bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitiv // FIXME: This should be more Unicode-aware. if (case_sensitivity == CaseSensitivity::CaseInsensitive) { if (code_point.value() < 0x80) { - if (tolower(code_point.value()) != tolower(string[i])) + if (to_ascii_lowercase(code_point.value()) != to_ascii_lowercase(string[i])) return false; continue; } |