diff options
| Field | Value | Date |
|---|---|---|
| author | Andreas Kling <kling@serenityos.org> | 2020-08-03 19:06:41 +0200 |
| committer | Andreas Kling <kling@serenityos.org> | 2020-08-03 19:06:41 +0200 |
| commit | ea9ac3155d1774f13ac4e9a96605c0e85a8f299e (patch) | |
| tree | 79965fb23c2c75ae48fcbf1300e40671ea90f0df /Libraries/LibWeb/HTML | |
| parent | b139fb9f383911130336ac995cd2671662f9c963 (diff) | |
| download | serenity-ea9ac3155d1774f13ac4e9a96605c0e85a8f299e.zip | |
Unicode: s/codepoint/code_point/g
Unicode calls them "code points" so let's follow their style.
Diffstat (limited to 'Libraries/LibWeb/HTML')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | Libraries/LibWeb/HTML/Parser/Entities.cpp | 28 |
| -rw-r--r-- | Libraries/LibWeb/HTML/Parser/Entities.h | 4 |
| -rw-r--r-- | Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp | 30 |
| -rw-r--r-- | Libraries/LibWeb/HTML/Parser/HTMLToken.h | 10 |
| -rw-r--r-- | Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp | 220 |
| -rw-r--r-- | Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h | 4 |

6 files changed, 148 insertions, 148 deletions
diff --git a/Libraries/LibWeb/HTML/Parser/Entities.cpp b/Libraries/LibWeb/HTML/Parser/Entities.cpp index 088d6f9bc9..ac4a90322e 100644 --- a/Libraries/LibWeb/HTML/Parser/Entities.cpp +++ b/Libraries/LibWeb/HTML/Parser/Entities.cpp @@ -31,12 +31,12 @@ namespace Web { namespace HTML { -Optional<EntityMatch> codepoints_from_entity(const StringView& entity) +Optional<EntityMatch> code_pointss_from_entity(const StringView& entity) { constexpr struct { StringView entity; - u32 codepoint; - } single_codepoint_entities[] = { + u32 code_points; + } single_code_points_entities[] = { { "AElig;", 0x000C6 }, { "AElig", 0x000C6 }, { "AMP;", 0x00026 }, @@ -2179,9 +2179,9 @@ Optional<EntityMatch> codepoints_from_entity(const StringView& entity) constexpr struct { StringView entity; - u32 codepoint1; - u32 codepoint2; - } double_codepoint_entities[] = { + u32 code_points1; + u32 code_points2; + } double_code_points_entities[] = { { "NotEqualTilde;", 0x02242, 0x00338 }, { "NotGreaterFullEqual;", 0x02267, 0x00338 }, { "NotGreaterGreater;", 0x0226B, 0x00338 }, @@ -2279,17 +2279,17 @@ Optional<EntityMatch> codepoints_from_entity(const StringView& entity) EntityMatch match; - for (auto& single_codepoint_entity : single_codepoint_entities) { - if (entity.starts_with(single_codepoint_entity.entity)) { - if (match.entity.is_null() || single_codepoint_entity.entity.length() > match.entity.length()) - match = { { single_codepoint_entity.codepoint }, single_codepoint_entity.entity }; + for (auto& single_code_points_entity : single_code_points_entities) { + if (entity.starts_with(single_code_points_entity.entity)) { + if (match.entity.is_null() || single_code_points_entity.entity.length() > match.entity.length()) + match = { { single_code_points_entity.code_points }, single_code_points_entity.entity }; } } - for (auto& double_codepoint_entity : double_codepoint_entities) { - if (entity.starts_with(double_codepoint_entity.entity)) { - if (match.entity.is_null() || 
double_codepoint_entity.entity.length() > match.entity.length()) - match = EntityMatch { { double_codepoint_entity.codepoint1, double_codepoint_entity.codepoint2 }, StringView(double_codepoint_entity.entity) }; + for (auto& double_code_points_entity : double_code_points_entities) { + if (entity.starts_with(double_code_points_entity.entity)) { + if (match.entity.is_null() || double_code_points_entity.entity.length() > match.entity.length()) + match = EntityMatch { { double_code_points_entity.code_points1, double_code_points_entity.code_points2 }, StringView(double_code_points_entity.entity) }; } } diff --git a/Libraries/LibWeb/HTML/Parser/Entities.h b/Libraries/LibWeb/HTML/Parser/Entities.h index 0e65a068a6..7d5c406082 100644 --- a/Libraries/LibWeb/HTML/Parser/Entities.h +++ b/Libraries/LibWeb/HTML/Parser/Entities.h @@ -33,11 +33,11 @@ namespace Web { namespace HTML { struct EntityMatch { - Vector<u32, 2> codepoints; + Vector<u32, 2> code_pointss; StringView entity; }; -Optional<EntityMatch> codepoints_from_entity(const StringView&); +Optional<EntityMatch> code_pointss_from_entity(const StringView&); } } diff --git a/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp index 59aaef631a..d75e4b00fe 100644 --- a/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp +++ b/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp @@ -472,7 +472,7 @@ void HTMLDocumentParser::insert_comment(HTMLToken& token) void HTMLDocumentParser::handle_in_head(HTMLToken& token) { if (token.is_parser_whitespace()) { - insert_character(token.codepoint()); + insert_character(token.code_points()); return; } @@ -671,7 +671,7 @@ void HTMLDocumentParser::insert_character(u32 data) void HTMLDocumentParser::handle_after_head(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { - insert_character(token.codepoint()); + insert_character(token.code_points()); return; } @@ -1004,17 +1004,17 @@ bool 
HTMLDocumentParser::is_special_tag(const FlyString& tag_name) void HTMLDocumentParser::handle_in_body(HTMLToken& token) { if (token.is_character()) { - if (token.codepoint() == 0) { + if (token.code_points() == 0) { PARSE_ERROR(); return; } if (token.is_parser_whitespace()) { reconstruct_the_active_formatting_elements(); - insert_character(token.codepoint()); + insert_character(token.code_points()); return; } reconstruct_the_active_formatting_elements(); - insert_character(token.codepoint()); + insert_character(token.code_points()); m_frameset_ok = false; return; } @@ -1162,7 +1162,7 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) // then ignore that token and move on to the next one. // (Newlines at the start of pre blocks are ignored as an authoring convenience.) auto next_token = m_tokenizer.next_token(); - if (next_token.has_value() && next_token.value().is_character() && next_token.value().codepoint() == '\n') { + if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_points() == '\n') { // Ignore it. } else { process_using_the_rules_for(m_insertion_mode, next_token.value()); @@ -1503,7 +1503,7 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) m_frameset_ok = false; m_insertion_mode = InsertionMode::Text; - if (next_token.has_value() && next_token.value().is_character() && next_token.value().codepoint() == '\n') { + if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_points() == '\n') { // Ignore it. 
} else { process_using_the_rules_for(m_insertion_mode, next_token.value()); @@ -1750,7 +1750,7 @@ void HTMLDocumentParser::decrement_script_nesting_level() void HTMLDocumentParser::handle_text(HTMLToken& token) { if (token.is_character()) { - insert_character(token.codepoint()); + insert_character(token.code_points()); return; } if (token.is_end_of_file()) { @@ -1979,7 +1979,7 @@ void HTMLDocumentParser::handle_in_cell(HTMLToken& token) void HTMLDocumentParser::handle_in_table_text(HTMLToken& token) { if (token.is_character()) { - if (token.codepoint() == 0) { + if (token.code_points() == 0) { PARSE_ERROR(); return; } @@ -2000,7 +2000,7 @@ void HTMLDocumentParser::handle_in_table_text(HTMLToken& token) } for (auto& pending_token : m_pending_table_character_tokens) { - insert_character(pending_token.codepoint()); + insert_character(pending_token.code_points()); } m_insertion_mode = m_original_insertion_mode; @@ -2210,11 +2210,11 @@ void HTMLDocumentParser::handle_in_select_in_table(HTMLToken& token) void HTMLDocumentParser::handle_in_select(HTMLToken& token) { if (token.is_character()) { - if (token.codepoint() == 0) { + if (token.code_points() == 0) { PARSE_ERROR(); return; } - insert_character(token.codepoint()); + insert_character(token.code_points()); return; } @@ -2384,7 +2384,7 @@ void HTMLDocumentParser::handle_in_caption(HTMLToken& token) void HTMLDocumentParser::handle_in_column_group(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { - insert_character(token.codepoint()); + insert_character(token.code_points()); return; } @@ -2527,7 +2527,7 @@ void HTMLDocumentParser::handle_in_template(HTMLToken& token) void HTMLDocumentParser::handle_in_frameset(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { - insert_character(token.codepoint()); + insert_character(token.code_points()); return; } @@ -2587,7 +2587,7 @@ void HTMLDocumentParser::handle_in_frameset(HTMLToken& token) void 
HTMLDocumentParser::handle_after_frameset(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { - insert_character(token.codepoint()); + insert_character(token.code_points()); return; } diff --git a/Libraries/LibWeb/HTML/Parser/HTMLToken.h b/Libraries/LibWeb/HTML/Parser/HTMLToken.h index 13e12e9a83..3ddd495046 100644 --- a/Libraries/LibWeb/HTML/Parser/HTMLToken.h +++ b/Libraries/LibWeb/HTML/Parser/HTMLToken.h @@ -50,11 +50,11 @@ public: EndOfFile, }; - static HTMLToken make_character(u32 codepoint) + static HTMLToken make_character(u32 code_points) { HTMLToken token; token.m_type = Type::Character; - token.m_comment_or_character.data.append(codepoint); + token.m_comment_or_character.data.append(code_points); return token; } @@ -73,11 +73,11 @@ public: bool is_character() const { return m_type == Type::Character; } bool is_end_of_file() const { return m_type == Type::EndOfFile; } - u32 codepoint() const + u32 code_points() const { ASSERT(is_character()); Utf8View view(m_comment_or_character.data.string_view()); - ASSERT(view.length_in_codepoints() == 1); + ASSERT(view.length_in_code_pointss() == 1); return *view.begin(); } @@ -86,7 +86,7 @@ public: // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not. 
if (!is_character()) return false; - switch (codepoint()) { + switch (code_points()) { case '\t': case '\n': case '\f': diff --git a/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index cdc8ec8044..c9266b909b 100644 --- a/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -46,7 +46,7 @@ namespace Web::HTML { #endif #define CONSUME_NEXT_INPUT_CHARACTER \ - current_input_character = next_codepoint(); + current_input_character = next_code_points(); #define SWITCH_TO(new_state) \ do { \ @@ -86,22 +86,22 @@ namespace Web::HTML { return m_queued_tokens.dequeue(); \ } while (0) -#define EMIT_CHARACTER_AND_RECONSUME_IN(codepoint, new_state) \ +#define EMIT_CHARACTER_AND_RECONSUME_IN(code_points, new_state) \ do { \ - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); \ + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); \ will_reconsume_in(State::new_state); \ m_state = State::new_state; \ goto new_state; \ } while (0) -#define FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE \ +#define FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE \ do { \ - for (auto codepoint : m_temporary_buffer) { \ + for (auto code_points : m_temporary_buffer) { \ if (consumed_as_part_of_an_attribute()) { \ - m_current_token.m_tag.attributes.last().value_builder.append_codepoint(codepoint); \ + m_current_token.m_tag.attributes.last().value_builder.append_code_points(code_points); \ } else { \ create_new_token(HTMLToken::Type::Character); \ - m_current_token.m_comment_or_character.data.append_codepoint(codepoint); \ + m_current_token.m_comment_or_character.data.append_code_points(code_points); \ m_queued_tokens.enqueue(m_current_token); \ } \ } \ @@ -112,8 +112,8 @@ namespace Web::HTML { m_utf8_iterator = m_prev_utf8_iterator; \ } while (0) -#define ON(codepoint) \ - if (current_input_character.has_value() && current_input_character.value() == codepoint) +#define 
ON(code_points) \ + if (current_input_character.has_value() && current_input_character.value() == code_points) #define ON_EOF \ if (!current_input_character.has_value()) @@ -159,10 +159,10 @@ namespace Web::HTML { return m_queued_tokens.dequeue(); \ } while (0) -#define EMIT_CHARACTER(codepoint) \ +#define EMIT_CHARACTER(code_points) \ do { \ create_new_token(HTMLToken::Type::Character); \ - m_current_token.m_comment_or_character.data.append_codepoint(codepoint); \ + m_current_token.m_comment_or_character.data.append_code_points(code_points); \ m_queued_tokens.enqueue(m_current_token); \ return m_queued_tokens.dequeue(); \ } while (0) @@ -170,11 +170,11 @@ namespace Web::HTML { #define EMIT_CURRENT_CHARACTER \ EMIT_CHARACTER(current_input_character.value()); -#define SWITCH_TO_AND_EMIT_CHARACTER(codepoint, new_state) \ +#define SWITCH_TO_AND_EMIT_CHARACTER(code_points, new_state) \ do { \ will_switch_to(State::new_state); \ m_state = State::new_state; \ - EMIT_CHARACTER(codepoint); \ + EMIT_CHARACTER(code_points); \ } while (0) #define SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(new_state) \ @@ -193,39 +193,39 @@ namespace Web::HTML { } \ } -static inline bool is_surrogate(u32 codepoint) +static inline bool is_surrogate(u32 code_points) { - return (codepoint & 0xfffff800) == 0xd800; + return (code_points & 0xfffff800) == 0xd800; } -static inline bool is_noncharacter(u32 codepoint) +static inline bool is_noncharacter(u32 code_points) { - return codepoint >= 0xfdd0 && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe) && codepoint <= 0x10ffff; + return code_points >= 0xfdd0 && (code_points <= 0xfdef || (code_points & 0xfffe) == 0xfffe) && code_points <= 0x10ffff; } -static inline bool is_c0_control(u32 codepoint) +static inline bool is_c0_control(u32 code_points) { - return codepoint <= 0x1f; + return code_points <= 0x1f; } -static inline bool is_control(u32 codepoint) +static inline bool is_control(u32 code_points) { - return is_c0_control(codepoint) || (codepoint >= 
0x7f && codepoint <= 0x9f); + return is_c0_control(code_points) || (code_points >= 0x7f && code_points <= 0x9f); } -Optional<u32> HTMLTokenizer::next_codepoint() +Optional<u32> HTMLTokenizer::next_code_points() { if (m_utf8_iterator == m_utf8_view.end()) return {}; m_prev_utf8_iterator = m_utf8_iterator; ++m_utf8_iterator; #ifdef TOKENIZER_TRACE - dbg() << "(Tokenizer) Next codepoint: " << (char)*m_prev_utf8_iterator; + dbg() << "(Tokenizer) Next code_points: " << (char)*m_prev_utf8_iterator; #endif return *m_prev_utf8_iterator; } -Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const +Optional<u32> HTMLTokenizer::peek_code_points(size_t offset) const { auto it = m_utf8_iterator; for (size_t i = 0; i < offset && it != m_utf8_view.end(); ++i) @@ -242,7 +242,7 @@ _StartOfFunction: return m_queued_tokens.dequeue(); for (;;) { - auto current_input_character = next_codepoint(); + auto current_input_character = next_code_points(); switch (m_state) { BEGIN_STATE(Data) { @@ -328,7 +328,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_tag.tag_name.append_codepoint(0xFFFD); + m_current_token.m_tag.tag_name.append_code_points(0xFFFD); continue; } ON_EOF @@ -338,7 +338,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_tag.tag_name.append_codepoint(current_input_character.value()); + m_current_token.m_tag.tag_name.append_code_points(current_input_character.value()); continue; } } @@ -408,12 +408,12 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_comment_or_character.data.append_codepoint(0xFFFD); + m_current_token.m_comment_or_character.data.append_code_points(0xFFFD); continue; } ANYTHING_ELSE { - m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value()); + m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value()); continue; } } @@ -462,7 +462,7 @@ _StartOfFunction: { PARSE_ERROR(); create_new_token(HTMLToken::Type::DOCTYPE); - 
m_current_token.m_doctype.name.append_codepoint(0xFFFD); + m_current_token.m_doctype.name.append_code_points(0xFFFD); m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } @@ -484,7 +484,7 @@ _StartOfFunction: ANYTHING_ELSE { create_new_token(HTMLToken::Type::DOCTYPE); - m_current_token.m_doctype.name.append_codepoint(current_input_character.value()); + m_current_token.m_doctype.name.append_code_points(current_input_character.value()); m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } @@ -509,7 +509,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_doctype.name.append_codepoint(0xFFFD); + m_current_token.m_doctype.name.append_code_points(0xFFFD); continue; } ON_EOF @@ -521,7 +521,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_doctype.name.append_codepoint(current_input_character.value()); + m_current_token.m_doctype.name.append_code_points(current_input_character.value()); continue; } } @@ -732,7 +732,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_doctype.public_identifier.append_codepoint(0xFFFD); + m_current_token.m_doctype.public_identifier.append_code_points(0xFFFD); continue; } ON('>') @@ -750,7 +750,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_doctype.public_identifier.append_codepoint(current_input_character.value()); + m_current_token.m_doctype.public_identifier.append_code_points(current_input_character.value()); continue; } } @@ -765,7 +765,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_doctype.public_identifier.append_codepoint(0xFFFD); + m_current_token.m_doctype.public_identifier.append_code_points(0xFFFD); continue; } ON('>') @@ -783,7 +783,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_doctype.public_identifier.append_codepoint(current_input_character.value()); + m_current_token.m_doctype.public_identifier.append_code_points(current_input_character.value()); continue; } } @@ -798,7 +798,7 @@ _StartOfFunction: 
ON(0) { PARSE_ERROR(); - m_current_token.m_doctype.system_identifier.append_codepoint(0xFFFD); + m_current_token.m_doctype.system_identifier.append_code_points(0xFFFD); continue; } ON('>') @@ -816,7 +816,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_doctype.system_identifier.append_codepoint(current_input_character.value()); + m_current_token.m_doctype.system_identifier.append_code_points(current_input_character.value()); continue; } } @@ -831,7 +831,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_doctype.system_identifier.append_codepoint(0xFFFD); + m_current_token.m_doctype.system_identifier.append_code_points(0xFFFD); continue; } ON('>') @@ -849,7 +849,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_doctype.system_identifier.append_codepoint(current_input_character.value()); + m_current_token.m_doctype.system_identifier.append_code_points(current_input_character.value()); continue; } } @@ -1003,7 +1003,7 @@ _StartOfFunction: { PARSE_ERROR(); auto new_attribute = HTMLToken::AttributeBuilder(); - new_attribute.local_name_builder.append_codepoint(current_input_character.value()); + new_attribute.local_name_builder.append_code_points(current_input_character.value()); m_current_token.m_tag.attributes.append(new_attribute); SWITCH_TO(AttributeName); } @@ -1059,13 +1059,13 @@ _StartOfFunction: } ON_ASCII_UPPER_ALPHA { - m_current_token.m_tag.attributes.last().local_name_builder.append_codepoint(tolower(current_input_character.value())); + m_current_token.m_tag.attributes.last().local_name_builder.append_code_points(tolower(current_input_character.value())); continue; } ON(0) { PARSE_ERROR(); - m_current_token.m_tag.attributes.last().local_name_builder.append_codepoint(0xFFFD); + m_current_token.m_tag.attributes.last().local_name_builder.append_code_points(0xFFFD); continue; } ON('"') @@ -1086,7 +1086,7 @@ _StartOfFunction: ANYTHING_ELSE { AnythingElseAttributeName: - 
m_current_token.m_tag.attributes.last().local_name_builder.append_codepoint(current_input_character.value()); + m_current_token.m_tag.attributes.last().local_name_builder.append_code_points(current_input_character.value()); continue; } } @@ -1163,7 +1163,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_tag.attributes.last().value_builder.append_codepoint(0xFFFD); + m_current_token.m_tag.attributes.last().value_builder.append_code_points(0xFFFD); continue; } ON_EOF @@ -1173,7 +1173,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_tag.attributes.last().value_builder.append_codepoint(current_input_character.value()); + m_current_token.m_tag.attributes.last().value_builder.append_code_points(current_input_character.value()); continue; } } @@ -1193,7 +1193,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_tag.attributes.last().value_builder.append_codepoint(0xFFFD); + m_current_token.m_tag.attributes.last().value_builder.append_code_points(0xFFFD); continue; } ON_EOF @@ -1203,7 +1203,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_tag.attributes.last().value_builder.append_codepoint(current_input_character.value()); + m_current_token.m_tag.attributes.last().value_builder.append_code_points(current_input_character.value()); continue; } } @@ -1227,7 +1227,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_tag.attributes.last().value_builder.append_codepoint(0xFFFD); + m_current_token.m_tag.attributes.last().value_builder.append_code_points(0xFFFD); continue; } ON('"') @@ -1263,7 +1263,7 @@ _StartOfFunction: ANYTHING_ELSE { AnythingElseAttributeValueUnquoted: - m_current_token.m_tag.attributes.last().value_builder.append_codepoint(current_input_character.value()); + m_current_token.m_tag.attributes.last().value_builder.append_code_points(current_input_character.value()); continue; } } @@ -1343,7 +1343,7 @@ _StartOfFunction: { ON('<') { - 
m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value()); + m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value()); SWITCH_TO(CommentLessThanSign); } ON('-') @@ -1353,7 +1353,7 @@ _StartOfFunction: ON(0) { PARSE_ERROR(); - m_current_token.m_comment_or_character.data.append_codepoint(0xFFFD); + m_current_token.m_comment_or_character.data.append_code_points(0xFFFD); continue; } ON_EOF @@ -1364,7 +1364,7 @@ _StartOfFunction: } ANYTHING_ELSE { - m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value()); + m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value()); continue; } } @@ -1449,12 +1449,12 @@ _StartOfFunction: { ON('!') { - m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value()); + m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value()); SWITCH_TO(CommentLessThanSignBang); } ON('<') { - m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value()); + m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value()); continue; } ANYTHING_ELSE @@ -1533,7 +1533,7 @@ _StartOfFunction: { size_t byte_offset = m_utf8_view.byte_offset_of(m_prev_utf8_iterator); - auto match = HTML::codepoints_from_entity(m_decoded_input.substring_view(byte_offset, m_decoded_input.length() - byte_offset - 1)); + auto match = HTML::code_pointss_from_entity(m_decoded_input.substring_view(byte_offset, m_decoded_input.length() - byte_offset - 1)); if (match.has_value()) { for (size_t i = 0; i < match.value().entity.length() - 1; ++i) { @@ -1543,18 +1543,18 @@ _StartOfFunction: for (auto ch : match.value().entity) m_temporary_buffer.append(ch); - if (consumed_as_part_of_an_attribute() && match.value().codepoints.last() != ';') { - auto next = peek_codepoint(0); + if (consumed_as_part_of_an_attribute() && 
match.value().code_pointss.last() != ';') { + auto next = peek_code_points(0); if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) { - FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; SWITCH_TO_RETURN_STATE; } } if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) { - auto next_codepoint = peek_codepoint(0); - if (next_codepoint.has_value() && next_codepoint.value() == '=') { - FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + auto next_code_points = peek_code_points(0); + if (next_code_points.has_value() && next_code_points.value() == '=') { + FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; SWITCH_TO_RETURN_STATE; } } @@ -1564,12 +1564,12 @@ _StartOfFunction: } m_temporary_buffer.clear(); - m_temporary_buffer.append(match.value().codepoints); + m_temporary_buffer.append(match.value().code_pointss); - FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; SWITCH_TO_RETURN_STATE; } else { - FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; SWITCH_TO(AmbiguousAmpersand); } } @@ -1580,7 +1580,7 @@ _StartOfFunction: ON_ASCII_ALPHANUMERIC { if (consumed_as_part_of_an_attribute()) { - m_current_token.m_tag.attributes.last().value_builder.append_codepoint(current_input_character.value()); + m_current_token.m_tag.attributes.last().value_builder.append_code_points(current_input_character.value()); continue; } else { EMIT_CURRENT_CHARACTER; @@ -1628,7 +1628,7 @@ _StartOfFunction: ANYTHING_ELSE { PARSE_ERROR(); - FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; RECONSUME_IN_RETURN_STATE; } } @@ -1643,7 +1643,7 @@ _StartOfFunction: ANYTHING_ELSE { PARSE_ERROR(); - FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; RECONSUME_IN_RETURN_STATE; } } @@ 
-1724,7 +1724,7 @@ _StartOfFunction: PARSE_ERROR(); constexpr struct { u32 number; - u32 codepoint; + u32 code_points; } conversion_table[] = { { 0x80, 0x20AC }, { 0x82, 0x201A }, @@ -1756,7 +1756,7 @@ _StartOfFunction: }; for (auto& entry : conversion_table) { if (m_character_reference_code == entry.number) { - m_character_reference_code = entry.codepoint; + m_character_reference_code = entry.code_points; break; } } @@ -1764,7 +1764,7 @@ _StartOfFunction: m_temporary_buffer.clear(); m_temporary_buffer.append(m_character_reference_code); - FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; SWITCH_TO_RETURN_STATE; } END_STATE @@ -1833,8 +1833,8 @@ _StartOfFunction: if (!current_end_tag_token_is_appropriate()) { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(RCDATA); } SWITCH_TO(BeforeAttributeName); @@ -1844,8 +1844,8 @@ _StartOfFunction: if (!current_end_tag_token_is_appropriate()) { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(RCDATA); } SWITCH_TO(SelfClosingStartTag); @@ -1855,8 +1855,8 @@ _StartOfFunction: if (!current_end_tag_token_is_appropriate()) { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : 
m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(RCDATA); } SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); @@ -1869,7 +1869,7 @@ _StartOfFunction: } ON_ASCII_LOWER_ALPHA { - m_current_token.m_tag.tag_name.append_codepoint(current_input_character.value()); + m_current_token.m_tag.tag_name.append_code_points(current_input_character.value()); m_temporary_buffer.append(current_input_character.value()); continue; } @@ -1877,8 +1877,8 @@ _StartOfFunction: { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(RCDATA); } } @@ -1943,8 +1943,8 @@ _StartOfFunction: if (!current_end_tag_token_is_appropriate()) { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(RAWTEXT); } SWITCH_TO(BeforeAttributeName); @@ -1954,8 +1954,8 @@ _StartOfFunction: if (!current_end_tag_token_is_appropriate()) { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(RAWTEXT); } SWITCH_TO(SelfClosingStartTag); @@ -1965,8 +1965,8 @@ _StartOfFunction: if (!current_end_tag_token_is_appropriate()) { m_queued_tokens.enqueue(HTMLToken::make_character('<')); 
m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(RAWTEXT); } SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); @@ -1987,8 +1987,8 @@ _StartOfFunction: { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(RAWTEXT); } } @@ -2155,8 +2155,8 @@ _StartOfFunction: m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) { - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) { + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); } RECONSUME_IN(ScriptDataEscaped); } @@ -2167,8 +2167,8 @@ _StartOfFunction: m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) { - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) { + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); } RECONSUME_IN(ScriptDataEscaped); } @@ -2179,8 +2179,8 @@ _StartOfFunction: m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) { - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) { + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); } RECONSUME_IN(ScriptDataEscaped); } 
@@ -2200,8 +2200,8 @@ _StartOfFunction: { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) { - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) { + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); } RECONSUME_IN(ScriptDataEscaped); } @@ -2479,8 +2479,8 @@ _StartOfFunction: SWITCH_TO(BeforeAttributeName); m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(ScriptData); } ON('/') @@ -2489,8 +2489,8 @@ _StartOfFunction: SWITCH_TO(SelfClosingStartTag); m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(ScriptData); } ON('>') @@ -2499,8 +2499,8 @@ _StartOfFunction: SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(ScriptData); } ON_ASCII_UPPER_ALPHA @@ -2519,8 +2519,8 @@ _StartOfFunction: { m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); - for (auto codepoint : m_temporary_buffer) - 
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); + for (auto code_points : m_temporary_buffer) + m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); RECONSUME_IN(ScriptData); } } @@ -2585,18 +2585,18 @@ _StartOfFunction: bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitivity case_sensitivity) { for (size_t i = 0; i < string.length(); ++i) { - auto codepoint = peek_codepoint(i); - if (!codepoint.has_value()) + auto code_points = peek_code_points(i); + if (!code_points.has_value()) return false; // FIXME: This should be more Unicode-aware. if (case_sensitivity == CaseSensitivity::CaseInsensitive) { - if (codepoint.value() < 0x80) { - if (tolower(codepoint.value()) != tolower(string[i])) + if (code_points.value() < 0x80) { + if (tolower(code_points.value()) != tolower(string[i])) return false; continue; } } - if (codepoint.value() != (u32)string[i]) + if (code_points.value() != (u32)string[i]) return false; } for (size_t i = 0; i < string.length(); ++i) { diff --git a/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h b/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h index f4aa69ab91..b01e74a1f9 100644 --- a/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h +++ b/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h @@ -137,8 +137,8 @@ public: String source() const { return m_decoded_input; } private: - Optional<u32> next_codepoint(); - Optional<u32> peek_codepoint(size_t offset) const; + Optional<u32> next_code_points(); + Optional<u32> peek_code_points(size_t offset) const; bool consume_next_if_match(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive); void create_new_token(HTMLToken::Type); bool current_end_tag_token_is_appropriate() const; |