From 701b7810ba0ab6efc3b990e77509e3e8a1de2006 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Tue, 18 Jan 2022 08:29:47 -0500 Subject: LibUnicode: Generate code point abbreviations --- .../LibUnicode/GenerateUnicodeData.cpp | 36 ++++++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'Meta') diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index c8b2865582..eedf0e638a 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -72,6 +72,7 @@ struct CodePointName { struct CodePointData { u32 code_point { 0 }; String name; + Optional abbreviation; u8 canonical_combining_class { 0 }; String bidi_class; String decomposition_type; @@ -101,6 +102,7 @@ struct UnicodeData { Vector code_point_data; + HashMap code_point_abbreviations; HashMap code_point_display_name_aliases; Vector code_point_display_names; @@ -302,11 +304,12 @@ static void parse_name_aliases(Core::File& file, UnicodeData& unicode_data) auto alias = segments[1].trim_whitespace(); auto reason = segments[2].trim_whitespace(); - if (!reason.is_one_of("correction"sv, "control"sv)) - continue; - - if (!unicode_data.code_point_display_name_aliases.contains(*code_point)) - unicode_data.code_point_display_name_aliases.set(*code_point, alias); + if (reason == "abbreviation"sv) { + unicode_data.code_point_abbreviations.set(*code_point, alias); + } else if (reason.is_one_of("correction"sv, "control"sv)) { + if (!unicode_data.code_point_display_name_aliases.contains(*code_point)) + unicode_data.code_point_display_name_aliases.set(*code_point, alias); + } } } @@ -476,6 +479,9 @@ static void parse_unicode_data(Core::File& file, UnicodeData& unicode_data) data.simple_lowercase_mapping = AK::StringUtils::convert_to_uint_from_hex(segments[13]); data.simple_titlecase_mapping = AK::StringUtils::convert_to_uint_from_hex(segments[14]); + if (auto abbreviation = unicode_data.code_point_abbreviations.get(data.code_point); abbreviation.has_value()) + data.abbreviation = *abbreviation; + if (!assigned_code_point_range_start.has_value()) assigned_code_point_range_start = data.code_point; @@ -683,6 +689,11 @@ struct SpecialCaseMapping { u32 special_casing_size { 0 }; }; +struct CodePointAbbreviation { + u32 code_point { 0 }; + StringView abbreviation {}; +}; + template struct CodePointComparator { constexpr int operator()(u32 code_point, MappingType const& mapping) @@ -707,7 +718,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { for (auto const& data : unicode_data.code_point_data) { auto mapping = mapping_getter(data); - if constexpr (IsSame>) { + if constexpr (requires { mapping.has_value(); }) { if (!mapping.has_value()) continue; } else { @@ -724,6 +735,9 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { if constexpr (IsSame>) { generator.set("mapping", String::formatted("{:#x}", *mapping)); generator.append(", @mapping@ },"); + } else if constexpr (IsSame>) { + generator.set("mapping", String::formatted("{}", *mapping)); + generator.append(", \"@mapping@\"sv },"); } else { append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv); generator.append(" },"); @@ -748,6 +762,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; }); append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; }); append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; }); + append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; }); generator.append(R"~~~( struct CodePointRange { @@ -891,6 +906,15 @@ Span special_case_mapping(u32 code_point) return mapping->special_casing.span().slice(0, mapping->special_casing_size); } + +Optional code_point_abbreviation(u32 code_point) +{ + auto const* mapping = binary_search(s_abbreviation_mappings, code_point, nullptr, CodePointComparator {}); + if (mapping == nullptr) + return {}; + + return mapping->abbreviation; +} )~~~"); auto append_prop_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) { -- cgit v1.2.3