diff options
author | Timothy Flynn <trflynn89@pm.me> | 2022-01-18 08:29:47 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2022-01-18 15:13:25 +0000 |
commit | 701b7810ba0ab6efc3b990e77509e3e8a1de2006 (patch) | |
tree | 42471035e92dd291d43bc7aa3cde89f7e786aa84 /Meta | |
parent | 444b2d9ec2e962235e2ea7c15c8457c8627d7ca2 (diff) | |
download | serenity-701b7810ba0ab6efc3b990e77509e3e8a1de2006.zip |
LibUnicode: Generate code point abbreviations
Diffstat (limited to 'Meta')
-rw-r--r-- | Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp | 36 |
1 files changed, 30 insertions, 6 deletions
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index c8b2865582..eedf0e638a 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -72,6 +72,7 @@ struct CodePointName { struct CodePointData { u32 code_point { 0 }; String name; + Optional<StringView> abbreviation; u8 canonical_combining_class { 0 }; String bidi_class; String decomposition_type; @@ -101,6 +102,7 @@ struct UnicodeData { Vector<CodePointData> code_point_data; + HashMap<u32, String> code_point_abbreviations; HashMap<u32, String> code_point_display_name_aliases; Vector<CodePointName> code_point_display_names; @@ -302,11 +304,12 @@ static void parse_name_aliases(Core::File& file, UnicodeData& unicode_data) auto alias = segments[1].trim_whitespace(); auto reason = segments[2].trim_whitespace(); - if (!reason.is_one_of("correction"sv, "control"sv)) - continue; - - if (!unicode_data.code_point_display_name_aliases.contains(*code_point)) - unicode_data.code_point_display_name_aliases.set(*code_point, alias); + if (reason == "abbreviation"sv) { + unicode_data.code_point_abbreviations.set(*code_point, alias); + } else if (reason.is_one_of("correction"sv, "control"sv)) { + if (!unicode_data.code_point_display_name_aliases.contains(*code_point)) + unicode_data.code_point_display_name_aliases.set(*code_point, alias); + } } } @@ -476,6 +479,9 @@ static void parse_unicode_data(Core::File& file, UnicodeData& unicode_data) data.simple_lowercase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[13]); data.simple_titlecase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[14]); + if (auto abbreviation = unicode_data.code_point_abbreviations.get(data.code_point); abbreviation.has_value()) + data.abbreviation = *abbreviation; + if (!assigned_code_point_range_start.has_value()) assigned_code_point_range_start = data.code_point; @@ -683,6 +689,11 @@ struct SpecialCaseMapping { u32 special_casing_size { 0 }; }; +struct CodePointAbbreviation { + u32 code_point { 0 }; + StringView abbreviation {}; +}; + template<typename MappingType> struct CodePointComparator { constexpr int operator()(u32 code_point, MappingType const& mapping) @@ -707,7 +718,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { for (auto const& data : unicode_data.code_point_data) { auto mapping = mapping_getter(data); - if constexpr (IsSame<decltype(mapping), Optional<u32>>) { + if constexpr (requires { mapping.has_value(); }) { if (!mapping.has_value()) continue; } else { @@ -724,6 +735,9 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { if constexpr (IsSame<decltype(mapping), Optional<u32>>) { generator.set("mapping", String::formatted("{:#x}", *mapping)); generator.append(", @mapping@ },"); + } else if constexpr (IsSame<decltype(mapping), Optional<StringView>>) { + generator.set("mapping", String::formatted("{}", *mapping)); + generator.append(", \"@mapping@\"sv },"); } else { append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv); generator.append(" },"); @@ -748,6 +762,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; }); append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; }); append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; }); + append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; }); generator.append(R"~~~( struct CodePointRange { @@ -891,6 +906,15 @@ Span<SpecialCasing const* const> special_case_mapping(u32 code_point) return mapping->special_casing.span().slice(0, mapping->special_casing_size); } + +Optional<StringView> code_point_abbreviation(u32 code_point) +{ + auto const* mapping = binary_search(s_abbreviation_mappings, code_point, nullptr, CodePointComparator<CodePointAbbreviation> {}); + if (mapping == nullptr) + return {}; + + return mapping->abbreviation; +} )~~~"); auto append_prop_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) { |