summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp | 36
-rw-r--r-- Userland/Libraries/LibUnicode/CharacterTypes.cpp | 1
-rw-r--r-- Userland/Libraries/LibUnicode/CharacterTypes.h | 2
3 files changed, 33 insertions, 6 deletions
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
index c8b2865582..eedf0e638a 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
@@ -72,6 +72,7 @@ struct CodePointName {
struct CodePointData {
u32 code_point { 0 };
String name;
+ Optional<StringView> abbreviation;
u8 canonical_combining_class { 0 };
String bidi_class;
String decomposition_type;
@@ -101,6 +102,7 @@ struct UnicodeData {
Vector<CodePointData> code_point_data;
+ HashMap<u32, String> code_point_abbreviations;
HashMap<u32, String> code_point_display_name_aliases;
Vector<CodePointName> code_point_display_names;
@@ -302,11 +304,12 @@ static void parse_name_aliases(Core::File& file, UnicodeData& unicode_data)
auto alias = segments[1].trim_whitespace();
auto reason = segments[2].trim_whitespace();
- if (!reason.is_one_of("correction"sv, "control"sv))
- continue;
-
- if (!unicode_data.code_point_display_name_aliases.contains(*code_point))
- unicode_data.code_point_display_name_aliases.set(*code_point, alias);
+ if (reason == "abbreviation"sv) {
+ unicode_data.code_point_abbreviations.set(*code_point, alias);
+ } else if (reason.is_one_of("correction"sv, "control"sv)) {
+ if (!unicode_data.code_point_display_name_aliases.contains(*code_point))
+ unicode_data.code_point_display_name_aliases.set(*code_point, alias);
+ }
}
}
@@ -476,6 +479,9 @@ static void parse_unicode_data(Core::File& file, UnicodeData& unicode_data)
data.simple_lowercase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[13]);
data.simple_titlecase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[14]);
+ if (auto abbreviation = unicode_data.code_point_abbreviations.get(data.code_point); abbreviation.has_value())
+ data.abbreviation = *abbreviation;
+
if (!assigned_code_point_range_start.has_value())
assigned_code_point_range_start = data.code_point;
@@ -683,6 +689,11 @@ struct SpecialCaseMapping {
u32 special_casing_size { 0 };
};
+struct CodePointAbbreviation {
+ u32 code_point { 0 };
+ StringView abbreviation {};
+};
+
template<typename MappingType>
struct CodePointComparator {
constexpr int operator()(u32 code_point, MappingType const& mapping)
@@ -707,7 +718,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
for (auto const& data : unicode_data.code_point_data) {
auto mapping = mapping_getter(data);
- if constexpr (IsSame<decltype(mapping), Optional<u32>>) {
+ if constexpr (requires { mapping.has_value(); }) {
if (!mapping.has_value())
continue;
} else {
@@ -724,6 +735,9 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
if constexpr (IsSame<decltype(mapping), Optional<u32>>) {
generator.set("mapping", String::formatted("{:#x}", *mapping));
generator.append(", @mapping@ },");
+ } else if constexpr (IsSame<decltype(mapping), Optional<StringView>>) {
+ generator.set("mapping", String::formatted("{}", *mapping));
+ generator.append(", \"@mapping@\"sv },");
} else {
append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv);
generator.append(" },");
@@ -748,6 +762,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; });
append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; });
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
+ append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });
generator.append(R"~~~(
struct CodePointRange {
@@ -891,6 +906,15 @@ Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
return mapping->special_casing.span().slice(0, mapping->special_casing_size);
}
+
+Optional<StringView> code_point_abbreviation(u32 code_point)
+{
+ auto const* mapping = binary_search(s_abbreviation_mappings, code_point, nullptr, CodePointComparator<CodePointAbbreviation> {});
+ if (mapping == nullptr)
+ return {};
+
+ return mapping->abbreviation;
+}
)~~~");
auto append_prop_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) {
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index 901d7f606a..72344f11b3 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -22,6 +22,7 @@
namespace Unicode {
Optional<String> __attribute__((weak)) code_point_display_name(u32) { return {}; }
+Optional<StringView> __attribute__((weak)) code_point_abbreviation(u32) { return {}; }
u32 __attribute__((weak)) canonical_combining_class(u32) { return {}; }
Span<SpecialCasing const* const> __attribute__((weak)) special_case_mapping(u32) { return {}; }
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.h b/Userland/Libraries/LibUnicode/CharacterTypes.h
index 246f7b69b1..484fa35bd6 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.h
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.h
@@ -16,6 +16,8 @@
namespace Unicode {
Optional<String> code_point_display_name(u32 code_point);
+Optional<StringView> code_point_abbreviation(u32 code_point);
+
u32 canonical_combining_class(u32 code_point);
Span<SpecialCasing const* const> special_case_mapping(u32 code_point);