From b8ad4d302e3ac46900f819ac22cb1c96552685fb Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 26 Aug 2021 06:42:11 -0400 Subject: LibUnicode: Move Locale enumeration from generated UCD data to CLDR data The UCD set of data contained a very small subset of all locales just to handle some special casing rules. This enumeration will be needed within the CLDR generator as well. So rather than duplicate the enum, remove it from the UCD generator in favor of the full list of locales known by the CLDR generator. --- .../CodeGenerators/GenerateUnicodeData.cpp | 8 +++--- .../CodeGenerators/GenerateUnicodeLocale.cpp | 30 ++++++++++++++++------ Userland/Libraries/LibUnicode/Forward.h | 2 +- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp index e60b5549b5..48b71862da 100644 --- a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp +++ b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp @@ -91,7 +91,6 @@ struct UnicodeData { Vector special_casing; u32 largest_casing_transform_size { 0 }; u32 largest_special_casing_size { 0 }; - Vector locales; Vector conditions; Vector code_point_data; @@ -200,11 +199,10 @@ static void parse_special_casing(Core::File& file, UnicodeData& unicode_data) casing.condition = move(conditions[0]); } - casing.locale = casing.locale.to_uppercase(); + if (!casing.locale.is_empty()) + casing.locale = String::formatted("{:c}{}", to_ascii_uppercase(casing.locale[0]), casing.locale.substring_view(1)); casing.condition.replace("_", "", true); - if (!casing.locale.is_empty() && !unicode_data.locales.contains_slow(casing.locale)) - unicode_data.locales.append(casing.locale); if (!casing.condition.is_empty() && !unicode_data.conditions.contains_slow(casing.condition)) unicode_data.conditions.append(casing.condition); } @@ -487,11 +485,11 @@ enum class @name@ : @underlying@ {)~~~"); #include #include #include +#include namespace Unicode { )~~~"); - generate_enum("Locale"sv, "None"sv, move(unicode_data.locales)); generate_enum("Condition"sv, "None"sv, move(unicode_data.conditions)); generate_enum("GeneralCategory"sv, {}, unicode_data.general_categories.keys(), unicode_data.general_category_aliases); generate_enum("Property"sv, {}, unicode_data.prop_list.keys(), unicode_data.prop_aliases); diff --git a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeLocale.cpp b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeLocale.cpp index b6c36211c8..c83fe82c66 100644 --- a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeLocale.cpp +++ b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeLocale.cpp @@ -126,11 +126,13 @@ static void parse_all_locales(String locale_names_path, UnicodeLocaleData& local } } -static String format_identifier(StringView owner, StringView identifier) +static String format_identifier(StringView owner, String identifier) { + identifier.replace("-"sv, "_"sv, true); + if (all_of(identifier, is_ascii_digit)) return String::formatted("{}_{}", owner[0], identifier); - return identifier.to_titlecase_string(); + return identifier.to_titlecase(); } static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data) @@ -138,12 +140,20 @@ static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& StringBuilder builder; SourceGenerator generator { builder }; - auto generate_enum = [&](StringView name, Vector& values) { + auto generate_enum = [&](StringView name, StringView default_, Vector& values) { quick_sort(values); generator.set("name", name); + generator.set("underlying", ((values.size() + !default_.is_empty()) < 256) ? "u8"sv : "u16"sv); + generator.append(R"~~~( -enum class @name@ : u8 {)~~~"); +enum class @name@ : @underlying@ {)~~~"); + + if (!default_.is_empty()) { + generator.set("default", default_); + generator.append(R"~~~( + @default@,)~~~"); + } for (auto const& value : values) { generator.set("value", format_identifier(name, value)); @@ -169,9 +179,11 @@ enum class @name@ : u8 {)~~~"); namespace Unicode { )~~~"); - generate_enum("Language"sv, locale_data.languages); - generate_enum("Territory"sv, locale_data.territories); - generate_enum("Variant"sv, locale_data.variants); + auto locales = locale_data.locales.keys(); + generate_enum("Locale"sv, "None"sv, locales); + generate_enum("Language"sv, {}, locale_data.languages); + generate_enum("Territory"sv, {}, locale_data.territories); + generate_enum("Variant"sv, {}, locale_data.variants); generator.append(R"~~~( struct LocaleData { @@ -187,6 +199,7 @@ namespace Detail { LocaleMap const& available_locales(); +Optional locale_from_string(StringView const& locale); Optional language_from_string(StringView const& language); Optional territory_from_string(StringView const& territory); @@ -284,7 +297,7 @@ LocaleMap const& available_locales() } )~~~"); - auto append_from_string = [&](StringView enum_title, StringView enum_snake, Vector& values) { + auto append_from_string = [&](StringView enum_title, StringView enum_snake, Vector const& values) { generator.set("enum_title", enum_title); generator.set("enum_snake", enum_snake); @@ -311,6 +324,7 @@ Optional<@enum_title@> @enum_snake@_from_string(StringView const& @enum_snake@) )~~~"); }; + append_from_string("Locale"sv, "locale"sv, locale_data.locales.keys()); append_from_string("Language"sv, "language"sv, locale_data.languages); append_from_string("Territory"sv, "territory"sv, locale_data.territories); diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h index f3b3cf6431..a8ef73a70c 100644 --- a/Userland/Libraries/LibUnicode/Forward.h +++ b/Userland/Libraries/LibUnicode/Forward.h @@ -13,7 +13,7 @@ namespace Unicode { enum class Condition : u8; enum class GeneralCategory : u8; enum class Language : u8; -enum class Locale : u8; +enum class Locale : u16; enum class Property : u8; enum class Script : u8; enum class Territory : u8; -- cgit v1.2.3