diff options
-rw-r--r-- Tests/LibUnicode/TestUnicodeLocale.cpp | 10
-rw-r--r-- Userland/Libraries/LibUnicode/Locale.cpp | 31
2 files changed, 35 insertions, 6 deletions
diff --git a/Tests/LibUnicode/TestUnicodeLocale.cpp b/Tests/LibUnicode/TestUnicodeLocale.cpp
index 878c6c2731..a536118de2 100644
--- a/Tests/LibUnicode/TestUnicodeLocale.cpp
+++ b/Tests/LibUnicode/TestUnicodeLocale.cpp
@@ -320,6 +320,14 @@ TEST_CASE(canonicalize_unicode_locale_id)
     test("EN-U-TZ-HONGKONG"sv, "en-u-tz-hkhkg"sv);
     test("en-u-ta-hongkong"sv, "en-u-ta-hongkong"sv);
     test("EN-U-TA-HONGKONG"sv, "en-u-ta-hongkong"sv);
+    test("en-u-ca-ethiopic-amete-alem"sv, "en-u-ca-ethioaa"sv);
+    test("EN-U-CA-ETHIOPIC-AMETE-ALEM"sv, "en-u-ca-ethioaa"sv);
+    test("en-u-ca-alem-ethiopic-amete"sv, "en-u-ca-alem-ethiopic-amete"sv);
+    test("EN-U-CA-ALEM-ETHIOPIC-AMETE"sv, "en-u-ca-alem-ethiopic-amete"sv);
+    test("en-u-ca-ethiopic-amete-xxx-alem"sv, "en-u-ca-ethiopic-amete-xxx-alem"sv);
+    test("EN-U-CA-ETHIOPIC-AMETE-XXX-ALEM"sv, "en-u-ca-ethiopic-amete-xxx-alem"sv);
+    test("en-u-cb-ethiopic-amete-alem"sv, "en-u-cb-ethiopic-amete-alem"sv);
+    test("EN-U-CB-ETHIOPIC-AMETE-ALEM"sv, "en-u-cb-ethiopic-amete-alem"sv);

     test("en-t-en"sv, "en-t-en"sv);
     test("EN-T-EN"sv, "en-t-en"sv);
@@ -345,6 +353,8 @@ TEST_CASE(canonicalize_unicode_locale_id)
     test("EN-T-K1-IMPERIAL"sv, "en-t-k1-imperial"sv);
     test("en-t-k1-hongkong"sv, "en-t-k1-hongkong"sv);
     test("EN-T-K1-HONGKONG"sv, "en-t-k1-hongkong"sv);
+    test("en-t-k1-ethiopic-amete-alem"sv, "en-t-k1-ethiopic-amete-alem"sv);
+    test("EN-T-K1-ETHIOPIC-AMETE-ALEM"sv, "en-t-k1-ethiopic-amete-alem"sv);

     test("en-0-aaa"sv, "en-0-aaa"sv);
     test("EN-0-AAA"sv, "en-0-aaa"sv);
diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp
index e91561c5ea..6dd5a23655 100644
--- a/Userland/Libraries/LibUnicode/Locale.cpp
+++ b/Userland/Libraries/LibUnicode/Locale.cpp
@@ -483,6 +483,7 @@ Optional<LocaleID> parse_unicode_locale_id(StringView locale)
 static void perform_hard_coded_key_value_substitutions(String& key, String& value)
 {
     // FIXME: In the XML export of CLDR, there are some aliases defined in the following files:
+    // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml
     // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/collation.xml
     // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/measure.xml
     // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/timezone.xml
@@ -490,7 +491,9 @@ static void perform_hard_coded_key_value_substitutions(String& key, String& valu
     //
     // There doesn't seem to be a counterpart in the JSON export. Since there aren't many such
     // aliases, until an XML parser is implemented, those aliases are implemented here.
-    if (key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv) && (value == "yes"sv)) {
+    if ((key == "ca"sv) && (value == "islamicc"sv)) {
+        value = "islamic-civil"sv;
+    } else if (key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv) && (value == "yes"sv)) {
         value = "true"sv;
     } else if (key == "ks"sv) {
         if (value == "primary"sv)
@@ -540,6 +543,20 @@ static void perform_hard_coded_key_value_substitutions(String& key, String& valu
     }
 }

+static void perform_hard_coded_key_multi_value_substitutions(String const& key, Vector<String>& values)
+{
+    // Similar to perform_hard_coded_key_value_substitutions, some aliases depend on multiple
+    // variants being present in the original locale. Those are canonicalized separately here.
+    // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml
+    if ((key != "ca"sv) || (values.size() != 3))
+        return;
+
+    static Vector<String> ethiopic_amete_alem { "ethiopic"sv, "amete"sv, "alem"sv };
+
+    if (values == ethiopic_amete_alem)
+        values = { "ethioaa"sv };
+}
+
 static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
 {
     auto canonicalize_language = [](LanguageID& language_id, bool force_lowercase) {
@@ -626,6 +643,8 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)

             values.append(move(value));
         }
+
+        perform_hard_coded_key_multi_value_substitutions(key, values);
     };

     canonicalize_language(locale_id.language_id, false);
@@ -644,22 +663,22 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
     for (auto& extension : locale_id.extensions) {
         extension.visit(
             [&](LocaleExtension& ext) {
-                quick_sort(ext.attributes);
-                quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; });
-
                 for (auto& attribute : ext.attributes)
                     attribute = attribute.to_lowercase();
                 for (auto& keyword : ext.keywords)
                     canonicalize_key_value_list(keyword.key, keyword.types, true);
+
+                quick_sort(ext.attributes);
+                quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; });
             },
             [&](TransformedExtension& ext) {
                 if (ext.language.has_value())
                     canonicalize_language(*ext.language, true);

-                quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; });
-
                 for (auto& field : ext.fields)
                     canonicalize_key_value_list(field.key, field.values, false);
+
+                quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; });
             },
             [&](OtherExtension& ext) {
                 ext.key = static_cast<char>(to_ascii_lowercase(ext.key));