summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Tests/LibUnicode/TestUnicodeLocale.cpp 10
-rw-r--r-- Userland/Libraries/LibUnicode/Locale.cpp 31
2 files changed, 35 insertions, 6 deletions
diff --git a/Tests/LibUnicode/TestUnicodeLocale.cpp b/Tests/LibUnicode/TestUnicodeLocale.cpp
index 878c6c2731..a536118de2 100644
--- a/Tests/LibUnicode/TestUnicodeLocale.cpp
+++ b/Tests/LibUnicode/TestUnicodeLocale.cpp
@@ -320,6 +320,14 @@ TEST_CASE(canonicalize_unicode_locale_id)
test("EN-U-TZ-HONGKONG"sv, "en-u-tz-hkhkg"sv);
test("en-u-ta-hongkong"sv, "en-u-ta-hongkong"sv);
test("EN-U-TA-HONGKONG"sv, "en-u-ta-hongkong"sv);
+ test("en-u-ca-ethiopic-amete-alem"sv, "en-u-ca-ethioaa"sv);
+ test("EN-U-CA-ETHIOPIC-AMETE-ALEM"sv, "en-u-ca-ethioaa"sv);
+ test("en-u-ca-alem-ethiopic-amete"sv, "en-u-ca-alem-ethiopic-amete"sv);
+ test("EN-U-CA-ALEM-ETHIOPIC-AMETE"sv, "en-u-ca-alem-ethiopic-amete"sv);
+ test("en-u-ca-ethiopic-amete-xxx-alem"sv, "en-u-ca-ethiopic-amete-xxx-alem"sv);
+ test("EN-U-CA-ETHIOPIC-AMETE-XXX-ALEM"sv, "en-u-ca-ethiopic-amete-xxx-alem"sv);
+ test("en-u-cb-ethiopic-amete-alem"sv, "en-u-cb-ethiopic-amete-alem"sv);
+ test("EN-U-CB-ETHIOPIC-AMETE-ALEM"sv, "en-u-cb-ethiopic-amete-alem"sv);
test("en-t-en"sv, "en-t-en"sv);
test("EN-T-EN"sv, "en-t-en"sv);
@@ -345,6 +353,8 @@ TEST_CASE(canonicalize_unicode_locale_id)
test("EN-T-K1-IMPERIAL"sv, "en-t-k1-imperial"sv);
test("en-t-k1-hongkong"sv, "en-t-k1-hongkong"sv);
test("EN-T-K1-HONGKONG"sv, "en-t-k1-hongkong"sv);
+ test("en-t-k1-ethiopic-amete-alem"sv, "en-t-k1-ethiopic-amete-alem"sv);
+ test("EN-T-K1-ETHIOPIC-AMETE-ALEM"sv, "en-t-k1-ethiopic-amete-alem"sv);
test("en-0-aaa"sv, "en-0-aaa"sv);
test("EN-0-AAA"sv, "en-0-aaa"sv);
diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp
index e91561c5ea..6dd5a23655 100644
--- a/Userland/Libraries/LibUnicode/Locale.cpp
+++ b/Userland/Libraries/LibUnicode/Locale.cpp
@@ -483,6 +483,7 @@ Optional<LocaleID> parse_unicode_locale_id(StringView locale)
static void perform_hard_coded_key_value_substitutions(String& key, String& value)
{
// FIXME: In the XML export of CLDR, there are some aliases defined in the following files:
+ // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/collation.xml
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/measure.xml
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/timezone.xml
@@ -490,7 +491,9 @@ static void perform_hard_coded_key_value_substitutions(String& key, String& valu
//
// There doesn't seem to be a counterpart in the JSON export. Since there aren't many such
// aliases, until an XML parser is implemented, those aliases are implemented here.
- if (key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv) && (value == "yes"sv)) {
+ if ((key == "ca"sv) && (value == "islamicc"sv)) {
+ value = "islamic-civil"sv;
+ } else if (key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv) && (value == "yes"sv)) {
value = "true"sv;
} else if (key == "ks"sv) {
if (value == "primary"sv)
@@ -540,6 +543,20 @@ static void perform_hard_coded_key_value_substitutions(String& key, String& valu
}
}
+static void perform_hard_coded_key_multi_value_substitutions(String const& key, Vector<String>& values)
+{
+ // Companion to perform_hard_coded_key_value_substitutions for aliases that only apply when several
+ // values appear together: for key "ca", the exact list { ethiopic, amete, alem } becomes { ethioaa }.
+ // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml
+ if ((key != "ca"sv) || (values.size() != 3))
+ return;
+
+ static Vector<String> ethiopic_amete_alem { "ethiopic"sv, "amete"sv, "alem"sv };
+
+ if (values == ethiopic_amete_alem)
+ values = { "ethioaa"sv };
+}
+
static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
{
auto canonicalize_language = [](LanguageID& language_id, bool force_lowercase) {
@@ -626,6 +643,8 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
values.append(move(value));
}
+
+ perform_hard_coded_key_multi_value_substitutions(key, values);
};
canonicalize_language(locale_id.language_id, false);
@@ -644,22 +663,22 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
for (auto& extension : locale_id.extensions) {
extension.visit(
[&](LocaleExtension& ext) {
- quick_sort(ext.attributes);
- quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; });
-
for (auto& attribute : ext.attributes)
attribute = attribute.to_lowercase();
for (auto& keyword : ext.keywords)
canonicalize_key_value_list(keyword.key, keyword.types, true);
+
+ quick_sort(ext.attributes);
+ quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; });
},
[&](TransformedExtension& ext) {
if (ext.language.has_value())
canonicalize_language(*ext.language, true);
- quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; });
-
for (auto& field : ext.fields)
canonicalize_key_value_list(field.key, field.values, false);
+
+ quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; });
},
[&](OtherExtension& ext) {
ext.key = static_cast<char>(to_ascii_lowercase(ext.key));