summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimothy Flynn <trflynn89@pm.me>2021-11-17 09:56:16 -0500
committerAndreas Kling <kling@serenityos.org>2021-11-19 11:45:35 +0100
commit93ee9220270c86f63c5629ffe974f8eccb1c414d (patch)
treeb93738866d4ab7d1053e937c83a6eca9c98f7925
parent4b535ce1c83696c8f856053c84921323c2e4a040 (diff)
downloadserenity-93ee9220270c86f63c5629ffe974f8eccb1c414d.zip
LibUnicode: Support locales-without-script aliases for ECMA-402
As noted by ECMA-402, if a supported locale contains all of a language, script, and region subtag, then the implementation must also support the locale without the script subtag. The most complicated example of this is the zh-TW locale. The list of locales in the CLDR database does not include zh-TW or its maximized zh-Hant-TW variant. Instead, it includes the zh-Hant locale. However, zh-Hant-TW is listed in the default-content locale list in the cldr-core package. This defines an alias from zh-Hant-TW to zh-Hant. We must then also support the zh-Hant-TW alias without the script subtag: zh-TW. This transitively maps zh-TW to zh-Hant, which is a case quite heavily tested by test262.
-rw-r--r--Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp43
-rw-r--r--Tests/LibUnicode/TestUnicodeLocale.cpp8
2 files changed, 51 insertions, 0 deletions
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp
index 78e64a861c..f8cf25ec5a 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp
@@ -427,6 +427,48 @@ static void parse_default_content_locales(String core_path, UnicodeLocaleData& l
});
}
+static void define_aliases_without_scripts(UnicodeLocaleData& locale_data)
+{
+ // Adds, for every locale or alias carrying language, script and region subtags, an alias
+ // named "language-region" (script dropped), unless that name is already a locale or alias.
+ //
+ // From ECMA-402 (https://tc39.es/ecma402/#sec-internal-slots): for locales that include a
+ // script subtag in addition to language and region, the corresponding locale without a
+ // script subtag must also be supported.
+ //
+ // The locale itself, or its script-less form, may already be a default-content alias.
+ auto find_alias = [&](auto const& locale) { // Iterator to the entry whose `alias` equals `locale`; callers compare against end().
+ return locale_data.locale_aliases.find_if([&](auto const& alias) { return locale == alias.alias; });
+ };
+
+ auto append_alias_without_script = [&](auto const& locale) {
+ auto parsed_locale = CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, locale);
+ VERIFY(parsed_locale.has_value()); // Every name handed in here is expected to parse.
+
+ if ((parsed_locale->language == 0) || (parsed_locale->script == 0) || (parsed_locale->region == 0)) // Presumably index 0 marks an absent subtag; only full language-script-region names proceed.
+ return;
+
+ auto locale_without_script = String::formatted("{}-{}",
+ locale_data.unique_strings.get(parsed_locale->language),
+ locale_data.unique_strings.get(parsed_locale->region));
+
+ if (locale_data.locales.contains(locale_without_script)) // Script-less name is already a locale of its own.
+ return;
+ if (find_alias(locale_without_script) != locale_data.locale_aliases.end()) // Script-less name is already an alias.
+ return;
+
+ if (auto it = find_alias(locale); it != locale_data.locale_aliases.end()) // `locale` is itself an alias: reuse its `name` so the new alias maps to the same entry.
+ locale_data.locale_aliases.append({ it->name, locale_without_script });
+ else
+ locale_data.locale_aliases.append({ locale, locale_without_script }); // Otherwise alias the script-less name straight to `locale`.
+ };
+
+ for (auto const& locale : locale_data.locales)
+ append_alias_without_script(locale.key);
+ for (auto const& locale : locale_data.locale_aliases) // NOTE(review): the lambda appends to locale_aliases while this loop iterates it; confirm the container's iterators and element references tolerate growth.
+ append_alias_without_script(locale.alias);
+}
+
static void parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data)
{
auto identity_iterator = path_to_dir_iterator(locale_names_path);
@@ -508,6 +550,7 @@ static void parse_all_locales(String core_path, String locale_names_path, String
}
parse_default_content_locales(move(core_path), locale_data);
+ define_aliases_without_scripts(locale_data);
}
static String format_identifier(StringView owner, String identifier)
diff --git a/Tests/LibUnicode/TestUnicodeLocale.cpp b/Tests/LibUnicode/TestUnicodeLocale.cpp
index 34619c8cd7..9f8064212a 100644
--- a/Tests/LibUnicode/TestUnicodeLocale.cpp
+++ b/Tests/LibUnicode/TestUnicodeLocale.cpp
@@ -463,3 +463,11 @@ TEST_CASE(canonicalize_unicode_locale_id)
test("zh-Hans-CN"sv, "zh-Hans-CN"sv);
test("ZH-HANS-CN"sv, "zh-Hans-CN"sv);
}
+
+TEST_CASE(supports_locale_aliases) // Availability checks for zh, zh-Hant and the alias names zh-TW / zh-Hant-TW.
+{
+ EXPECT(Unicode::is_locale_available("zh"sv));
+ EXPECT(Unicode::is_locale_available("zh-Hant"sv));
+ EXPECT(Unicode::is_locale_available("zh-TW"sv)); // zh-Hant-TW with the script subtag dropped.
+ EXPECT(Unicode::is_locale_available("zh-Hant-TW"sv)); // Language, script and region all present.
+}