diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-08-30 15:34:31 -0400 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-09-01 14:14:47 +0100 |
commit | 556374a904722eca9f068bdc8ac8fdc32b0ea361 (patch) | |
tree | 4286b9089b3c19d49ec9a36c47fb254ddf7ecae9 /Tests | |
parent | 9b118f1f0616108e2cdcf5ce52f03f862e92e508 (diff) | |
download | serenity-556374a904722eca9f068bdc8ac8fdc32b0ea361.zip |
LibUnicode: Substitute Unicode locale aliases during canonicalization
Unicode TR35 defines how locale subtag aliases should be emplaced when
converting a locale to canonical form. For most subtags, it is a simple
substitution. Language subtags depend on context; for example, the
language "sh" should become "sr-Latn", but if the original locale has a
script subtag already ("sh-Cyrl"), then only the language subtag of the
alias should be taken ("sr-Cyrl").
To facilitate this, we now make two passes when canonicalizing a locale.
In the first pass, we convert the LocaleID structure to canonical syntax
(where the conversions all happen in-place). In the second pass, we form
the canonical string based on the canonical syntax.
Diffstat (limited to 'Tests')
-rw-r--r-- | Tests/LibUnicode/TestUnicodeLocale.cpp | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/Tests/LibUnicode/TestUnicodeLocale.cpp b/Tests/LibUnicode/TestUnicodeLocale.cpp index 52dca55915..45cd4170f6 100644 --- a/Tests/LibUnicode/TestUnicodeLocale.cpp +++ b/Tests/LibUnicode/TestUnicodeLocale.cpp @@ -334,4 +334,40 @@ TEST_CASE(canonicalize_unicode_locale_id) test("EN-Z-BBB-U-AA-T-EN-0-AAA"sv, "en-0-aaa-t-en-u-aa-z-bbb"sv); test("en-z-bbb-u-aa-t-en-0-aaa-x-ccc"sv, "en-0-aaa-t-en-u-aa-z-bbb-x-ccc"sv); test("EN-Z-BBB-U-AA-T-EN-0-AAA-X-CCC"sv, "en-0-aaa-t-en-u-aa-z-bbb-x-ccc"sv); + + // Language subtag aliases. + test("sh"sv, "sr-Latn"sv); + test("SH"sv, "sr-Latn"sv); + test("sh-cyrl"sv, "sr-Cyrl"sv); + test("SH-CYRL"sv, "sr-Cyrl"sv); + test("cnr"sv, "sr-ME"sv); + test("CNR"sv, "sr-ME"sv); + test("cnr-ba"sv, "sr-BA"sv); + test("CNR-BA"sv, "sr-BA"sv); + + // Territory subtag aliases. + test("ru-su"sv, "ru-RU"sv); + test("RU-SU"sv, "ru-RU"sv); + test("ru-810"sv, "ru-RU"sv); + test("RU-810"sv, "ru-RU"sv); + test("en-su"sv, "en-RU"sv); + test("EN-SU"sv, "en-RU"sv); + test("en-810"sv, "en-RU"sv); + test("EN-810"sv, "en-RU"sv); + + // Script subtag aliases. + test("en-qaai"sv, "en-Zinh"sv); + test("EN-QAAI"sv, "en-Zinh"sv); + + // Variant subtag aliases. + test("en-polytoni"sv, "en-polyton"sv); + test("EN-POLYTONI"sv, "en-polyton"sv); + + // Subdivision subtag aliases. + test("en-u-sd-cn11"sv, "en-u-sd-cnbj"sv); + test("EN-U-SD-CN11"sv, "en-u-sd-cnbj"sv); + test("en-u-rg-cn12"sv, "en-u-rg-cntj"sv); + test("EN-U-RG-CN12"sv, "en-u-rg-cntj"sv); + test("en-u-aa-cn11"sv, "en-u-aa-cn11"sv); + test("EN-U-AA-CN11"sv, "en-u-aa-cn11"sv); } |