diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-08-28 14:46:36 -0400 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-08-30 19:42:40 +0100 |
commit | 94e66f500cc0789400ae7a915c7f598881111dec (patch) | |
tree | 4b91aad9188a1710126ff52c2df2469e6a1cb96a /Userland/Libraries/LibJS | |
parent | f897c2edb3deac5e442476291ae71fe73d82bfb8 (diff) | |
download | serenity-94e66f500cc0789400ae7a915c7f598881111dec.zip |
LibJS: Reject structurally invalid Unicode locale extensions
Diffstat (limited to 'Userland/Libraries/LibJS')
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp | 48 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js | 23 |
2 files changed, 59 insertions, 12 deletions
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp index 338c0919e2..9422329129 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp @@ -18,6 +18,20 @@ namespace JS::Intl { // 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView locale) { + auto contains_duplicate_variant = [](Vector<StringView>& variants) { + if (variants.is_empty()) + return false; + + quick_sort(variants); + + for (size_t i = 0; i < variants.size() - 1; ++i) { + if (variants[i] == variants[i + 1]) + return true; + } + + return false; + }; + // IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise: // locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier; @@ -31,23 +45,33 @@ static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView return {}; // the unicode_language_id within locale contains no duplicate unicode_variant_subtag subtags; and - if (auto& variants = locale_id->language_id.variants; !variants.is_empty()) { - quick_sort(variants); + if (contains_duplicate_variant(locale_id->language_id.variants)) + return {}; - for (size_t i = 0; i < variants.size() - 1; ++i) { - if (variants[i] == variants[i + 1]) + // if locale contains an extensions* component, that component + Vector<char> unique_keys; + for (auto& extension : locale_id->extensions) { + // does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags, + // contains at most one unicode_locale_extensions component, + // contains at most one transformed_extensions component, and + char key = extension.visit( + [](Unicode::LocaleExtension const&) { 
return 'u'; }, + [](Unicode::TransformedExtension const&) { return 't'; }, + [](Unicode::OtherExtension const& ext) { return static_cast<char>(to_ascii_lowercase(ext.key)); }); + + if (unique_keys.contains_slow(key)) + return {}; + unique_keys.append(key); + + // if a transformed_extensions component that contains a tlang component is present, then + // the tlang component contains no duplicate unicode_variant_subtag subtags. + if (auto* transformed = extension.get_pointer<Unicode::TransformedExtension>()) { + auto& language = transformed->language; + if (language.has_value() && contains_duplicate_variant(language->variants)) return {}; } } - // FIXME: Handle extensions. - // if locale contains an extensions* component, that component - // does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags, - // contains at most one unicode_locale_extensions component, - // contains at most one transformed_extensions component, and - // if a transformed_extensions component that contains a tlang component is present, then - // the tlang component contains no duplicate unicode_variant_subtag subtags. 
- return locale_id; } diff --git a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js index c7e64c1468..c248dda559 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js +++ b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js @@ -45,6 +45,29 @@ describe("errors", () => { Intl.getCanonicalLocales([true]); }).toThrowWithMessage(TypeError, "true is neither an object nor a string"); }); + + test("duplicate extension components", () => { + expect(() => { + Intl.getCanonicalLocales("en-u-aa-U-aa"); + }).toThrowWithMessage(RangeError, "en-u-aa-U-aa is not a structurally valid language tag"); + + expect(() => { + Intl.getCanonicalLocales("en-t-aa-T-aa"); + }).toThrowWithMessage(RangeError, "en-t-aa-T-aa is not a structurally valid language tag"); + + expect(() => { + Intl.getCanonicalLocales("en-z-aa-Z-aa"); + }).toThrowWithMessage(RangeError, "en-z-aa-Z-aa is not a structurally valid language tag"); + }); + + test("duplicate transformed extension variant subtags", () => { + expect(() => { + Intl.getCanonicalLocales("en-t-en-POSIX-POSIX"); + }).toThrowWithMessage( + RangeError, + "en-t-en-POSIX-POSIX is not a structurally valid language tag" + ); + }); }); describe("normal behavior", () => { |