summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibJS
diff options
context:
space:
mode:
author Timothy Flynn <trflynn89@pm.me> 2021-08-28 14:46:36 -0400
committer Linus Groh <mail@linusgroh.de> 2021-08-30 19:42:40 +0100
commit 94e66f500cc0789400ae7a915c7f598881111dec (patch)
tree 4b91aad9188a1710126ff52c2df2469e6a1cb96a /Userland/Libraries/LibJS
parent f897c2edb3deac5e442476291ae71fe73d82bfb8 (diff)
download serenity-94e66f500cc0789400ae7a915c7f598881111dec.zip
LibJS: Reject structurally invalid Unicode locale extensions
Diffstat (limited to 'Userland/Libraries/LibJS')
-rw-r--r-- Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp 48
-rw-r--r-- Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js 23
2 files changed, 59 insertions, 12 deletions
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp
index 338c0919e2..9422329129 100644
--- a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp
+++ b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp
@@ -18,6 +18,20 @@ namespace JS::Intl {
// 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView locale)
{
+ auto contains_duplicate_variant = [](Vector<StringView>& variants) {
+ if (variants.is_empty())
+ return false;
+
+ quick_sort(variants);
+
+ for (size_t i = 0; i < variants.size() - 1; ++i) {
+ if (variants[i] == variants[i + 1])
+ return true;
+ }
+
+ return false;
+ };
+
// IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise:
// locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier;
@@ -31,23 +45,33 @@ static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView
return {};
// the unicode_language_id within locale contains no duplicate unicode_variant_subtag subtags; and
- if (auto& variants = locale_id->language_id.variants; !variants.is_empty()) {
- quick_sort(variants);
+ if (contains_duplicate_variant(locale_id->language_id.variants))
+ return {};
- for (size_t i = 0; i < variants.size() - 1; ++i) {
- if (variants[i] == variants[i + 1])
+ // if locale contains an extensions* component, that component
+ Vector<char> unique_keys;
+ for (auto& extension : locale_id->extensions) {
+ // does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags,
+ // contains at most one unicode_locale_extensions component,
+ // contains at most one transformed_extensions component, and
+ char key = extension.visit(
+ [](Unicode::LocaleExtension const&) { return 'u'; },
+ [](Unicode::TransformedExtension const&) { return 't'; },
+ [](Unicode::OtherExtension const& ext) { return static_cast<char>(to_ascii_lowercase(ext.key)); });
+
+ if (unique_keys.contains_slow(key))
+ return {};
+ unique_keys.append(key);
+
+ // if a transformed_extensions component that contains a tlang component is present, then
+ // the tlang component contains no duplicate unicode_variant_subtag subtags.
+ if (auto* transformed = extension.get_pointer<Unicode::TransformedExtension>()) {
+ auto& language = transformed->language;
+ if (language.has_value() && contains_duplicate_variant(language->variants))
return {};
}
}
- // FIXME: Handle extensions.
- // if locale contains an extensions* component, that component
- // does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags,
- // contains at most one unicode_locale_extensions component,
- // contains at most one transformed_extensions component, and
- // if a transformed_extensions component that contains a tlang component is present, then
- // the tlang component contains no duplicate unicode_variant_subtag subtags.
-
return locale_id;
}
diff --git a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js
index c7e64c1468..c248dda559 100644
--- a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js
+++ b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js
@@ -45,6 +45,29 @@ describe("errors", () => {
Intl.getCanonicalLocales([true]);
}).toThrowWithMessage(TypeError, "true is neither an object nor a string");
});
+
+ test("duplicate extension components", () => {
+ expect(() => {
+ Intl.getCanonicalLocales("en-u-aa-U-aa");
+ }).toThrowWithMessage(RangeError, "en-u-aa-U-aa is not a structurally valid language tag");
+
+ expect(() => {
+ Intl.getCanonicalLocales("en-t-aa-T-aa");
+ }).toThrowWithMessage(RangeError, "en-t-aa-T-aa is not a structurally valid language tag");
+
+ expect(() => {
+ Intl.getCanonicalLocales("en-z-aa-Z-aa");
+ }).toThrowWithMessage(RangeError, "en-z-aa-Z-aa is not a structurally valid language tag");
+ });
+
+ test("duplicate transformed extension variant subtags", () => {
+ expect(() => {
+ Intl.getCanonicalLocales("en-t-en-POSIX-POSIX");
+ }).toThrowWithMessage(
+ RangeError,
+ "en-t-en-POSIX-POSIX is not a structurally valid language tag"
+ );
+ });
});
describe("normal behavior", () => {