summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Tests/LibUnicode/TestUnicodeLocale.cpp24
-rw-r--r--Userland/Libraries/LibUnicode/Locale.cpp41
-rw-r--r--Userland/Libraries/LibUnicode/Locale.h1
3 files changed, 65 insertions, 1 deletions
diff --git a/Tests/LibUnicode/TestUnicodeLocale.cpp b/Tests/LibUnicode/TestUnicodeLocale.cpp
index dc0aa45974..80b7ee8ba1 100644
--- a/Tests/LibUnicode/TestUnicodeLocale.cpp
+++ b/Tests/LibUnicode/TestUnicodeLocale.cpp
@@ -246,6 +246,30 @@ TEST_CASE(parse_unicode_locale_id_with_other_extension)
pass("en-z-aa-bbb-cccccccc", { 'z', { "aa"sv, "bbb"sv, "cccccccc"sv } });
}
+TEST_CASE(parse_unicode_locale_id_with_private_use_extension) // Exercises parsing of the "-x-..." private-use tail of a locale ID.
+{
+ auto fail = [](StringView locale) { // Expects the whole locale ID to be rejected (no value returned).
+ auto locale_id = Unicode::parse_unicode_locale_id(locale);
+ EXPECT(!locale_id.has_value());
+ };
+ auto pass = [](StringView locale, Vector<StringView> const& expected_extension) { // Expects a successful parse whose private-use subtags equal expected_extension.
+ auto locale_id = Unicode::parse_unicode_locale_id(locale);
+ VERIFY(locale_id.has_value()); // Guards the dereference on the next line.
+ EXPECT_EQ(locale_id->private_use_extensions, expected_extension);
+ };
+
+ fail("en-x"sv); // Header with no subtag after it.
+ fail("en-x-"sv); // Header followed only by a trailing separator.
+ fail("en-x-aaaaaaaaa"sv); // Single subtag of 9 characters, above the 8-character limit.
+ fail("en-x-aaa-"sv); // Valid subtag followed by a trailing separator.
+ fail("en-x-aaa-aaaaaaaaa"sv); // Valid subtag followed by a 9-character subtag.
+
+ pass("en-x-a", { "a"sv }); // Minimum subtag length (1).
+ pass("en-x-aaaaaaaa", { "aaaaaaaa"sv }); // Maximum subtag length (8).
+ pass("en-x-aaa-bbb", { "aaa"sv, "bbb"sv }); // Several subtags are kept in input order.
+ pass("en-x-aaa-x-bbb", { "aaa"sv, "x"sv, "bbb"sv }); // A later "x" is an ordinary subtag, not a new header.
+}
+
TEST_CASE(canonicalize_unicode_locale_id)
{
auto test = [](StringView locale, StringView expected_canonical_locale) {
diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp
index d886c4a812..9dda3c2bba 100644
--- a/Userland/Libraries/LibUnicode/Locale.cpp
+++ b/Userland/Libraries/LibUnicode/Locale.cpp
@@ -400,6 +400,45 @@ static Optional<Extension> parse_extension(GenericLexer& lexer)
return {};
}
+static Vector<StringView> parse_private_use_extensions(GenericLexer& lexer)
+{
+ // https://unicode.org/reports/tr35/#pu_extensions
+ // Returns the subtags that follow an [xX] header; returns an empty vector when no header segment is available, the header is not x/X, or no valid subtag follows it.
+ // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+ size_t starting_position = lexer.tell(); // Remembered so a failed match can be undone below.
+
+ auto header = consume_next_segment(lexer); // NOTE(review): helper is defined outside this view; presumably consumes a separator plus one segment - confirm.
+ if (!header.has_value())
+ return {};
+
+ auto parse_values = [&]() -> Vector<StringView> { // Collects consecutive valid subtags, stopping at the first invalid one or when no segment is left.
+ Vector<StringView> extensions;
+
+ while (true) {
+ auto segment = consume_next_segment(lexer);
+ if (!segment.has_value())
+ break;
+
+ if ((segment->length() < 1) || (segment->length() > 8) || !all_of(*segment, is_ascii_alphanumeric)) { // Segment does not match alphanum{1,8}.
+ lexer.retreat(segment->length() + 1); // Un-read the rejected segment; the +1 presumably covers its separator - confirm against consume_next_segment.
+ break;
+ }
+
+ extensions.append(*segment);
+ }
+
+ return extensions;
+ };
+
+ if ((header->length() == 1) && (((*header)[0] == 'x') || ((*header)[0] == 'X'))) { // Header must be exactly one character, 'x' or 'X'.
+ if (auto extensions = parse_values(); !extensions.is_empty()) // The grammar requires at least one subtag after the header.
+ return extensions;
+ }
+
+ lexer.retreat(lexer.tell() - starting_position); // No match: rewind everything consumed since entry.
+ return {};
+}
+
Optional<LanguageID> parse_unicode_language_id(StringView language)
{
GenericLexer lexer { language };
@@ -433,7 +472,7 @@ Optional<LocaleID> parse_unicode_locale_id(StringView locale)
locale_id.extensions.append(extension.release_value());
}
- // FIXME: Handle pu_extensions.
+ locale_id.private_use_extensions = parse_private_use_extensions(lexer);
if (!lexer.is_eof())
return {};
diff --git a/Userland/Libraries/LibUnicode/Locale.h b/Userland/Libraries/LibUnicode/Locale.h
index b8fc43d1e4..5fc244e5ed 100644
--- a/Userland/Libraries/LibUnicode/Locale.h
+++ b/Userland/Libraries/LibUnicode/Locale.h
@@ -53,6 +53,7 @@ using Extension = Variant<LocaleExtension, TransformedExtension, OtherExtension>
struct LocaleID { // Parsed pieces of a Unicode locale identifier.
LanguageID language_id {};
Vector<Extension> extensions {}; // Each entry is one of the Extension variant alternatives (locale, transformed, other).
+ Vector<StringView> private_use_extensions {}; // Subtags following the [xX] private-use header; empty when absent. NOTE(review): the views presumably point into the string handed to the parser - confirm lifetime.
};
// Note: These methods only verify that the provided strings match the EBNF grammar of the