diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-11-10 08:54:45 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-11-12 09:17:08 +0000 |
commit | 230b133ee34b1608bb5806204b05d5ac8e9420af (patch) | |
tree | bea3bb2668e4262aecf4d880401e6a3ac86a11ff /Meta | |
parent | 1244ebcd4f48bebc147e11eb5f4b7883730cfe0a (diff) | |
download | serenity-230b133ee34b1608bb5806204b05d5ac8e9420af.zip |
LibUnicode: Parse number formats into zero/positive/negative patterns
A number formatting pattern in the CLDR contains one or two entries,
delimited by a semi-colon. Previously, LibUnicode was just storing the
entire pattern as one string. This changes the generator to split the
pattern on that delimiter and generate the 3 unique patterns expected by
ECMA-402.
The rules for generating the 3 patterns are as follows:
* If the pattern contains 1 entry, it is the zero pattern. The positive
pattern is the zero pattern prepended with {plusSign}. The negative
pattern is the zero pattern prepended with {minusSign}.
* If the pattern contains 2 entries, the first is the zero pattern, and
the second is the negative pattern. The positive pattern is the zero
pattern prepended with {plusSign}.
Diffstat (limited to 'Meta')
-rw-r--r-- | Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp | 76 |
1 files changed, 66 insertions, 10 deletions
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp index 27259b15fe..c71c018abe 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp @@ -48,7 +48,9 @@ struct NumberFormat : public Unicode::NumberFormat { VERIFY_NOT_REACHED(); } - StringIndexType format_index { 0 }; + StringIndexType zero_format_index { 0 }; + StringIndexType positive_format_index { 0 }; + StringIndexType negative_format_index { 0 }; }; struct NumberSystem { @@ -513,6 +515,54 @@ static void parse_numeric_keywords(String locale_numbers_path, UnicodeLocaleData locale_data.keywords.append(key); } +static void parse_number_pattern(String pattern, UnicodeLocaleData& locale_data, NumberFormat& format) +{ + // https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns + // https://cldr.unicode.org/translation/number-currency-formats/number-and-currency-patterns + auto replace_patterns = [](String pattern) { + static HashMap<StringView, StringView> replacements = { + { "%"sv, "{percentSign}"sv }, + { "+"sv, "{plusSign}"sv }, + { "-"sv, "{minusSign}"sv }, + { "¤"sv, "{currencyCode}"sv }, // U+00A4 Currency Sign + }; + + if (auto start_number_index = pattern.find('#'); start_number_index.has_value()) { + auto end_number_index = *start_number_index + 1; + + for (; end_number_index < pattern.length(); ++end_number_index) { + auto ch = pattern[end_number_index]; + if ((ch != '#') && (ch != '0') && (ch != ',') && (ch != '.')) + break; + } + + pattern = String::formatted("{}{{number}}{}", + *start_number_index > 0 ? 
pattern.substring_view(0, *start_number_index) : ""sv, + pattern.substring_view(end_number_index)); + } + + for (auto const& replacement : replacements) + pattern = pattern.replace(replacement.key, replacement.value, true); + + return pattern; + }; + + auto patterns = pattern.split(';'); + VERIFY((patterns.size() == 1) || (patterns.size() == 2)); + + auto zero_format = replace_patterns(move(patterns[0])); + format.positive_format_index = ensure_unique_string(locale_data, String::formatted("{{plusSign}}{}", zero_format)); + + if (patterns.size() == 2) { + auto negative_format = replace_patterns(move(patterns[1])); + format.negative_format_index = ensure_unique_string(locale_data, move(negative_format)); + } else { + format.negative_format_index = ensure_unique_string(locale_data, String::formatted("{{minusSign}}{}", zero_format)); + } + + format.zero_format_index = ensure_unique_string(locale_data, move(zero_format)); +} + static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& locale_data, Locale& locale) { LexicalPath numbers_path(move(locale_numbers_path)); @@ -550,7 +600,7 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& NumberFormat format {}; format.magnitude = static_cast<u8>(log10(type)); format.plurality = NumberFormat::plurality_from_string(split_key[2]); - format.format_index = ensure_unique_string(locale_data, value.as_string()); + parse_number_pattern(value.as_string(), locale_data, format); result.append(move(format)); }); @@ -580,7 +630,7 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& auto& number_system = ensure_number_system(system); auto format_object = value.as_object().get("standard"sv); - number_system.decimal_format.format_index = ensure_unique_string(locale_data, format_object.as_string()); + parse_number_pattern(format_object.as_string(), locale_data, number_system.decimal_format); auto const& long_format = 
value.as_object().get("long"sv).as_object().get("decimalFormat"sv); number_system.decimal_long_formats = parse_number_format(long_format.as_object()); @@ -592,10 +642,10 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& auto& number_system = ensure_number_system(system); auto format_object = value.as_object().get("standard"sv); - number_system.currency_format.format_index = ensure_unique_string(locale_data, format_object.as_string()); + parse_number_pattern(format_object.as_string(), locale_data, number_system.currency_format); format_object = value.as_object().get("accounting"sv); - number_system.accounting_format.format_index = ensure_unique_string(locale_data, format_object.as_string()); + parse_number_pattern(format_object.as_string(), locale_data, number_system.accounting_format); if (value.as_object().has("short"sv)) { auto const& short_format = value.as_object().get("short"sv).as_object().get("standard"sv); @@ -606,7 +656,7 @@ static void parse_number_systems(String locale_numbers_path, UnicodeLocaleData& auto& number_system = ensure_number_system(system); auto format_object = value.as_object().get("standard"sv); - number_system.percent_format.format_index = ensure_unique_string(locale_data, format_object.as_string()); + parse_number_pattern(format_object.as_string(), locale_data, number_system.percent_format); } }); } @@ -915,14 +965,18 @@ struct NumberFormat { number_format.magnitude = magnitude; number_format.plurality = static_cast<Unicode::NumberFormat::Plurality>(plurality); - number_format.format = s_string_list[format]; + number_format.zero_format = s_string_list[zero_format]; + number_format.positive_format = s_string_list[positive_format]; + number_format.negative_format = s_string_list[negative_format]; return number_format; } u8 magnitude { 0 }; u8 plurality { 0 }; - @string_index_type@ format { 0 }; + @string_index_type@ zero_format { 0 }; + @string_index_type@ positive_format { 0 }; + @string_index_type@ 
negative_format { 0 }; }; struct NumberSystem { @@ -998,8 +1052,10 @@ static constexpr Array<@string_index_type@, @size@> @name@ { { auto append_number_format = [&](auto const& number_format) { generator.set("magnitude"sv, String::number(number_format.magnitude)); generator.set("plurality"sv, String::number(static_cast<u8>(number_format.plurality))); - generator.set("format"sv, String::number(number_format.format_index)); - generator.append("{ @magnitude@, @plurality@, @format@ },"); + generator.set("zero_format"sv, String::number(number_format.zero_format_index)); + generator.set("positive_format"sv, String::number(number_format.positive_format_index)); + generator.set("negative_format"sv, String::number(number_format.negative_format_index)); + generator.append("{ @magnitude@, @plurality@, @zero_format@, @positive_format@, @negative_format@ },"); }; auto append_number_formats = [&](String name, auto const& number_formats) { |