summary | refs | log | tree | commit | diff
path: root/Userland
diff options
context:
space:
mode:
author: Timothy Flynn <trflynn89@pm.me>, 2022-07-20 13:52:36 -0400
committer: Linus Groh <mail@linusgroh.de>, 2022-07-20 22:30:16 +0100
commit: 0a6363d3e93ac880e00a4d53d0e24b8c6e07fded (patch)
tree: c5239412b208500b318760fe28e6c6ef315a48a8 /Userland
parent: b2709f161efec2ad26f831f70aba422a407cd268 (diff)
download: serenity-0a6363d3e93ac880e00a4d53d0e24b8c6e07fded.zip
LibUnicode: Implement the range pattern processing algorithm
This algorithm injects spacing around the range separator under certain conditions. For example, in en-US, the range [3, 5] should be formatted as "3–5" if unitless, but as "$3 – $5" for currency.
Diffstat (limited to 'Userland')
-rw-r--r-- Userland/Libraries/LibUnicode/NumberFormat.cpp 54
-rw-r--r-- Userland/Libraries/LibUnicode/NumberFormat.h 1
2 files changed, 45 insertions, 10 deletions
diff --git a/Userland/Libraries/LibUnicode/NumberFormat.cpp b/Userland/Libraries/LibUnicode/NumberFormat.cpp
index b15322fd78..38adf51c32 100644
--- a/Userland/Libraries/LibUnicode/NumberFormat.cpp
+++ b/Userland/Libraries/LibUnicode/NumberFormat.cpp
@@ -50,6 +50,17 @@ String replace_digits_for_number_system(StringView system, StringView number)
return builder.build();
}
+static u32 last_code_point(StringView string) // Returns the final code point produced by iterating `string` as a Utf8View, or 0 if the iteration yields nothing.
+{
+ Utf8View utf8_string { string };
+ u32 code_point = 0; // Stays 0 when the loop below never runs (e.g. an empty view).
+
+ for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it) // Forward walk over every code point; each step overwrites the previous value.
+ code_point = *it;
+
+ return code_point; // Whatever the last iteration stored.
+}
+
// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern)
{
@@ -67,16 +78,6 @@ Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView cur
Utf8View utf8_currency_display { currency_display };
Optional<String> currency_key_with_spacing;
- auto last_code_point = [](StringView string) {
- Utf8View utf8_string { string };
- u32 code_point = 0;
-
- for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
- code_point = *it;
-
- return code_point;
- };
-
if (*number_index < *currency_index) {
u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index));
@@ -104,4 +105,37 @@ Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView cur
return {};
}
+// Decides whether spacing should be added around a range separator (TR35 range pattern processing). If so, returns the separator with one space on each side; otherwise returns an empty Optional. See https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
+Optional<String> augment_range_pattern(StringView range_separator, StringView lower, StringView upper)
+{
+#if ENABLE_UNICODE_DATA
+ auto range_pattern_with_spacing = [&]() { // Builds the spaced separator lazily, only on the paths that return it.
+ return String::formatted(" {} ", range_separator);
+ };
+
+ Utf8View utf8_range_separator { range_separator };
+ Utf8View utf8_upper { upper };
+
+ // NOTE: The specification lists condition 1 before condition 2. We test condition 2 first so that a separator which already contains white space can return early.
+
+ // To determine whether to add spacing, the currently recommended heuristic is:
+ // 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
+ for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) { // Here the whole range_separator is scanned, one code point at a time.
+ if (code_point_has_property(*it, Property::White_Space))
+ return {}; // Condition 2 fails: the separator already has white space, so no spacing is added.
+ }
+
+ // 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
+ if (auto it = utf8_upper.begin(); it != utf8_upper.end()) { // An empty upper has no first code point, so this check is skipped.
+ if (!code_point_has_general_category(*it, GeneralCategory::Decimal_Number))
+ return range_pattern_with_spacing(); // upper begins with a non-Decimal_Number code point.
+ }
+
+ if (!code_point_has_general_category(last_code_point(lower), GeneralCategory::Decimal_Number)) // For an empty lower, last_code_point() yields 0 and that value is what gets tested.
+ return range_pattern_with_spacing(); // lower ends with a non-Decimal_Number code point.
+#endif
+
+ return {}; // Neither check asked for spacing, or ENABLE_UNICODE_DATA is off: the caller gets no augmented pattern.
+}
+
}
diff --git a/Userland/Libraries/LibUnicode/NumberFormat.h b/Userland/Libraries/LibUnicode/NumberFormat.h
index 38bb279d03..315416977d 100644
--- a/Userland/Libraries/LibUnicode/NumberFormat.h
+++ b/Userland/Libraries/LibUnicode/NumberFormat.h
@@ -71,5 +71,6 @@ Vector<NumberFormat> get_compact_number_system_formats(StringView locale, String
Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style);
Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern);
+Optional<String> augment_range_pattern(StringView range_separator, StringView lower, StringView upper);
}