diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-11-27 10:53:42 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-11-29 22:48:46 +0000 |
commit | 914675e826bb166b693a2c58ff718037f3597b17 (patch) | |
tree | 30aa3deeae9b26420f9c8b6162054e5bb8209f06 /Userland | |
parent | bb1143779275f3dfa652c88c11ac2ad6e69ac9cd (diff) | |
download | serenity-914675e826bb166b693a2c58ff718037f3597b17.zip |
LibJS+LibUnicode: Separate number formatting methods from Locale.h
Currently, we generate separate data files for locale and number format
related tables/methods, but provide public accessors for all of the data
in one Locale.h file. Rather than continuing this trend for date-time,
relative time, etc. formatting, it's a bit easier to reason about if the
public accessors are also in separate files.
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h | 2 | ||||
-rw-r--r-- | Userland/Libraries/LibUnicode/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibUnicode/Locale.cpp | 130 | ||||
-rw-r--r-- | Userland/Libraries/LibUnicode/Locale.h | 47 | ||||
-rw-r--r-- | Userland/Libraries/LibUnicode/NumberFormat.cpp | 146 | ||||
-rw-r--r-- | Userland/Libraries/LibUnicode/NumberFormat.h | 65 |
7 files changed, 214 insertions, 178 deletions
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp index fd00858420..ba10f46402 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp @@ -11,6 +11,7 @@ #include <LibJS/Runtime/Intl/NumberFormat.h> #include <LibJS/Runtime/Intl/NumberFormatFunction.h> #include <LibUnicode/CurrencyCode.h> +#include <LibUnicode/Locale.h> #include <math.h> #include <stdlib.h> diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h index c3088efe7a..21cae9b6aa 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h @@ -10,7 +10,7 @@ #include <AK/String.h> #include <LibJS/Runtime/Intl/AbstractOperations.h> #include <LibJS/Runtime/Object.h> -#include <LibUnicode/Locale.h> +#include <LibUnicode/NumberFormat.h> namespace JS::Intl { diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index 88ea78231e..316f63fe8f 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -5,6 +5,7 @@ SET(SOURCES CharacterTypes.cpp CurrencyCode.cpp Locale.cpp + NumberFormat.cpp ) serenity_lib(LibUnicode unicode) diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp index a2a9454e88..4a61523edf 100644 --- a/Userland/Libraries/LibUnicode/Locale.cpp +++ b/Userland/Libraries/LibUnicode/Locale.cpp @@ -8,14 +8,11 @@ #include <AK/GenericLexer.h> #include <AK/QuickSort.h> #include <AK/StringBuilder.h> -#include <AK/Utf8View.h> #include <LibUnicode/CharacterTypes.h> #include <LibUnicode/Locale.h> #if ENABLE_UNICODE_DATA -# include <LibUnicode/UnicodeData.h> # include <LibUnicode/UnicodeLocale.h> -# include <LibUnicode/UnicodeNumberFormat.h> #endif namespace Unicode { @@ -815,51 +812,6 @@ Vector<StringView> get_locale_key_mapping([[maybe_unused]] StringView locale, [[ return {}; } -Optional<StringView> get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol) -{ -#if ENABLE_UNICODE_DATA - return Detail::get_number_system_symbol(locale, system, symbol); -#else - return {}; -#endif -} - -Optional<NumberGroupings> get_number_system_groupings([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system) -{ -#if ENABLE_UNICODE_DATA - return Detail::get_number_system_groupings(locale, system); -#else - return {}; -#endif -} - -Vector<NumberFormat> get_compact_number_system_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] CompactNumberFormatType type) -{ -#if ENABLE_UNICODE_DATA - return Detail::get_compact_number_system_formats(locale, system, type); -#else - return {}; -#endif -} - -Optional<NumberFormat> get_standard_number_system_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StandardNumberFormatType type) -{ -#if ENABLE_UNICODE_DATA - return Detail::get_standard_number_system_format(locale, system, type); -#else - return {}; -#endif -} - -Vector<NumberFormat> get_unit_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView unit, [[maybe_unused]] Style style) -{ -#if ENABLE_UNICODE_DATA - return Detail::get_unit_formats(locale, unit, style); -#else - return {}; -#endif -} - Optional<ListPatterns> get_locale_list_patterns([[maybe_unused]] StringView locale, [[maybe_unused]] StringView type, [[maybe_unused]] StringView style) { #if ENABLE_UNICODE_DATA @@ -984,88 +936,6 @@ String resolve_most_likely_territory([[maybe_unused]] LanguageID const& language return aliases[0].to_string(); } -Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number) -{ - // FIXME: This is a rather naive and locale-unaware implementation Unicode's TR-35 pluralization - // rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules - // Once those rules are implemented for LibJS, we better use them instead. - auto find_plurality = [&](auto plurality) -> Optional<NumberFormat> { - if (auto it = formats.find_if([&](auto& patterns) { return patterns.plurality == plurality; }); it != formats.end()) - return *it; - return {}; - }; - - if (number == 0) { - if (auto patterns = find_plurality(NumberFormat::Plurality::Zero); patterns.has_value()) - return patterns; - } else if (number == 1) { - if (auto patterns = find_plurality(NumberFormat::Plurality::One); patterns.has_value()) - return patterns; - } else if (number == 2) { - if (auto patterns = find_plurality(NumberFormat::Plurality::Two); patterns.has_value()) - return patterns; - } else if (number > 2) { - if (auto patterns = find_plurality(NumberFormat::Plurality::Many); patterns.has_value()) - return patterns; - } - - return find_plurality(NumberFormat::Plurality::Other); -} - -// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies -Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern) -{ -#if ENABLE_UNICODE_DATA - constexpr auto number_key = "{number}"sv; - constexpr auto currency_key = "{currency}"sv; - constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP) - - auto number_index = base_pattern.find(number_key); - VERIFY(number_index.has_value()); - - auto currency_index = base_pattern.find(currency_key); - VERIFY(currency_index.has_value()); - - Utf8View utf8_currency_display { currency_display }; - Optional<String> currency_key_with_spacing; - - auto last_code_point = [](StringView string) { - Utf8View utf8_string { string }; - u32 code_point = 0; - - for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it) - code_point = *it; - - return code_point; - }; - - if (*number_index < *currency_index) { - u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index)); - - if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) { - u32 first_currency_code_point = *utf8_currency_display.begin(); - - if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol)) - currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key); - } - } else { - u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index)); - - if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) { - u32 last_currency_code_point = last_code_point(currency_display); - - if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol)) - currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing); - } - } - - if (currency_key_with_spacing.has_value()) - return base_pattern.replace(currency_key, *currency_key_with_spacing); -#endif - - return {}; -} - String LanguageID::to_string() const { StringBuilder builder; diff --git a/Userland/Libraries/LibUnicode/Locale.h b/Userland/Libraries/LibUnicode/Locale.h index cfb4821120..72addb675d 100644 --- a/Userland/Libraries/LibUnicode/Locale.h +++ b/Userland/Libraries/LibUnicode/Locale.h @@ -85,46 +85,6 @@ enum class Style : u8 { Numeric, }; -struct NumberGroupings { - u8 primary_grouping_size { 0 }; - u8 secondary_grouping_size { 0 }; -}; - -enum class StandardNumberFormatType : u8 { - Decimal, - Currency, - Accounting, - Percent, - Scientific, -}; - -enum class CompactNumberFormatType : u8 { - DecimalLong, - DecimalShort, - CurrencyUnit, - CurrencyShort, -}; - -struct NumberFormat { - enum class Plurality : u8 { - Other, - Zero, - Single, - One, - Two, - Few, - Many, - }; - - u8 magnitude { 0 }; - u8 exponent { 0 }; - Plurality plurality { Plurality::Other }; - StringView zero_format {}; - StringView positive_format {}; - StringView negative_format {}; - Vector<StringView> identifiers {}; -}; - struct ListPatterns { StringView start; StringView middle; @@ -188,10 +148,6 @@ Optional<StringView> get_locale_script_mapping(StringView locale, StringView scr Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency, Style style); Vector<StringView> get_locale_key_mapping(StringView locale, StringView keyword); Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol); -Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system); -Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type); -Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type); -Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style); Optional<ListPatterns> get_locale_list_patterns(StringView locale, StringView type, StringView style); Optional<StringView> resolve_language_alias(StringView language); @@ -204,7 +160,4 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id); Optional<LanguageID> remove_likely_subtags(LanguageID const& language_id); String resolve_most_likely_territory(LanguageID const& language_id, StringView territory_alias); -Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number); -Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern); - } diff --git a/Userland/Libraries/LibUnicode/NumberFormat.cpp b/Userland/Libraries/LibUnicode/NumberFormat.cpp new file mode 100644 index 0000000000..cffd0d4c04 --- /dev/null +++ b/Userland/Libraries/LibUnicode/NumberFormat.cpp @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/Utf8View.h> +#include <LibUnicode/CharacterTypes.h> +#include <LibUnicode/Locale.h> +#include <LibUnicode/NumberFormat.h> + +#if ENABLE_UNICODE_DATA +# include <LibUnicode/UnicodeData.h> +# include <LibUnicode/UnicodeNumberFormat.h> +#endif + +namespace Unicode { + +Optional<StringView> get_number_system_symbol([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StringView symbol) +{ +#if ENABLE_UNICODE_DATA + return Detail::get_number_system_symbol(locale, system, symbol); +#else + return {}; +#endif +} + +Optional<NumberGroupings> get_number_system_groupings([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system) +{ +#if ENABLE_UNICODE_DATA + return Detail::get_number_system_groupings(locale, system); +#else + return {}; +#endif +} + +Optional<NumberFormat> get_standard_number_system_format([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] StandardNumberFormatType type) +{ +#if ENABLE_UNICODE_DATA + return Detail::get_standard_number_system_format(locale, system, type); +#else + return {}; +#endif +} + +Vector<NumberFormat> get_compact_number_system_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView system, [[maybe_unused]] CompactNumberFormatType type) +{ +#if ENABLE_UNICODE_DATA + return Detail::get_compact_number_system_formats(locale, system, type); +#else + return {}; +#endif +} + +Vector<NumberFormat> get_unit_formats([[maybe_unused]] StringView locale, [[maybe_unused]] StringView unit, [[maybe_unused]] Style style) +{ +#if ENABLE_UNICODE_DATA + return Detail::get_unit_formats(locale, unit, style); +#else + return {}; +#endif +} + +Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number) +{ + // FIXME: This is a rather naive and locale-unaware implementation Unicode's TR-35 pluralization + // rules: https://www.unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules + // Once those rules are implemented for LibJS, we better use them instead. + auto find_plurality = [&](auto plurality) -> Optional<NumberFormat> { + if (auto it = formats.find_if([&](auto& patterns) { return patterns.plurality == plurality; }); it != formats.end()) + return *it; + return {}; + }; + + if (number == 0) { + if (auto patterns = find_plurality(NumberFormat::Plurality::Zero); patterns.has_value()) + return patterns; + } else if (number == 1) { + if (auto patterns = find_plurality(NumberFormat::Plurality::One); patterns.has_value()) + return patterns; + } else if (number == 2) { + if (auto patterns = find_plurality(NumberFormat::Plurality::Two); patterns.has_value()) + return patterns; + } else if (number > 2) { + if (auto patterns = find_plurality(NumberFormat::Plurality::Many); patterns.has_value()) + return patterns; + } + + return find_plurality(NumberFormat::Plurality::Other); +} + +// https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies +Optional<String> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern) +{ +#if ENABLE_UNICODE_DATA + constexpr auto number_key = "{number}"sv; + constexpr auto currency_key = "{currency}"sv; + constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP) + + auto number_index = base_pattern.find(number_key); + VERIFY(number_index.has_value()); + + auto currency_index = base_pattern.find(currency_key); + VERIFY(currency_index.has_value()); + + Utf8View utf8_currency_display { currency_display }; + Optional<String> currency_key_with_spacing; + + auto last_code_point = [](StringView string) { + Utf8View utf8_string { string }; + u32 code_point = 0; + + for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it) + code_point = *it; + + return code_point; + }; + + if (*number_index < *currency_index) { + u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index)); + + if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) { + u32 first_currency_code_point = *utf8_currency_display.begin(); + + if (!code_point_has_general_category(first_currency_code_point, GeneralCategory::Symbol)) + currency_key_with_spacing = String::formatted("{}{}", spacing, currency_key); + } + } else { + u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index)); + + if (!code_point_has_general_category(last_pattern_code_point, GeneralCategory::Separator)) { + u32 last_currency_code_point = last_code_point(currency_display); + + if (!code_point_has_general_category(last_currency_code_point, GeneralCategory::Symbol)) + currency_key_with_spacing = String::formatted("{}{}", currency_key, spacing); + } + } + + if (currency_key_with_spacing.has_value()) + return base_pattern.replace(currency_key, *currency_key_with_spacing); +#endif + + return {}; +} + +} diff --git a/Userland/Libraries/LibUnicode/NumberFormat.h b/Userland/Libraries/LibUnicode/NumberFormat.h new file mode 100644 index 0000000000..b56e870ad7 --- /dev/null +++ b/Userland/Libraries/LibUnicode/NumberFormat.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Optional.h> +#include <AK/String.h> +#include <AK/StringView.h> +#include <AK/Vector.h> +#include <LibUnicode/Forward.h> + +namespace Unicode { + +struct NumberGroupings { + u8 primary_grouping_size { 0 }; + u8 secondary_grouping_size { 0 }; +}; + +enum class StandardNumberFormatType : u8 { + Decimal, + Currency, + Accounting, + Percent, + Scientific, +}; + +enum class CompactNumberFormatType : u8 { + DecimalLong, + DecimalShort, + CurrencyUnit, + CurrencyShort, +}; + +struct NumberFormat { + enum class Plurality : u8 { + Other, + Zero, + Single, + One, + Two, + Few, + Many, + }; + + u8 magnitude { 0 }; + u8 exponent { 0 }; + Plurality plurality { Plurality::Other }; + StringView zero_format {}; + StringView positive_format {}; + StringView negative_format {}; + Vector<StringView> identifiers {}; +}; + +Optional<StringView> get_number_system_symbol(StringView locale, StringView system, StringView symbol); +Optional<NumberGroupings> get_number_system_groupings(StringView locale, StringView system); +Optional<NumberFormat> get_standard_number_system_format(StringView locale, StringView system, StandardNumberFormatType type); +Vector<NumberFormat> get_compact_number_system_formats(StringView locale, StringView system, CompactNumberFormatType type); +Vector<NumberFormat> get_unit_formats(StringView locale, StringView unit, Style style); +Optional<NumberFormat> select_pattern_with_plurality(Vector<NumberFormat> const& formats, double number); +Optional<String> augment_currency_format_pattern(StringView currency_display, StringView base_pattern); + +} |