summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibUnicode
diff options
context:
space:
mode:
author: Timothy Flynn <trflynn89@pm.me> 2022-07-07 09:44:17 -0400
committer: Linus Groh <mail@linusgroh.de> 2022-07-08 11:51:54 +0200
commit: ea78bac36db527abe0e80dce17dc03372e4f7065 (patch)
tree: bb134174d3d3a0e027c7877b49a81b96e97cd20e /Userland/Libraries/LibUnicode
parent: 8de395694d39c4b12efbc3b3598be5c1cc80cdb0 (diff)
download: serenity-ea78bac36db527abe0e80dce17dc03372e4f7065.zip
LibUnicode: Parse and generate per-locale plural rules from the CLDR
Plural rules in the CLDR are of the form:

    "cs": {
        "pluralRule-count-one": "i = 1 and v = 0 @integer 1",
        "pluralRule-count-few": "i = 2..4 and v = 0 @integer 2~4",
        "pluralRule-count-many": "v != 0 @decimal 0.0~1.5, 10.0, 100.0 ...",
        "pluralRule-count-other": "@integer 0, 5~19, 100, 1000, 10000 ..."
    }

The syntax is described here:
https://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax

There are up to 2 sets of rules for each locale, a cardinal set and an ordinal set. The approach here is to generate a C++ function for each set of rules. Each condition in the rules (e.g. "i = 1 and v = 0") is transpiled to a C++ if-statement within its function. Then lookup tables are generated to match locales to their generated functions.

NOTE: -Wno-parentheses-equality is added to the LibUnicodeData compile flags because the generated plural rules have lots of extra parentheses (because e.g. we need to selectively negate and combine rules). The code to generate only exactly the right number of parentheses is quite hairy, so this just tells the compiler to ignore the extras.
Diffstat (limited to 'Userland/Libraries/LibUnicode')
-rw-r--r-- Userland/Libraries/LibUnicode/CMakeLists.txt 3
-rw-r--r-- Userland/Libraries/LibUnicode/Forward.h 2
-rw-r--r-- Userland/Libraries/LibUnicode/PluralRules.cpp 59
-rw-r--r-- Userland/Libraries/LibUnicode/PluralRules.h 65
4 files changed, 128 insertions, 1 deletions
diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt
index 4f9c5527b3..701489311d 100644
--- a/Userland/Libraries/LibUnicode/CMakeLists.txt
+++ b/Userland/Libraries/LibUnicode/CMakeLists.txt
@@ -3,7 +3,7 @@ include(${SerenityOS_SOURCE_DIR}/Meta/CMake/unicode_data.cmake)
if (DEFINED UNICODE_DATA_SOURCES)
set(SOURCES ${UNICODE_DATA_SOURCES})
serenity_lib(LibUnicodeData unicodedata)
- target_compile_options(LibUnicodeData PRIVATE -g0 -Os)
+ target_compile_options(LibUnicodeData PRIVATE -g0 -Os -Wno-parentheses-equality)
target_link_libraries(LibUnicodeData LibCore LibTimeZone)
endif()
@@ -13,6 +13,7 @@ set(SOURCES
DateTimeFormat.cpp
Locale.cpp
NumberFormat.cpp
+ PluralRules.cpp
RelativeTimeFormat.cpp
)
diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h
index 4edea7b0f5..252da3bab5 100644
--- a/Userland/Libraries/LibUnicode/Forward.h
+++ b/Userland/Libraries/LibUnicode/Forward.h
@@ -38,6 +38,7 @@ enum class Locale : u16;
enum class MinimumDaysRegion : u8;
enum class Month : u8;
enum class NumericSymbol : u8;
+enum class PluralCategory : u8;
enum class Property : u8;
enum class Script : u8;
enum class ScriptTag : u8;
@@ -62,6 +63,7 @@ struct LocaleID;
struct NumberFormat;
struct NumberGroupings;
struct OtherExtension;
+struct PluralOperands;
struct SpecialCasing;
struct TransformedExtension;
struct TransformedField;
diff --git a/Userland/Libraries/LibUnicode/PluralRules.cpp b/Userland/Libraries/LibUnicode/PluralRules.cpp
new file mode 100644
index 0000000000..a4b8a58a7f
--- /dev/null
+++ b/Userland/Libraries/LibUnicode/PluralRules.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibUnicode/PluralRules.h>
+
+#if ENABLE_UNICODE_DATA
+# include <LibUnicode/UnicodePluralRules.h>
+#endif
+
+namespace Unicode {
+
+#if !ENABLE_UNICODE_DATA // Fallback enum, compiled only when ENABLE_UNICODE_DATA is false (UnicodePluralRules.h is not included then).
+enum class PluralCategory : u8 {
+ Other, // The only category the weak fallback functions in this file accept or return.
+};
+#endif
+
+// Parses the textual name of a plural form. Only "cardinal" and "ordinal" are recognized;
+// any other input is treated as unreachable and trips VERIFY_NOT_REACHED().
+PluralForm plural_form_from_string(StringView plural_form)
+{
+    if (plural_form == "ordinal"sv)
+        return PluralForm::Ordinal;
+    if (plural_form == "cardinal"sv)
+        return PluralForm::Cardinal;
+
+    VERIFY_NOT_REACHED();
+}
+
+// Returns the textual name ("cardinal" or "ordinal") of the given plural form. A value that
+// matches neither enumerator is treated as unreachable and trips VERIFY_NOT_REACHED().
+StringView plural_form_to_string(PluralForm plural_form)
+{
+    if (plural_form == PluralForm::Cardinal)
+        return "cardinal"sv;
+    if (plural_form == PluralForm::Ordinal)
+        return "ordinal"sv;
+
+    VERIFY_NOT_REACHED();
+}
+
+// Weak fallback definition. NOTE(review): presumably replaced at link time by a strong
+// definition from the generated Unicode data when that data is built - confirm.
+//
+// This fallback only knows the "other" category: any other input fails the VERIFY below
+// rather than yielding an empty Optional.
+Optional<PluralCategory> __attribute__((weak)) plural_category_from_string(StringView category)
+{
+    VERIFY(category == "other"sv);
+    return Optional<PluralCategory> { PluralCategory::Other };
+}
+
+// Weak fallback definition. NOTE(review): presumably replaced at link time by a strong
+// definition from the generated Unicode data when that data is built - confirm.
+//
+// This fallback only knows PluralCategory::Other; any other value fails the VERIFY below.
+StringView __attribute__((weak)) plural_category_to_string(PluralCategory category)
+{
+    VERIFY(category == PluralCategory::Other);
+
+    auto const category_name = "other"sv;
+    return category_name;
+}
+
+// Weak fallback definition. NOTE(review): presumably replaced at link time by a strong
+// definition from the generated Unicode data when that data is built - confirm.
+//
+// This fallback ignores all of its arguments and always reports PluralCategory::Other.
+PluralCategory __attribute__((weak)) determine_plural_category([[maybe_unused]] StringView locale, [[maybe_unused]] PluralForm form, [[maybe_unused]] PluralOperands operands)
+{
+    return PluralCategory::Other;
+}
+
+}
diff --git a/Userland/Libraries/LibUnicode/PluralRules.h b/Userland/Libraries/LibUnicode/PluralRules.h
new file mode 100644
index 0000000000..d67f95b805
--- /dev/null
+++ b/Userland/Libraries/LibUnicode/PluralRules.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Assertions.h>
+#include <AK/StringView.h>
+#include <AK/Types.h>
+#include <LibUnicode/Forward.h>
+
+namespace Unicode {
+
+enum class PluralForm { // Names a plural form; passed to determine_plural_category() alongside the locale.
+ Cardinal, // Spelled "cardinal" by plural_form_from_string() / plural_form_to_string().
+ Ordinal, // Spelled "ordinal" by plural_form_from_string() / plural_form_to_string().
+};
+
+// The operands that plural rule conditions are evaluated against. See:
+// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Operand_Meanings
+struct PluralOperands {
+    // Maps a single-character operand symbol to the name of the member of this struct which
+    // holds that operand. Unrecognized symbols trip VERIFY_NOT_REACHED().
+    static constexpr StringView symbol_to_variable_name(char symbol)
+    {
+        switch (symbol) {
+        case 'n':
+            return "number"sv;
+        case 'i':
+            return "integer_digits"sv;
+        case 'f':
+            return "fraction_digits"sv;
+        case 'v':
+            return "number_of_fraction_digits"sv;
+        case 't':
+            return "fraction_digits_without_trailing"sv;
+        case 'w':
+            return "number_of_fraction_digits_without_trailing"sv;
+        }
+
+        VERIFY_NOT_REACHED();
+    }
+
+    // Reports whether a modulus applied to the given operand symbol must be computed with fmod.
+    static constexpr bool symbol_requires_floating_point_modulus(char symbol)
+    {
+        // From TR-35: "The modulus (% or mod) is a remainder operation as defined in Java; for
+        // example, where n = 4.3 the result of n mod 3 is 1.3."
+        //
+        // Only 'n' is stored as a decimal value (see `number` below); every other operand is
+        // an integer, for which an integer modulus suffices.
+        constexpr char decimal_operand = 'n';
+        return symbol == decimal_operand;
+    }
+
+    double number { 0 };                                  // Operand 'n'.
+    u64 integer_digits { 0 };                             // Operand 'i'.
+    u64 fraction_digits { 0 };                            // Operand 'f'.
+    u64 number_of_fraction_digits { 0 };                  // Operand 'v'.
+    u64 fraction_digits_without_trailing { 0 };           // Operand 't'.
+    u64 number_of_fraction_digits_without_trailing { 0 }; // Operand 'w'.
+};
+
+PluralForm plural_form_from_string(StringView plural_form); // Accepts "cardinal" or "ordinal"; anything else hits VERIFY_NOT_REACHED().
+StringView plural_form_to_string(PluralForm plural_form); // Returns "cardinal" or "ordinal".
+
+Optional<PluralCategory> plural_category_from_string(StringView category); // PluralRules.cpp provides a weak fallback that only accepts "other".
+StringView plural_category_to_string(PluralCategory category); // PluralRules.cpp provides a weak fallback that only accepts PluralCategory::Other.
+
+PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands); // The weak fallback in PluralRules.cpp always returns PluralCategory::Other.
+
+}