summaryrefslogtreecommitdiff
path: root/Meta
diff options
context:
space:
mode:
author: Timothy Flynn <trflynn89@pm.me> 2022-01-26 10:49:58 -0500
committer: Linus Groh <mail@linusgroh.de> 2022-01-27 21:16:44 +0000
commit: 789f093b2eb5a63a651d7e02bc34b71aad28438d (patch)
tree: 04f92e61694daee4d441aca22332ffe3e1c5469e /Meta
parent: 27eda77c9736361f15407c29e5258f635b1cd8d5 (diff)
download: serenity-789f093b2eb5a63a651d7e02bc34b71aad28438d.zip
LibUnicode: Parse and generate relative-time format patterns
Relative-time format patterns take one of two forms:
* Tensed - refer to the past or the future, e.g. "N years ago" or "in N years".
* Numbered - refer to a specific numeric value, e.g. "in 1 year" becomes "next year" and "in 0 years" becomes "this year".
In ECMA-402, tensed and numbered refer to the numeric formatting options of "always" and "auto", respectively.
Diffstat (limited to 'Meta')
-rw-r--r-- Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp 189
1 files changed, 188 insertions, 1 deletions
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp
index 3df4a84bda..7239d3046b 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp
@@ -17,18 +17,133 @@
#include <LibCore/ArgsParser.h>
#include <LibCore/DirIterator.h>
#include <LibCore/File.h>
+#include <LibUnicode/Locale.h>
+#include <LibUnicode/RelativeTimeFormat.h>
using StringIndexType = u16;
constexpr auto s_string_index_type = "u16"sv;
+using RelativeTimeFormatIndexType = u16;
+constexpr auto s_relative_time_format_index_type = "u16"sv;
+
+struct RelativeTimeFormat { // One relative-time pattern record; hash() and operator== are provided because it is the element type of UniqueStorage (see unique_formats).
+ unsigned hash() const
+ {
+ auto hash = time_unit.hash(); // Seed with the time unit, then fold in every remaining field.
+ hash = pair_int_hash(hash, style.hash());
+ hash = pair_int_hash(hash, plurality.hash());
+ hash = pair_int_hash(hash, tense_or_number); // The two index fields are mixed in directly as integers.
+ hash = pair_int_hash(hash, pattern);
+ return hash;
+ }
+
+ bool operator==(RelativeTimeFormat const& other) const // True only when all five fields match (the same five fields hash() covers).
+ {
+ return (time_unit == other.time_unit)
+ && (plurality == other.plurality)
+ && (style == other.style)
+ && (tense_or_number == other.tense_or_number)
+ && (pattern == other.pattern);
+ }
+
+ String time_unit; // Title-cased unit name set by parse_date_fields; formatted as TimeUnit::<name>.
+ String style; // Title-cased style name; formatted as Style::<name>.
+ String plurality; // Title-cased plural category; formatted as RelativeTimeFormat::Plurality::<name>.
+ StringIndexType tense_or_number { 0 }; // Value returned by unique_strings.ensure() for the tense or number key suffix.
+ StringIndexType pattern { 0 }; // Value returned by unique_strings.ensure() for the pattern text.
+};
+
+template<>
+struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> { // Renders a RelativeTimeFormat as a brace-enclosed initializer for the generated source.
+ ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format)
+ {
+ return Formatter<FormatString>::format(builder,
+ "{{ TimeUnit::{}, Style::{}, RelativeTimeFormat::Plurality::{}, {}, {} }}", // Argument order below matches the member order of the generated RelativeTimeFormatImpl struct.
+ format.time_unit,
+ format.style,
+ format.plurality,
+ format.tense_or_number,
+ format.pattern);
+ }
+};
+
+template<>
+struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> { // Hash trait for RelativeTimeFormat; everything else comes from GenericTraits.
+ static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); } // Forwards to the member hash() so both always agree.
+};
+
struct Locale { // Per-locale data collected by this generator.
+ Vector<RelativeTimeFormatIndexType> time_units; // One value returned by unique_formats.ensure() per pattern parsed for this locale, in parse order.
};
struct UnicodeLocaleData { // Everything parsed from the locale data, shared across all locales.
UniqueStringStorage<StringIndexType> unique_strings; // Receives the tense/number suffixes and pattern texts via ensure().
+ UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats; // Receives each parsed RelativeTimeFormat via ensure(); emitted as s_relative_time_formats.
+
HashMap<String, Locale> locales; // Keyed by the language string that parse_all_locales derives from each directory path.
};
+static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale) // Reads dateFields.json inside locale_dates_path and appends one unique_formats index to locale.time_units per relative-time pattern of a sanctioned unit.
+{
+ LexicalPath date_fields_path(move(locale_dates_path));
+ date_fields_path = date_fields_path.append("dateFields.json"sv);
+
+ auto date_fields_file = TRY(Core::File::open(date_fields_path.string(), Core::OpenMode::ReadOnly)); // Failures to open or parse the file are returned to the caller through TRY.
+ auto date_fields = TRY(JsonValue::from_string(date_fields_file->read_all()));
+
+ auto const& main_object = date_fields.as_object().get("main"sv); // Walk main -> <locale> -> dates -> fields.
+ auto const& locale_object = main_object.as_object().get(date_fields_path.parent().basename()); // The locale key is the name of the directory that contains dateFields.json.
+ auto const& dates_object = locale_object.as_object().get("dates"sv);
+ auto const& fields_object = dates_object.as_object().get("fields"sv);
+
+ auto is_sanctioned_unit = [](auto unit) { // Filters out every field whose unit is not in the list below.
+ // This is a copy of the time units sanctioned for use within ECMA-402.
+ // https://tc39.es/ecma402/#sec-singularrelativetimeunit
+ return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
+ };
+
+ auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) { // Builds one RelativeTimeFormat and records its unique index on the locale.
+ RelativeTimeFormat format {};
+ format.time_unit = unit.to_titlecase_string(); // Title-cased so the names can be emitted as enumerator names by the Formatter specialization.
+ format.style = style.to_titlecase_string();
+ format.plurality = plurality.to_titlecase_string();
+ format.tense_or_number = locale_data.unique_strings.ensure(tense_or_number); // Strings are stored once in unique_strings and referenced by index.
+ format.pattern = locale_data.unique_strings.ensure(pattern.as_string());
+
+ locale.time_units.append(locale_data.unique_formats.ensure(move(format)));
+ };
+
+ fields_object.as_object().for_each_member([&](auto const& unit_and_style, auto const& patterns) {
+ auto segments = unit_and_style.split_view('-'); // Member names have the shape "<unit>" or "<unit>-<style>".
+ auto unit = segments[0];
+ auto style = (segments.size() > 1) ? segments[1] : "long"sv; // A name without a style suffix is treated as the "long" style.
+
+ if (!is_sanctioned_unit(unit))
+ return; // Skips this member only; the return leaves the per-member callback.
+
+ patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) { // Members matching neither prefix below are ignored.
+ constexpr auto number_key = "relative-type-"sv;
+ constexpr auto tense_key = "relativeTime-type-"sv;
+ constexpr auto plurality_key = "relativeTimePattern-count-"sv;
+
+ if (type.starts_with(number_key)) { // Numbered form: the value is the pattern itself and is stored with plurality "Other".
+ auto number = type.substring_view(number_key.length());
+ parse_pattern(unit, style, "Other"sv, number, pattern_value);
+ } else if (type.starts_with(tense_key)) { // Tensed form: the value is an object with one pattern per plural category.
+ pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) {
+ VERIFY(key.starts_with(plurality_key)); // Every member of a tensed object must carry the plurality prefix.
+ auto plurality = key.substring_view(plurality_key.length());
+ auto tense = type.substring_view(tense_key.length());
+
+ parse_pattern(unit, style, plurality, tense, pattern);
+ });
+ }
+ });
+ });
+
+ return {};
+}
+
static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
{
auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
@@ -50,7 +165,8 @@ static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& loc
auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
auto language = TRY(remove_variants_from_path(dates_path));
- [[maybe_unused]] auto& locale = locale_data.locales.ensure(language);
+ auto& locale = locale_data.locales.ensure(language);
+ TRY(parse_date_fields(move(dates_path), locale_data, locale));
}
return {};
@@ -81,10 +197,14 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
StringBuilder builder;
SourceGenerator generator { builder };
generator.set("string_index_type"sv, s_string_index_type);
+ generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type);
generator.append(R"~~~(
#include <AK/Array.h>
#include <AK/StringView.h>
+#include <AK/Vector.h>
+#include <LibUnicode/Locale.h>
+#include <LibUnicode/RelativeTimeFormat.h>
#include <LibUnicode/UnicodeRelativeTimeFormat.h>
namespace Unicode {
@@ -93,6 +213,73 @@ namespace Unicode {
locale_data.unique_strings.generate(generator);
generator.append(R"~~~(
+struct RelativeTimeFormatImpl {
+ RelativeTimeFormat to_relative_time_format() const
+ {
+ RelativeTimeFormat relative_time_format {};
+ relative_time_format.plurality = plurality;
+ relative_time_format.pattern = s_string_list[pattern];
+
+ return relative_time_format;
+ }
+
+ TimeUnit time_unit;
+ Style style;
+ RelativeTimeFormat::Plurality plurality;
+ @string_index_type@ tense_or_number { 0 };
+ @string_index_type@ pattern { 0 };
+};
+)~~~"); // Emits the RelativeTimeFormatImpl row type; its to_relative_time_format() copies only plurality and the pattern looked up in s_string_list. @string_index_type@ is the key set on the generator earlier in this function.
+
+ locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10); // Emits the unique formats as s_relative_time_formats with element type RelativeTimeFormatImpl. NOTE(review): the trailing 10 is presumably a values-per-row limit; confirm against UniqueStorage::generate.
+
+ auto append_list = [&](String name, auto const& list) { // Emits one constexpr Array called `name` that holds every index in `list`.
+ generator.set("name", name);
+ generator.set("size", String::number(list.size()));
+
+ generator.append(R"~~~(
+static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");
+
+ bool first = true; // Selects the separator: " " before the first element, ", " before each later one.
+ for (auto index : list) {
+ generator.append(first ? " " : ", ");
+ generator.append(String::number(index));
+ first = false;
+ }
+
+ generator.append(" } };"); // Closes the two braces opened by the declaration above.
+ };
+
+ // Emit one index array per locale plus the s_locale_relative_time_formats mapping over them. The per-locale arrays are named after the data they hold; the earlier "s_number_systems_digits_{}" format described unrelated number-system data.
+ generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_locale_relative_time_formats_{}", nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); });
+
+ generator.append(R"~~~(
+Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
+{
+ Vector<RelativeTimeFormat> formats;
+
+ auto locale_value = locale_from_string(locale);
+ if (!locale_value.has_value())
+ return formats;
+
+ auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
+ auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);
+
+ for (auto const& locale_format_index : locale_formats) {
+ auto const& locale_format = s_relative_time_formats.at(locale_format_index);
+
+ if (locale_format.time_unit != time_unit)
+ continue;
+ if (locale_format.style != style)
+ continue;
+ if (s_string_list[locale_format.tense_or_number] != tense_or_number)
+ continue;
+
+ formats.append(locale_format.to_relative_time_format());
+ }
+
+ return formats;
+}
+
}
)~~~"); // Emits get_relative_time_format_patterns(): it returns an empty Vector for an unrecognized locale, otherwise every format of that locale whose time unit, style and tense-or-number string all match. The same text also closes namespace Unicode.