diff options
author | Timothy Flynn <trflynn89@pm.me> | 2022-01-26 10:49:58 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2022-01-27 21:16:44 +0000 |
commit | 789f093b2eb5a63a651d7e02bc34b71aad28438d (patch) | |
tree | 04f92e61694daee4d441aca22332ffe3e1c5469e | |
parent | 27eda77c9736361f15407c29e5258f635b1cd8d5 (diff) | |
download | serenity-789f093b2eb5a63a651d7e02bc34b71aad28438d.zip |
LibUnicode: Parse and generate relative-time format patterns
Relative-time format patterns are of one of two forms:
* Tensed - refer to the past or the future, e.g. "N years ago" or
"in N years".
* Numbered - refer to a specific numeric value, e.g. "in 1 year"
becomes "next year" and "in 0 years" becomes "this year".
In ECMA-402, tensed and numbered correspond to the values "always" and
"auto" of the Intl.RelativeTimeFormat `numeric` option, respectively.
4 files changed, 295 insertions, 1 deletion
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp index 3df4a84bda..7239d3046b 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeRelativeTimeFormat.cpp @@ -17,18 +17,133 @@ #include <LibCore/ArgsParser.h> #include <LibCore/DirIterator.h> #include <LibCore/File.h> +#include <LibUnicode/Locale.h> +#include <LibUnicode/RelativeTimeFormat.h> using StringIndexType = u16; constexpr auto s_string_index_type = "u16"sv; +using RelativeTimeFormatIndexType = u16; +constexpr auto s_relative_time_format_index_type = "u16"sv; + +struct RelativeTimeFormat { + unsigned hash() const + { + auto hash = time_unit.hash(); + hash = pair_int_hash(hash, style.hash()); + hash = pair_int_hash(hash, plurality.hash()); + hash = pair_int_hash(hash, tense_or_number); + hash = pair_int_hash(hash, pattern); + return hash; + } + + bool operator==(RelativeTimeFormat const& other) const + { + return (time_unit == other.time_unit) + && (plurality == other.plurality) + && (style == other.style) + && (tense_or_number == other.tense_or_number) + && (pattern == other.pattern); + } + + String time_unit; + String style; + String plurality; + StringIndexType tense_or_number { 0 }; + StringIndexType pattern { 0 }; +}; + +template<> +struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> { + ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format) + { + return Formatter<FormatString>::format(builder, + "{{ TimeUnit::{}, Style::{}, RelativeTimeFormat::Plurality::{}, {}, {} }}", + format.time_unit, + format.style, + format.plurality, + format.tense_or_number, + format.pattern); + } +}; + +template<> +struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> { + static unsigned hash(RelativeTimeFormat const& format) { return 
format.hash(); } +}; + struct Locale { + Vector<RelativeTimeFormatIndexType> time_units; }; struct UnicodeLocaleData { UniqueStringStorage<StringIndexType> unique_strings; + UniqueStorage<RelativeTimeFormat, RelativeTimeFormatIndexType> unique_formats; + HashMap<String, Locale> locales; }; +static ErrorOr<void> parse_date_fields(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale) +{ + LexicalPath date_fields_path(move(locale_dates_path)); + date_fields_path = date_fields_path.append("dateFields.json"sv); + + auto date_fields_file = TRY(Core::File::open(date_fields_path.string(), Core::OpenMode::ReadOnly)); + auto date_fields = TRY(JsonValue::from_string(date_fields_file->read_all())); + + auto const& main_object = date_fields.as_object().get("main"sv); + auto const& locale_object = main_object.as_object().get(date_fields_path.parent().basename()); + auto const& dates_object = locale_object.as_object().get("dates"sv); + auto const& fields_object = dates_object.as_object().get("fields"sv); + + auto is_sanctioned_unit = [](auto unit) { + // This is a copy of the time units sanctioned for use within ECMA-402. 
+ // https://tc39.es/ecma402/#sec-singularrelativetimeunit + return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv); + }; + + auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) { + RelativeTimeFormat format {}; + format.time_unit = unit.to_titlecase_string(); + format.style = style.to_titlecase_string(); + format.plurality = plurality.to_titlecase_string(); + format.tense_or_number = locale_data.unique_strings.ensure(tense_or_number); + format.pattern = locale_data.unique_strings.ensure(pattern.as_string()); + + locale.time_units.append(locale_data.unique_formats.ensure(move(format))); + }; + + fields_object.as_object().for_each_member([&](auto const& unit_and_style, auto const& patterns) { + auto segments = unit_and_style.split_view('-'); + auto unit = segments[0]; + auto style = (segments.size() > 1) ? segments[1] : "long"sv; + + if (!is_sanctioned_unit(unit)) + return; + + patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) { + constexpr auto number_key = "relative-type-"sv; + constexpr auto tense_key = "relativeTime-type-"sv; + constexpr auto plurality_key = "relativeTimePattern-count-"sv; + + if (type.starts_with(number_key)) { + auto number = type.substring_view(number_key.length()); + parse_pattern(unit, style, "Other"sv, number, pattern_value); + } else if (type.starts_with(tense_key)) { + pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) { + VERIFY(key.starts_with(plurality_key)); + auto plurality = key.substring_view(plurality_key.length()); + auto tense = type.substring_view(tense_key.length()); + + parse_pattern(unit, style, plurality, tense, pattern); + }); + } + }); + }); + + return {}; +} + static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data) { auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); @@ -50,7 +165,8 @@ static 
ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& loc auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator)); auto language = TRY(remove_variants_from_path(dates_path)); - [[maybe_unused]] auto& locale = locale_data.locales.ensure(language); + auto& locale = locale_data.locales.ensure(language); + TRY(parse_date_fields(move(dates_path), locale_data, locale)); } return {}; @@ -81,10 +197,14 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca StringBuilder builder; SourceGenerator generator { builder }; generator.set("string_index_type"sv, s_string_index_type); + generator.set("relative_time_format_index_type"sv, s_relative_time_format_index_type); generator.append(R"~~~( #include <AK/Array.h> #include <AK/StringView.h> +#include <AK/Vector.h> +#include <LibUnicode/Locale.h> +#include <LibUnicode/RelativeTimeFormat.h> #include <LibUnicode/UnicodeRelativeTimeFormat.h> namespace Unicode { @@ -93,6 +213,73 @@ namespace Unicode { locale_data.unique_strings.generate(generator); generator.append(R"~~~( +struct RelativeTimeFormatImpl { + RelativeTimeFormat to_relative_time_format() const + { + RelativeTimeFormat relative_time_format {}; + relative_time_format.plurality = plurality; + relative_time_format.pattern = s_string_list[pattern]; + + return relative_time_format; + } + + TimeUnit time_unit; + Style style; + RelativeTimeFormat::Plurality plurality; + @string_index_type@ tense_or_number { 0 }; + @string_index_type@ pattern { 0 }; +}; +)~~~"); + + locale_data.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10); + + auto append_list = [&](String name, auto const& list) { + generator.set("name", name); + generator.set("size", String::number(list.size())); + + generator.append(R"~~~( +static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~"); + + bool first = true; + for (auto index : list) { + generator.append(first ? 
" " : ", "); + generator.append(String::number(index)); + first = false; + } + + generator.append(" } };"); + }; + + generate_mapping(generator, locale_data.locales, s_relative_time_format_index_type, "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}", nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); }); + + generator.append(R"~~~( +Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style) +{ + Vector<RelativeTimeFormat> formats; + + auto locale_value = locale_from_string(locale); + if (!locale_value.has_value()) + return formats; + + auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. + auto const& locale_formats = s_locale_relative_time_formats.at(locale_index); + + for (auto const& locale_format_index : locale_formats) { + auto const& locale_format = s_relative_time_formats.at(locale_format_index); + + if (locale_format.time_unit != time_unit) + continue; + if (locale_format.style != style) + continue; + if (s_string_list[locale_format.tense_or_number] != tense_or_number) + continue; + + formats.append(locale_format.to_relative_time_format()); + } + + return formats; +} + } )~~~"); diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index c59470cca0..4f9c5527b3 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -13,6 +13,7 @@ set(SOURCES DateTimeFormat.cpp Locale.cpp NumberFormat.cpp + RelativeTimeFormat.cpp ) serenity_lib(LibUnicode unicode) diff --git a/Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp b/Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp new file mode 100644 index 0000000000..45a5529f0c --- /dev/null +++ b/Userland/Libraries/LibUnicode/RelativeTimeFormat.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me> + * + * 
SPDX-License-Identifier: BSD-2-Clause + */ + +#include <LibUnicode/RelativeTimeFormat.h> + +namespace Unicode { + +Optional<TimeUnit> time_unit_from_string(StringView time_unit) +{ + if (time_unit == "second"sv) + return TimeUnit::Second; + if (time_unit == "minute"sv) + return TimeUnit::Minute; + if (time_unit == "hour"sv) + return TimeUnit::Hour; + if (time_unit == "day"sv) + return TimeUnit::Day; + if (time_unit == "week"sv) + return TimeUnit::Week; + if (time_unit == "month"sv) + return TimeUnit::Month; + if (time_unit == "quarter"sv) + return TimeUnit::Quarter; + if (time_unit == "year"sv) + return TimeUnit::Year; + return {}; +} + +StringView time_unit_to_string(TimeUnit time_unit) +{ + switch (time_unit) { + case TimeUnit::Second: + return "second"sv; + case TimeUnit::Minute: + return "minute"sv; + case TimeUnit::Hour: + return "hour"sv; + case TimeUnit::Day: + return "day"sv; + case TimeUnit::Week: + return "week"sv; + case TimeUnit::Month: + return "month"sv; + case TimeUnit::Quarter: + return "quarter"sv; + case TimeUnit::Year: + return "year"sv; + default: + VERIFY_NOT_REACHED(); + } +} + +Vector<RelativeTimeFormat> __attribute__((weak)) get_relative_time_format_patterns(StringView, TimeUnit, StringView, Style) { return {}; } + +} diff --git a/Userland/Libraries/LibUnicode/RelativeTimeFormat.h b/Userland/Libraries/LibUnicode/RelativeTimeFormat.h new file mode 100644 index 0000000000..f07016a7fe --- /dev/null +++ b/Userland/Libraries/LibUnicode/RelativeTimeFormat.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2022, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Optional.h> +#include <AK/StringView.h> +#include <AK/Vector.h> +#include <LibUnicode/Forward.h> +#include <LibUnicode/Locale.h> + +namespace Unicode { + +// These are just the subset of fields in the CLDR required for ECMA-402. 
+enum class TimeUnit { + Second, + Minute, + Hour, + Day, + Week, + Month, + Quarter, + Year, +}; + +struct RelativeTimeFormat { + enum class Plurality { + Zero, + One, + Two, + Few, + Many, + Other, + }; + + Plurality plurality { Plurality::Other }; + StringView pattern; +}; + +Optional<TimeUnit> time_unit_from_string(StringView time_unit); +StringView time_unit_to_string(TimeUnit time_unit); + +Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style); + +} |