diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-11-28 10:39:55 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-11-29 22:48:46 +0000 |
commit | 71903ea7e1213f67e076cb0fc7350a6863152448 (patch) | |
tree | 2a8425fcbefb9a2f18e03931e0887a08f12f30c4 | |
parent | 48ce72e472e1ea5ba01b1ac8dc2bc409840cd23b (diff) | |
download | serenity-71903ea7e1213f67e076cb0fc7350a6863152448.zip |
LibUnicode: Parse and generate calendar (ca) Unicode keywords
Also removes, as a fly-by cleanup, a few unnecessary
"StringView x = nullptr;" initializers.
4 files changed, 78 insertions, 13 deletions
diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake index 794a674d98..2d71f78fe4 100644 --- a/Meta/CMake/unicode_data.cmake +++ b/Meta/CMake/unicode_data.cmake @@ -198,7 +198,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) Lagom::GenerateUnicodeLocale "${UNICODE_LOCALE_HEADER}" "${UNICODE_LOCALE_IMPLEMENTATION}" - arguments -r "${CLDR_CORE_PATH}" -l "${CLDR_LOCALES_PATH}" -m "${CLDR_MISC_PATH}" -n "${CLDR_NUMBERS_PATH}" + arguments -r "${CLDR_CORE_PATH}" -l "${CLDR_LOCALES_PATH}" -m "${CLDR_MISC_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}" ) invoke_generator( "UnicodeNumberFormat" diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp index 2700571676..89ed709092 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp @@ -372,7 +372,9 @@ static constexpr Array<CalendarData, @size@> @name@ { {)~~~"); )~~~"); }; - auto append_hour_cycles = [&](String name, auto const& hour_cycles) { + auto append_hour_cycles = [&](String name, auto const& hour_cycle_region) { + auto const& hour_cycles = locale_data.hour_cycles.find(hour_cycle_region)->value; + generator.set("name", name); generator.set("size", String::number(hour_cycles.size())); @@ -388,7 +390,7 @@ static constexpr Array<u8, @size@> @name@ { { )~~~"); }; generate_mapping(generator, locale_data.locales, "CalendarData"sv, "s_calendars"sv, "s_calendars_{}", [&](auto const& name, auto const& value) { append_calendars(name, value.calendars); }); - generate_mapping(generator, locale_data.hour_cycles, "u8"sv, "s_hour_cycles"sv, "s_hour_cycles_{}", [&](auto const& name, auto const& value) { append_hour_cycles(name, value); }); + generate_mapping(generator, locale_data.hour_cycle_regions, "u8"sv, "s_hour_cycles"sv, "s_hour_cycles_{}", [&](auto const& name, auto const& 
value) { append_hour_cycles(name, value); }); auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector<Alias> const& aliases = {}) { HashValueMap<String> hashes; diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp index bc2ec9e344..c669cdf7c6 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp @@ -384,6 +384,50 @@ static ErrorOr<void> parse_numeric_keywords(String locale_numbers_path, UnicodeL return {}; } +static ErrorOr<void> parse_calendar_keywords(String locale_dates_path, UnicodeLocaleData& locale_data, Locale& locale) +{ + static constexpr StringView key = "ca"sv; + + auto calendars_iterator = TRY(path_to_dir_iterator(locale_dates_path, {})); + Vector<String> keyword_values {}; + + while (calendars_iterator.has_next()) { + auto locale_calendars_path = TRY(next_path_from_dir_iterator(calendars_iterator)); + + LexicalPath calendars_path(move(locale_calendars_path)); + if (!calendars_path.basename().starts_with("ca-"sv)) + continue; + + auto calendars_file = TRY(Core::File::open(calendars_path.string(), Core::OpenMode::ReadOnly)); + auto calendars = TRY(JsonValue::from_string(calendars_file->read_all())); + + auto const& main_object = calendars.as_object().get("main"sv); + auto const& locale_object = main_object.as_object().get(calendars_path.parent().basename()); + auto const& dates_object = locale_object.as_object().get("dates"sv); + auto const& calendars_object = dates_object.as_object().get("calendars"sv); + + calendars_object.as_object().for_each_member([&](auto const& calendar_name, JsonValue const&) { + keyword_values.append(calendar_name); + + // FIXME: Similar to the calendar aliases defined in GenerateUnicodeDateTimeFormat, this + // should be parsed from BCP47. 
https://unicode-org.atlassian.net/browse/CLDR-15158 + if (calendar_name == "gregorian"sv) + keyword_values.append("gregory"sv); + }); + } + + StringBuilder builder; + builder.join(',', keyword_values); + + auto index = locale_data.unique_strings.ensure(builder.build()); + locale.keywords.set(key, index); + + if (!locale_data.keywords.contains_slow(key)) + locale_data.keywords.append(key); + + return {}; +} + static ErrorOr<void> parse_default_content_locales(String core_path, UnicodeLocaleData& locale_data) { LexicalPath default_content_path(move(core_path)); @@ -459,12 +503,13 @@ static ErrorOr<void> define_aliases_without_scripts(UnicodeLocaleData& locale_da return {}; } -static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data) +static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, String dates_path, UnicodeLocaleData& locale_data) { auto identity_iterator = TRY(path_to_dir_iterator(locale_names_path)); auto locale_names_iterator = TRY(path_to_dir_iterator(move(locale_names_path))); auto misc_iterator = TRY(path_to_dir_iterator(move(misc_path))); auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path))); + auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); LexicalPath core_supplemental_path(core_path); core_supplemental_path = core_supplemental_path.append("supplemental"sv); @@ -521,6 +566,14 @@ static ErrorOr<void> parse_all_locales(String core_path, String locale_names_pat TRY(parse_numeric_keywords(numbers_path, locale_data, locale)); } + while (dates_iterator.has_next()) { + auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator)); + auto language = TRY(remove_variants_from_path(dates_path)); + + auto& locale = locale_data.locales.ensure(language); + TRY(parse_calendar_keywords(dates_path, locale_data, locale)); + } + 
TRY(parse_default_content_locales(move(core_path), locale_data)); TRY(define_aliases_without_scripts(locale_data)); @@ -1093,12 +1146,13 @@ Optional<String> resolve_most_likely_territory(Unicode::LanguageID const& langua ErrorOr<int> serenity_main(Main::Arguments arguments) { - StringView generated_header_path = nullptr; - StringView generated_implementation_path = nullptr; - StringView core_path = nullptr; - StringView locale_names_path = nullptr; - StringView misc_path = nullptr; - StringView numbers_path = nullptr; + StringView generated_header_path; + StringView generated_implementation_path; + StringView core_path; + StringView locale_names_path; + StringView misc_path; + StringView numbers_path; + StringView dates_path; Core::ArgsParser args_parser; args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); @@ -1107,6 +1161,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments) args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path"); args_parser.add_option(misc_path, "Path to cldr-misc directory", "misc-path", 'm', "misc-path"); args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path"); + args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path"); args_parser.parse(arguments); auto open_file = [&](StringView path) -> ErrorOr<NonnullRefPtr<Core::File>> { @@ -1122,7 +1177,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments) auto generated_implementation_file = TRY(open_file(generated_implementation_path)); UnicodeLocaleData locale_data; - TRY(parse_all_locales(core_path, locale_names_path, misc_path, numbers_path, locale_data)); + TRY(parse_all_locales(core_path, locale_names_path, misc_path, numbers_path, dates_path, locale_data)); generate_unicode_locale_header(generated_header_file, 
locale_data); generate_unicode_locale_implementation(generated_implementation_file, locale_data); diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h index a2acd2de83..8f7f7e216a 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h @@ -291,8 +291,16 @@ void generate_mapping(SourceGenerator& generator, LocalesType const& locales, St Vector<String> mapping_names; for (auto const& locale : locales) { - auto mapping_name = format_mapping_name(format, locale.key); - format_list(mapping_name, locale.value); + String mapping_name; + + if constexpr (requires { locale.key; }) { + mapping_name = format_mapping_name(format, locale.key); + format_list(mapping_name, locale.value); + } else { + mapping_name = format_mapping_name(format, locale); + format_list(mapping_name, locale); + } + mapping_names.append(move(mapping_name)); } |