diff options
author | Timothy Flynn <trflynn89@pm.me> | 2022-10-19 15:34:17 -0400 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2022-10-25 10:10:39 +0100 |
commit | b077fccd3d7a9ed9a22d15c78cc8067cebd4aa26 (patch) | |
tree | e8e481b4ab39612e6110244acdda6f478c00b114 /Meta | |
parent | b87398341babf971fae3996615283039326538e3 (diff) | |
download | serenity-b077fccd3d7a9ed9a22d15c78cc8067cebd4aa26.zip |
LibLocale+LibJS: Update to CLDR version 42.0.0
There were some notable changes to the CLDR JSON format and data in this
release.
The patterns for a date at a specific time, i.e. "{date} at {time}", now
appear in the "standard" member of a separate "dateTimeFormats-atTime" object,
rather than directly under the "dateTimeFormats" object.
Locale specific changes that affected test-js:
All locales:
* In many patterns, the code points U+00A0 (NO-BREAK SPACE) and U+202F
(NARROW NO-BREAK SPACE) are now used in place of an ASCII space, for
example before the "dayPeriod" fields AM and PM.
* Separators such as U+2013 (EN DASH) are now surrounded by U+2009 (THIN
SPACE) in place of an ASCII space character.
Locale "en":
* Narrow localizations of time formats are even more narrow. For
example, the abbreviation "wk." for "week" is now just "wk".
Locale "ar":
* The code point U+060C (ARABIC COMMA) is now used in place of an ASCII
comma.
* The code point U+200F (RIGHT-TO-LEFT MARK) now appears at the
beginning of many localizations.
* When the "latn" numbering system is used for currency formatting, the
currency symbol is now more consistently placed at the end of the pattern.
Locale "he":
* The "many" plural rules category has been removed.
Locales "zh" and "es-419":
* Several display-name localizations were changed.
Diffstat (limited to 'Meta')
3 files changed, 36 insertions, 19 deletions
diff --git a/Meta/CMake/locale_data.cmake b/Meta/CMake/locale_data.cmake index a548ff67e6..292247db55 100644 --- a/Meta/CMake/locale_data.cmake +++ b/Meta/CMake/locale_data.cmake @@ -1,6 +1,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/utils.cmake) -set(CLDR_VERSION 41.0.0) +set(CLDR_VERSION 42.0.0) set(CLDR_PATH "${CMAKE_BINARY_DIR}/CLDR" CACHE PATH "Download location for CLDR files") set(CLDR_VERSION_FILE "${CLDR_PATH}/version.txt") diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp index 644bbd0818..d8d6dc0527 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp @@ -39,8 +39,8 @@ constexpr auto s_calendar_pattern_list_index_type = "u8"sv; using CalendarRangePatternIndexType = u16; constexpr auto s_calendar_range_pattern_index_type = "u16"sv; -using CalendarRangePatternListIndexType = u8; -constexpr auto s_calendar_range_pattern_list_index_type = "u8"sv; +using CalendarRangePatternListIndexType = u16; +constexpr auto s_calendar_range_pattern_list_index_type = "u16"sv; using CalendarFormatIndexType = u8; constexpr auto s_calendar_format_index_type = "u8"sv; @@ -764,32 +764,48 @@ static constexpr auto is_char(char ch) // "{hour}:{minute} {ampm} {timeZoneName}" becomes "{hour}:{minute} {timeZoneName}" (remove one of the spaces around {ampm}) static String remove_period_from_pattern(String pattern) { + auto is_surrounding_space = [&](auto code_point_iterator) { + if (code_point_iterator.done()) + return false; + + constexpr auto spaces = Array { static_cast<u32>(0x0020), 0x00a0, 0x2009, 0x202f }; + return spaces.span().contains_slow(*code_point_iterator); + }; + + auto is_opening = [&](auto code_point_iterator) { + if (code_point_iterator.done()) + return false; + return *code_point_iterator == '{'; + }; + + auto is_closing = [&](auto code_point_iterator) 
{ + if (code_point_iterator.done()) + return false; + return *code_point_iterator == '}'; + }; + for (auto remove : AK::Array { "({ampm})"sv, "{ampm}"sv, "({dayPeriod})"sv, "{dayPeriod}"sv }) { auto index = pattern.find(remove); if (!index.has_value()) continue; - constexpr u32 space = ' '; - constexpr u32 open = '{'; - constexpr u32 close = '}'; - Utf8View utf8_pattern { pattern }; - Optional<u32> before_removal; - Optional<u32> after_removal; + Utf8CodePointIterator before_removal; + Utf8CodePointIterator after_removal; for (auto it = utf8_pattern.begin(); utf8_pattern.byte_offset_of(it) < *index; ++it) - before_removal = *it; + before_removal = it; if (auto it = utf8_pattern.iterator_at_byte_offset(*index + remove.length()); it != utf8_pattern.end()) - after_removal = *it; + after_removal = it; - if ((before_removal == space) && (after_removal != open)) { + if (is_surrounding_space(before_removal) && !is_opening(after_removal)) { pattern = String::formatted("{}{}", - pattern.substring_view(0, *index - 1), + pattern.substring_view(0, *index - before_removal.underlying_code_point_length_in_bytes()), pattern.substring_view(*index + remove.length())); - } else if ((after_removal == space) && (before_removal != close)) { + } else if (is_surrounding_space(after_removal) && !is_closing(before_removal)) { pattern = String::formatted("{}{}", pattern.substring_view(0, *index), - pattern.substring_view(*index + remove.length() + 1)); + pattern.substring_view(*index + remove.length() + after_removal.underlying_code_point_length_in_bytes())); } else { pattern = String::formatted("{}{}", pattern.substring_view(0, *index), @@ -1466,9 +1482,10 @@ static ErrorOr<void> parse_calendars(String locale_calendars_path, CLDR& cldr, L auto const& time_skeletons_object = value.as_object().get("timeSkeletons"sv); calendar.time_formats = parse_patterns(time_formats_object.as_object(), time_skeletons_object.as_object(), &time_formats); - auto const& date_time_formats_object = 
value.as_object().get("dateTimeFormats"sv); - calendar.date_time_formats = parse_patterns(date_time_formats_object.as_object(), JsonObject {}, nullptr); + auto const& standard_date_time_formats_object = value.as_object().get("dateTimeFormats-atTime"sv).as_object().get("standard"sv); + calendar.date_time_formats = parse_patterns(standard_date_time_formats_object.as_object(), JsonObject {}, nullptr); + auto const& date_time_formats_object = value.as_object().get("dateTimeFormats"sv); auto const& available_formats_object = date_time_formats_object.as_object().get("availableFormats"sv); available_formats_object.as_object().for_each_member([&](auto const& skeleton, JsonValue const& pattern) { auto pattern_index = parse_date_time_pattern(pattern.as_string(), skeleton, cldr); diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp index 32f34ac24b..b8d69c4a99 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp @@ -43,8 +43,8 @@ constexpr auto s_currency_list_index_type = "u16"sv; using CalendarListIndexType = u8; constexpr auto s_calendar_list_index_type = "u8"sv; -using DateFieldListIndexType = u8; -constexpr auto s_date_field_list_index_type = "u8"sv; +using DateFieldListIndexType = u16; +constexpr auto s_date_field_list_index_type = "u16"sv; using KeywordListIndexType = u8; constexpr auto s_keyword_list_index_type = "u8"sv; |