summary | refs | log | tree | commit | diff
path: root/Meta
diff options
context:
space:
mode:
author: Timothy Flynn <trflynn89@pm.me> 2022-10-19 15:34:17 -0400
committer: Linus Groh <mail@linusgroh.de> 2022-10-25 10:10:39 +0100
commit: b077fccd3d7a9ed9a22d15c78cc8067cebd4aa26 (patch)
tree: e8e481b4ab39612e6110244acdda6f478c00b114 /Meta
parent: b87398341babf971fae3996615283039326538e3 (diff)
download: serenity-b077fccd3d7a9ed9a22d15c78cc8067cebd4aa26.zip
LibLocale+LibJS: Update to CLDR version 42.0.0
There were some notable changes to the CLDR JSON format and data in this release.

The patterns for a date at a specific time, i.e. "{date} at {time}", now appear under the "atTime" attribute of the "dateTimeFormats" object.

Locale specific changes that affected test-js:

All locales:
* In many patterns, the code points U+00A0 (NO-BREAK SPACE) and U+202F (NARROW NO-BREAK SPACE) are now used in place of an ASCII space. For example, before the "dayPeriod" fields AM and PM.
* Separators such as U+2013 (EN DASH) are now surrounded by U+2009 (THIN SPACE) in place of an ASCII space character.

Locale "en":
* Narrow localizations of time formats are even more narrow. For example, the abbreviation "wk." for "week" is now just "wk".

Locale "ar":
* The code point U+060C (ARABIC COMMA) is now used in place of an ASCII comma.
* The code point U+200F (RIGHT-TO-LEFT MARK) now appears at the beginning of many localizations.
* When the "latn" numbering system is used for currency formatting, the currency symbol is more consistently placed at the end of the pattern.

Locale "he":
* The "many" plural rules category has been removed.

Locales "zh" and "es-419":
* Several display-name localizations were changed.
Diffstat (limited to 'Meta')
-rw-r--r-- Meta/CMake/locale_data.cmake | 2
-rw-r--r-- Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp | 49
-rw-r--r-- Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp | 4
3 files changed, 36 insertions, 19 deletions
diff --git a/Meta/CMake/locale_data.cmake b/Meta/CMake/locale_data.cmake
index a548ff67e6..292247db55 100644
--- a/Meta/CMake/locale_data.cmake
+++ b/Meta/CMake/locale_data.cmake
@@ -1,6 +1,6 @@
include(${CMAKE_CURRENT_LIST_DIR}/utils.cmake)
-set(CLDR_VERSION 41.0.0)
+set(CLDR_VERSION 42.0.0)
set(CLDR_PATH "${CMAKE_BINARY_DIR}/CLDR" CACHE PATH "Download location for CLDR files")
set(CLDR_VERSION_FILE "${CLDR_PATH}/version.txt")
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp
index 644bbd0818..d8d6dc0527 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateDateTimeFormatData.cpp
@@ -39,8 +39,8 @@ constexpr auto s_calendar_pattern_list_index_type = "u8"sv;
using CalendarRangePatternIndexType = u16;
constexpr auto s_calendar_range_pattern_index_type = "u16"sv;
-using CalendarRangePatternListIndexType = u8;
-constexpr auto s_calendar_range_pattern_list_index_type = "u8"sv;
+using CalendarRangePatternListIndexType = u16;
+constexpr auto s_calendar_range_pattern_list_index_type = "u16"sv;
using CalendarFormatIndexType = u8;
constexpr auto s_calendar_format_index_type = "u8"sv;
@@ -764,32 +764,48 @@ static constexpr auto is_char(char ch)
// "{hour}:{minute} {ampm} {timeZoneName}" becomes "{hour}:{minute} {timeZoneName}" (remove one of the spaces around {ampm})
static String remove_period_from_pattern(String pattern)
{
+ auto is_surrounding_space = [&](auto code_point_iterator) {
+ if (code_point_iterator.done())
+ return false;
+
+ constexpr auto spaces = Array { static_cast<u32>(0x0020), 0x00a0, 0x2009, 0x202f };
+ return spaces.span().contains_slow(*code_point_iterator);
+ };
+
+ auto is_opening = [&](auto code_point_iterator) {
+ if (code_point_iterator.done())
+ return false;
+ return *code_point_iterator == '{';
+ };
+
+ auto is_closing = [&](auto code_point_iterator) {
+ if (code_point_iterator.done())
+ return false;
+ return *code_point_iterator == '}';
+ };
+
for (auto remove : AK::Array { "({ampm})"sv, "{ampm}"sv, "({dayPeriod})"sv, "{dayPeriod}"sv }) {
auto index = pattern.find(remove);
if (!index.has_value())
continue;
- constexpr u32 space = ' ';
- constexpr u32 open = '{';
- constexpr u32 close = '}';
-
Utf8View utf8_pattern { pattern };
- Optional<u32> before_removal;
- Optional<u32> after_removal;
+ Utf8CodePointIterator before_removal;
+ Utf8CodePointIterator after_removal;
for (auto it = utf8_pattern.begin(); utf8_pattern.byte_offset_of(it) < *index; ++it)
- before_removal = *it;
+ before_removal = it;
if (auto it = utf8_pattern.iterator_at_byte_offset(*index + remove.length()); it != utf8_pattern.end())
- after_removal = *it;
+ after_removal = it;
- if ((before_removal == space) && (after_removal != open)) {
+ if (is_surrounding_space(before_removal) && !is_opening(after_removal)) {
pattern = String::formatted("{}{}",
- pattern.substring_view(0, *index - 1),
+ pattern.substring_view(0, *index - before_removal.underlying_code_point_length_in_bytes()),
pattern.substring_view(*index + remove.length()));
- } else if ((after_removal == space) && (before_removal != close)) {
+ } else if (is_surrounding_space(after_removal) && !is_closing(before_removal)) {
pattern = String::formatted("{}{}",
pattern.substring_view(0, *index),
- pattern.substring_view(*index + remove.length() + 1));
+ pattern.substring_view(*index + remove.length() + after_removal.underlying_code_point_length_in_bytes()));
} else {
pattern = String::formatted("{}{}",
pattern.substring_view(0, *index),
@@ -1466,9 +1482,10 @@ static ErrorOr<void> parse_calendars(String locale_calendars_path, CLDR& cldr, L
auto const& time_skeletons_object = value.as_object().get("timeSkeletons"sv);
calendar.time_formats = parse_patterns(time_formats_object.as_object(), time_skeletons_object.as_object(), &time_formats);
- auto const& date_time_formats_object = value.as_object().get("dateTimeFormats"sv);
- calendar.date_time_formats = parse_patterns(date_time_formats_object.as_object(), JsonObject {}, nullptr);
+ auto const& standard_date_time_formats_object = value.as_object().get("dateTimeFormats-atTime"sv).as_object().get("standard"sv);
+ calendar.date_time_formats = parse_patterns(standard_date_time_formats_object.as_object(), JsonObject {}, nullptr);
+ auto const& date_time_formats_object = value.as_object().get("dateTimeFormats"sv);
auto const& available_formats_object = date_time_formats_object.as_object().get("availableFormats"sv);
available_formats_object.as_object().for_each_member([&](auto const& skeleton, JsonValue const& pattern) {
auto pattern_index = parse_date_time_pattern(pattern.as_string(), skeleton, cldr);
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp
index 32f34ac24b..b8d69c4a99 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp
@@ -43,8 +43,8 @@ constexpr auto s_currency_list_index_type = "u16"sv;
using CalendarListIndexType = u8;
constexpr auto s_calendar_list_index_type = "u8"sv;
-using DateFieldListIndexType = u8;
-constexpr auto s_date_field_list_index_type = "u8"sv;
+using DateFieldListIndexType = u16;
+constexpr auto s_date_field_list_index_type = "u16"sv;
using KeywordListIndexType = u8;
constexpr auto s_keyword_list_index_type = "u8"sv;