summaryrefslogtreecommitdiff
path: root/Meta/Lagom/Tools/CodeGenerators/LibUnicode
diff options
context:
space:
mode:
authorTimothy Flynn <trflynn89@pm.me>2021-09-06 13:56:44 -0400
committerLinus Groh <mail@linusgroh.de>2021-09-06 23:49:56 +0100
commit3f64a14e0628051935c83a59da5e049a82610138 (patch)
treebb1bc7eb2c4847c0b75fa7bbf9ae2a7bdaae4519 /Meta/Lagom/Tools/CodeGenerators/LibUnicode
parent9cd986d8c00da6737a531c240266872dc527560c (diff)
downloadserenity-3f64a14e0628051935c83a59da5e049a82610138.zip
LibUnicode: Parse and generate the Unicode locale list patterns dataset
This data informs consumers how to join lists of values. For example, in en-US, the list ["a", "b", "c"] formatted to a string should become "a, b, and c".
Diffstat (limited to 'Meta/Lagom/Tools/CodeGenerators/LibUnicode')
-rw-r--r--Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp162
1 files changed, 152 insertions, 10 deletions
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp
index e6b06a8745..8ce12799de 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeLocale.cpp
@@ -21,6 +21,15 @@
#include <LibCore/File.h>
#include <LibUnicode/Locale.h>
+struct ListPatterns {
+ String type;
+ String style;
+ String start;
+ String middle;
+ String end;
+ String pair;
+};
+
struct Locale {
String language;
Optional<String> territory;
@@ -29,6 +38,7 @@ struct Locale {
HashMap<String, String> territories;
HashMap<String, String> scripts;
HashMap<String, String> currencies;
+ Vector<ListPatterns> list_patterns;
};
struct CanonicalLanguageID {
@@ -50,6 +60,8 @@ struct UnicodeLocaleData {
Vector<String> scripts;
Vector<String> variants;
Vector<String> currencies;
+ Vector<String> list_pattern_types;
+ Vector<String> list_pattern_styles;
HashMap<String, String> language_aliases;
HashMap<String, String> territory_aliases;
HashMap<String, String> script_aliases;
@@ -293,6 +305,58 @@ static void parse_locale_scripts(String locale_path, UnicodeLocaleData& locale_d
});
}
+static void parse_locale_list_patters(String misc_path, UnicodeLocaleData& locale_data, Locale& locale)
+{
+ LexicalPath list_patterns_path(move(misc_path));
+ list_patterns_path = list_patterns_path.append("listPatterns.json"sv);
+ VERIFY(Core::File::exists(list_patterns_path.string()));
+
+ auto list_patterns_file_or_error = Core::File::open(list_patterns_path.string(), Core::OpenMode::ReadOnly);
+ VERIFY(!list_patterns_file_or_error.is_error());
+
+ auto list_patterns = JsonParser(list_patterns_file_or_error.value()->read_all()).parse();
+ VERIFY(list_patterns.has_value());
+
+ auto const& main_object = list_patterns->as_object().get("main"sv);
+ auto const& locale_object = main_object.as_object().get(list_patterns_path.parent().basename());
+ auto const& list_patterns_object = locale_object.as_object().get("listPatterns"sv);
+
+ auto list_pattern_type = [](StringView key) {
+ if (key.contains("type-standard"sv))
+ return "conjunction"sv;
+ if (key.contains("type-or"sv))
+ return "disjunction"sv;
+ if (key.contains("type-unit"sv))
+ return "unit"sv;
+ VERIFY_NOT_REACHED();
+ };
+
+ auto list_pattern_style = [](StringView key) {
+ if (key.contains("short"sv))
+ return "short"sv;
+ if (key.contains("narrow"sv))
+ return "narrow"sv;
+ return "long"sv;
+ };
+
+ list_patterns_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
+ auto type = list_pattern_type(key);
+ auto style = list_pattern_style(key);
+
+ auto start = value.as_object().get("start"sv).as_string();
+ auto middle = value.as_object().get("middle"sv).as_string();
+ auto end = value.as_object().get("end"sv).as_string();
+ auto pair = value.as_object().get("2"sv).as_string();
+
+ if (!locale_data.list_pattern_types.contains_slow(type))
+ locale_data.list_pattern_types.append(type);
+ if (!locale_data.list_pattern_styles.contains_slow(style))
+ locale_data.list_pattern_styles.append(style);
+
+ locale.list_patterns.append({ move(type), move(style), move(start), move(middle), move(end), move(pair) });
+ });
+}
+
static void parse_locale_currencies(String numbers_path, UnicodeLocaleData& locale_data, Locale& locale)
{
LexicalPath currencies_path(move(numbers_path));
@@ -333,9 +397,10 @@ static Core::DirIterator path_to_dir_iterator(String path)
return iterator;
}
-static void parse_all_locales(String core_path, String locale_names_path, String numbers_path, UnicodeLocaleData& locale_data)
+static void parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data)
{
auto locale_names_iterator = path_to_dir_iterator(move(locale_names_path));
+ auto misc_iterator = path_to_dir_iterator(move(misc_path));
auto numbers_iterator = path_to_dir_iterator(move(numbers_path));
LexicalPath core_supplemental_path(move(core_path));
@@ -356,6 +421,14 @@ static void parse_all_locales(String core_path, String locale_names_path, String
parse_locale_scripts(locale_path, locale_data, locale);
}
+ while (misc_iterator.has_next()) {
+ auto misc_path = misc_iterator.next_full_path();
+ VERIFY(Core::File::is_directory(misc_path));
+
+ auto& locale = locale_data.locales.ensure(LexicalPath::basename(misc_path));
+ parse_locale_list_patters(misc_path, locale_data, locale);
+ }
+
while (numbers_iterator.has_next()) {
auto numbers_path = numbers_iterator.next_full_path();
VERIFY(Core::File::is_directory(numbers_path));
@@ -423,6 +496,8 @@ namespace Unicode {
generate_enum("ScriptTag"sv, {}, locale_data.scripts);
generate_enum("Currency"sv, {}, locale_data.currencies);
generate_enum("Variant"sv, {}, locale_data.variants);
+ generate_enum("ListPatternType"sv, {}, locale_data.list_pattern_types);
+ generate_enum("ListPatternStyle"sv, {}, locale_data.list_pattern_styles);
generator.append(R"~~~(
namespace Detail {
@@ -444,6 +519,10 @@ Optional<StringView> resolve_script_tag_alias(StringView const& script_tag);
Optional<StringView> get_locale_currency_mapping(StringView locale, StringView currency);
Optional<Currency> currency_from_string(StringView const& currency);
+Optional<ListPatterns> get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style);
+Optional<ListPatternType> list_pattern_type_from_string(StringView const& list_pattern_type);
+Optional<ListPatternStyle> list_pattern_style_from_string(StringView const& list_pattern_style);
+
Optional<StringView> resolve_variant_alias(StringView const& variant);
Optional<StringView> resolve_subdivision_alias(StringView const& subdivision);
@@ -476,6 +555,15 @@ static void generate_unicode_locale_implementation(Core::File& file, UnicodeLoca
#include <LibUnicode/UnicodeLocale.h>
namespace Unicode {
+
+struct Patterns {
+ ListPatternType type;
+ ListPatternStyle style;
+ StringView start;
+ StringView middle;
+ StringView end;
+ StringView pair;
+};
)~~~");
auto format_mapping_name = [](StringView format, StringView name) {
@@ -507,7 +595,7 @@ namespace Unicode {
generator.append(String::formatted(" }}, {}", list.size()));
};
- auto append_mapping_list = [&](String name, auto const& keys, auto const& mappings) {
+ auto append_string_list = [&](String name, auto const& keys, auto const& mappings) {
generator.set("name", name);
generator.set("size", String::number(keys.size()));
@@ -539,21 +627,46 @@ static constexpr Array<StringView, @size@> @name@ { {
)~~~");
};
- auto append_mapping = [&](StringView name, StringView format, auto const& keys, auto get_mapping_callback) {
+ auto append_list_patterns = [&](StringView name, Vector<ListPatterns> const& list_patterns) {
+ generator.set("name", name);
+ generator.set("size", String::number(list_patterns.size()));
+
+ generator.append(R"~~~(
+static constexpr Array<Patterns, @size@> @name@ { {)~~~");
+
+ for (auto const& list_pattern : list_patterns) {
+ generator.set("type"sv, String::formatted("ListPatternType::{}", format_identifier({}, list_pattern.type)));
+ generator.set("style"sv, String::formatted("ListPatternStyle::{}", format_identifier({}, list_pattern.style)));
+ generator.set("start"sv, String::formatted("\"{}\"sv", list_pattern.start));
+ generator.set("middle"sv, String::formatted("\"{}\"sv", list_pattern.middle));
+ generator.set("end"sv, String::formatted("\"{}\"sv", list_pattern.end));
+ generator.set("pair"sv, String::formatted("\"{}\"sv", list_pattern.pair));
+
+ generator.append(R"~~~(
+ { @type@, @style@, @start@, @middle@, @end@, @pair@ },)~~~");
+ }
+
+ generator.append(R"~~~(
+} };
+)~~~");
+ };
+
+ auto append_mapping = [&](StringView type, StringView name, StringView format, auto format_list_callback) {
Vector<String> mapping_names;
for (auto const& locale : locale_data.locales) {
auto mapping_name = format_mapping_name(format, locale.key);
- append_mapping_list(mapping_name, keys, get_mapping_callback(locale.value));
+ format_list_callback(mapping_name, locale.value);
mapping_names.append(move(mapping_name));
}
quick_sort(mapping_names);
+ generator.set("type", type);
generator.set("name", name);
generator.set("size", String::number(locale_data.locales.size()));
generator.append(R"~~~(
-static constexpr Array<Span<StringView const>, @size@> @name@ { {
+static constexpr Array<Span<@type@ const>, @size@> @name@ { {
)~~~");
constexpr size_t max_values_per_row = 10;
@@ -577,10 +690,11 @@ static constexpr Array<Span<StringView const>, @size@> @name@ { {
)~~~");
};
- append_mapping("s_languages"sv, "s_languages_{}", locale_data.languages, [](auto const& value) { return value.languages; });
- append_mapping("s_territories"sv, "s_territories_{}", locale_data.territories, [](auto const& value) { return value.territories; });
- append_mapping("s_scripts"sv, "s_scripts_{}", locale_data.scripts, [](auto const& value) { return value.scripts; });
- append_mapping("s_currencies"sv, "s_currencies_{}", locale_data.currencies, [](auto const& value) { return value.currencies; });
+ append_mapping("StringView"sv, "s_languages"sv, "s_languages_{}", [&](auto const& name, auto const& value) { append_string_list(name, locale_data.languages, value.languages); });
+ append_mapping("StringView"sv, "s_territories"sv, "s_territories_{}", [&](auto const& name, auto const& value) { append_string_list(name, locale_data.territories, value.territories); });
+ append_mapping("StringView"sv, "s_scripts"sv, "s_scripts_{}", [&](auto const& name, auto const& value) { append_string_list(name, locale_data.scripts, value.scripts); });
+ append_mapping("StringView"sv, "s_currencies"sv, "s_currencies_{}", [&](auto const& name, auto const& value) { append_string_list(name, locale_data.currencies, value.currencies); });
+ append_mapping("Patterns"sv, "s_list_patterns"sv, "s_list_patterns_{}", [&](auto const& name, auto const& value) { append_list_patterns(name, value.list_patterns); });
generator.append(R"~~~(
struct CanonicalLanguageID {
@@ -866,7 +980,35 @@ Optional<StringView> resolve_@enum_snake@_alias(StringView const& @enum_snake@)
append_alias_search("variant"sv, locale_data.variant_aliases);
append_alias_search("subdivision"sv, locale_data.subdivision_aliases);
+ append_from_string("ListPatternType"sv, "list_pattern_type"sv, locale_data.list_pattern_types);
+ append_from_string("ListPatternStyle"sv, "list_pattern_style"sv, locale_data.list_pattern_styles);
+
generator.append(R"~~~(
+Optional<ListPatterns> get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style)
+{
+ auto locale_value = locale_from_string(locale);
+ if (!locale_value.has_value())
+ return {};
+
+ auto type_value = list_pattern_type_from_string(list_pattern_type);
+ if (!type_value.has_value())
+ return {};
+
+ auto style_value = list_pattern_style_from_string(list_pattern_style);
+ if (!style_value.has_value())
+ return {};
+
+ auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
+ auto const& locale_list_patterns = s_list_patterns.at(locale_index);
+
+ for (auto const& list_patterns : locale_list_patterns) {
+ if ((list_patterns.type == type_value) && (list_patterns.style == style_value))
+ return ListPatterns { list_patterns.start, list_patterns.middle, list_patterns.end, list_patterns.pair };
+ }
+
+ return {};
+}
+
void resolve_complex_language_aliases(Unicode::LanguageID& language_id)
{
for (auto const& map : s_complex_alias) {
@@ -969,7 +1111,7 @@ int main(int argc, char** argv)
auto generated_implementation_file = open_file(generated_implementation_path, "-c/--generated-implementation-path", Core::OpenMode::ReadWrite);
UnicodeLocaleData locale_data;
- parse_all_locales(core_path, locale_names_path, numbers_path, locale_data);
+ parse_all_locales(core_path, locale_names_path, misc_path, numbers_path, locale_data);
generate_unicode_locale_header(generated_header_file, locale_data);
generate_unicode_locale_implementation(generated_implementation_file, locale_data);