/* * Copyright (c) 2021, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct ListPatterns { String type; String style; String start; String middle; String end; String pair; }; struct Locale { String language; Optional territory; Optional variant; HashMap languages; HashMap territories; HashMap scripts; HashMap currencies; Vector list_patterns; }; struct CanonicalLanguageID { String language {}; String script {}; String region {}; Vector variants {}; }; struct LanguageMapping { CanonicalLanguageID key {}; CanonicalLanguageID alias {}; }; struct UnicodeLocaleData { HashMap locales; Vector languages; Vector territories; Vector scripts; Vector variants; Vector currencies; Vector list_pattern_types; Vector list_pattern_styles; HashMap language_aliases; HashMap territory_aliases; HashMap script_aliases; HashMap variant_aliases; HashMap subdivision_aliases; Vector complex_mappings; Vector likely_subtags; size_t max_variant_size { 0 }; }; static void write_to_file_if_different(Core::File& file, StringView contents) { auto const current_contents = file.read_all(); if (StringView { current_contents.bytes() } == contents) return; VERIFY(file.seek(0)); VERIFY(file.truncate(0)); VERIFY(file.write(contents)); } static Optional parse_language(StringView language) { CanonicalLanguageID language_id {}; auto segments = language.split_view('-'); VERIFY(!segments.is_empty()); size_t index = 0; if (Unicode::is_unicode_language_subtag(segments[index])) { language_id.language = segments[index]; if (segments.size() == ++index) return language_id; } else { return {}; } if (Unicode::is_unicode_script_subtag(segments[index])) { language_id.script = segments[index]; if (segments.size() == ++index) return language_id; } if (Unicode::is_unicode_region_subtag(segments[index])) { language_id.region = segments[index]; if (segments.size() == ++index) return language_id; } while (index < segments.size()) { if (!Unicode::is_unicode_variant_subtag(segments[index])) return {}; language_id.variants.append(segments[index++]); } return language_id; } static Optional parse_language_mapping(StringView key, StringView alias) { auto parsed_key = parse_language(key); if (!parsed_key.has_value()) return {}; auto parsed_alias = parse_language(alias); if (!parsed_alias.has_value()) return {}; return LanguageMapping { parsed_key.release_value(), parsed_alias.release_value() }; } static void parse_core_aliases(String core_supplemental_path, UnicodeLocaleData& locale_data) { LexicalPath core_aliases_path(move(core_supplemental_path)); core_aliases_path = core_aliases_path.append("aliases.json"sv); VERIFY(Core::File::exists(core_aliases_path.string())); auto core_aliases_file_or_error = Core::File::open(core_aliases_path.string(), Core::OpenMode::ReadOnly); VERIFY(!core_aliases_file_or_error.is_error()); auto core_aliases = JsonParser(core_aliases_file_or_error.value()->read_all()).parse(); VERIFY(core_aliases.has_value()); auto const& supplemental_object = core_aliases->as_object().get("supplemental"sv); auto const& metadata_object = supplemental_object.as_object().get("metadata"sv); auto const& alias_object = metadata_object.as_object().get("alias"sv); auto append_aliases = [&](auto& alias_object, auto& alias_map) { alias_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { auto alias = value.as_object().get("_replacement"sv).as_string(); if (key.contains('-')) { auto mapping = parse_language_mapping(key, alias); if (!mapping.has_value()) return; locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size); locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size); locale_data.complex_mappings.append(mapping.release_value()); } else { alias_map.set(key, move(alias)); } }); }; append_aliases(alias_object.as_object().get("languageAlias"sv), locale_data.language_aliases); append_aliases(alias_object.as_object().get("territoryAlias"sv), locale_data.territory_aliases); append_aliases(alias_object.as_object().get("scriptAlias"sv), locale_data.script_aliases); append_aliases(alias_object.as_object().get("variantAlias"sv), locale_data.variant_aliases); append_aliases(alias_object.as_object().get("subdivisionAlias"sv), locale_data.subdivision_aliases); } static void parse_likely_subtags(String core_supplemental_path, UnicodeLocaleData& locale_data) { LexicalPath likely_subtags_path(move(core_supplemental_path)); likely_subtags_path = likely_subtags_path.append("likelySubtags.json"sv); VERIFY(Core::File::exists(likely_subtags_path.string())); auto likely_subtags_file_or_error = Core::File::open(likely_subtags_path.string(), Core::OpenMode::ReadOnly); VERIFY(!likely_subtags_file_or_error.is_error()); auto likely_subtags = JsonParser(likely_subtags_file_or_error.value()->read_all()).parse(); VERIFY(likely_subtags.has_value()); auto const& supplemental_object = likely_subtags->as_object().get("supplemental"sv); auto const& likely_subtags_object = supplemental_object.as_object().get("likelySubtags"sv); likely_subtags_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { auto mapping = parse_language_mapping(key, value.as_string()); if (!mapping.has_value()) return; locale_data.max_variant_size = max(mapping->key.variants.size(), locale_data.max_variant_size); locale_data.max_variant_size = max(mapping->alias.variants.size(), locale_data.max_variant_size); locale_data.likely_subtags.append(mapping.release_value()); }); } static void parse_identity(String locale_path, UnicodeLocaleData& locale_data, Locale& locale) { LexicalPath languages_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them. languages_path = languages_path.append("languages.json"sv); VERIFY(Core::File::exists(languages_path.string())); auto languages_file_or_error = Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly); VERIFY(!languages_file_or_error.is_error()); auto languages = JsonParser(languages_file_or_error.value()->read_all()).parse(); VERIFY(languages.has_value()); auto const& main_object = languages->as_object().get("main"sv); auto const& locale_object = main_object.as_object().get(languages_path.parent().basename()); auto const& identity_object = locale_object.as_object().get("identity"sv); auto const& language_string = identity_object.as_object().get("language"sv); auto const& territory_string = identity_object.as_object().get("territory"sv); auto const& variant_string = identity_object.as_object().get("variant"sv); locale.language = language_string.as_string(); if (!locale_data.languages.contains_slow(locale.language)) locale_data.languages.append(locale.language); if (territory_string.is_string()) { locale.territory = territory_string.as_string(); if (!locale_data.territories.contains_slow(*locale.territory)) locale_data.territories.append(*locale.territory); } if (variant_string.is_string()) { locale.variant = variant_string.as_string(); if (!locale_data.variants.contains_slow(*locale.variant)) locale_data.variants.append(*locale.variant); } } static void parse_locale_languages(String locale_path, Locale& locale) { LexicalPath languages_path(move(locale_path)); languages_path = languages_path.append("languages.json"sv); VERIFY(Core::File::exists(languages_path.string())); auto languages_file_or_error = Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly); VERIFY(!languages_file_or_error.is_error()); auto languages = JsonParser(languages_file_or_error.value()->read_all()).parse(); VERIFY(languages.has_value()); auto const& main_object = languages->as_object().get("main"sv); auto const& locale_object = main_object.as_object().get(languages_path.parent().basename()); auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv); auto const& languages_object = locale_display_names_object.as_object().get("languages"sv); languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { locale.languages.set(key, value.as_string()); }); } static void parse_locale_territories(String locale_path, Locale& locale) { LexicalPath territories_path(move(locale_path)); territories_path = territories_path.append("territories.json"sv); VERIFY(Core::File::exists(territories_path.string())); auto territories_file_or_error = Core::File::open(territories_path.string(), Core::OpenMode::ReadOnly); VERIFY(!territories_file_or_error.is_error()); auto territories = JsonParser(territories_file_or_error.value()->read_all()).parse(); VERIFY(territories.has_value()); auto const& main_object = territories->as_object().get("main"sv); auto const& locale_object = main_object.as_object().get(territories_path.parent().basename()); auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv); auto const& territories_object = locale_display_names_object.as_object().get("territories"sv); territories_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { locale.territories.set(key, value.as_string()); }); } static void parse_locale_scripts(String locale_path, UnicodeLocaleData& locale_data, Locale& locale) { LexicalPath scripts_path(move(locale_path)); scripts_path = scripts_path.append("scripts.json"sv); VERIFY(Core::File::exists(scripts_path.string())); auto scripts_file_or_error = Core::File::open(scripts_path.string(), Core::OpenMode::ReadOnly); VERIFY(!scripts_file_or_error.is_error()); auto scripts = JsonParser(scripts_file_or_error.value()->read_all()).parse(); VERIFY(scripts.has_value()); auto const& main_object = scripts->as_object().get("main"sv); auto const& locale_object = main_object.as_object().get(scripts_path.parent().basename()); auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv); auto const& scripts_object = locale_display_names_object.as_object().get("scripts"sv); scripts_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { locale.scripts.set(key, value.as_string()); if (!locale_data.scripts.contains_slow(key)) locale_data.scripts.append(key); }); } static void parse_locale_list_patters(String misc_path, UnicodeLocaleData& locale_data, Locale& locale) { LexicalPath list_patterns_path(move(misc_path)); list_patterns_path = list_patterns_path.append("listPatterns.json"sv); VERIFY(Core::File::exists(list_patterns_path.string())); auto list_patterns_file_or_error = Core::File::open(list_patterns_path.string(), Core::OpenMode::ReadOnly); VERIFY(!list_patterns_file_or_error.is_error()); auto list_patterns = JsonParser(list_patterns_file_or_error.value()->read_all()).parse(); VERIFY(list_patterns.has_value()); auto const& main_object = list_patterns->as_object().get("main"sv); auto const& locale_object = main_object.as_object().get(list_patterns_path.parent().basename()); auto const& list_patterns_object = locale_object.as_object().get("listPatterns"sv); auto list_pattern_type = [](StringView key) { if (key.contains("type-standard"sv)) return "conjunction"sv; if (key.contains("type-or"sv)) return "disjunction"sv; if (key.contains("type-unit"sv)) return "unit"sv; VERIFY_NOT_REACHED(); }; auto list_pattern_style = [](StringView key) { if (key.contains("short"sv)) return "short"sv; if (key.contains("narrow"sv)) return "narrow"sv; return "long"sv; }; list_patterns_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { auto type = list_pattern_type(key); auto style = list_pattern_style(key); auto start = value.as_object().get("start"sv).as_string(); auto middle = value.as_object().get("middle"sv).as_string(); auto end = value.as_object().get("end"sv).as_string(); auto pair = value.as_object().get("2"sv).as_string(); if (!locale_data.list_pattern_types.contains_slow(type)) locale_data.list_pattern_types.append(type); if (!locale_data.list_pattern_styles.contains_slow(style)) locale_data.list_pattern_styles.append(style); locale.list_patterns.append({ move(type), move(style), move(start), move(middle), move(end), move(pair) }); }); } static void parse_locale_currencies(String numbers_path, UnicodeLocaleData& locale_data, Locale& locale) { LexicalPath currencies_path(move(numbers_path)); currencies_path = currencies_path.append("currencies.json"sv); VERIFY(Core::File::exists(currencies_path.string())); auto currencies_file_or_error = Core::File::open(currencies_path.string(), Core::OpenMode::ReadOnly); VERIFY(!currencies_file_or_error.is_error()); auto currencies = JsonParser(currencies_file_or_error.value()->read_all()).parse(); VERIFY(currencies.has_value()); auto const& main_object = currencies->as_object().get("main"sv); auto const& locale_object = main_object.as_object().get(currencies_path.parent().basename()); auto const& locale_numbers_object = locale_object.as_object().get("numbers"sv); auto const& currencies_object = locale_numbers_object.as_object().get("currencies"sv); currencies_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) { auto const& display_name = value.as_object().get("displayName"sv); locale.currencies.set(key, display_name.as_string()); if (!locale_data.currencies.contains_slow(key)) locale_data.currencies.append(key); }); } static Core::DirIterator path_to_dir_iterator(String path) { LexicalPath lexical_path(move(path)); lexical_path = lexical_path.append("main"sv); VERIFY(Core::File::is_directory(lexical_path.string())); Core::DirIterator iterator(lexical_path.string(), Core::DirIterator::SkipParentAndBaseDir); if (iterator.has_error()) { warnln("{}: {}", lexical_path.string(), iterator.error_string()); VERIFY_NOT_REACHED(); } return iterator; } static void parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, UnicodeLocaleData& locale_data) { auto locale_names_iterator = path_to_dir_iterator(move(locale_names_path)); auto misc_iterator = path_to_dir_iterator(move(misc_path)); auto numbers_iterator = path_to_dir_iterator(move(numbers_path)); LexicalPath core_supplemental_path(move(core_path)); core_supplemental_path = core_supplemental_path.append("supplemental"sv); VERIFY(Core::File::is_directory(core_supplemental_path.string())); parse_core_aliases(core_supplemental_path.string(), locale_data); parse_likely_subtags(core_supplemental_path.string(), locale_data); while (locale_names_iterator.has_next()) { auto locale_path = locale_names_iterator.next_full_path(); VERIFY(Core::File::is_directory(locale_path)); auto& locale = locale_data.locales.ensure(LexicalPath::basename(locale_path)); parse_identity(locale_path, locale_data, locale); parse_locale_languages(locale_path, locale); parse_locale_territories(locale_path, locale); parse_locale_scripts(locale_path, locale_data, locale); } while (misc_iterator.has_next()) { auto misc_path = misc_iterator.next_full_path(); VERIFY(Core::File::is_directory(misc_path)); auto& locale = locale_data.locales.ensure(LexicalPath::basename(misc_path)); parse_locale_list_patters(misc_path, locale_data, locale); } while (numbers_iterator.has_next()) { auto numbers_path = numbers_iterator.next_full_path(); VERIFY(Core::File::is_directory(numbers_path)); auto& locale = locale_data.locales.ensure(LexicalPath::basename(numbers_path)); parse_locale_currencies(numbers_path, locale_data, locale); } } static String format_identifier(StringView owner, String identifier) { identifier.replace("-"sv, "_"sv, true); if (all_of(identifier, is_ascii_digit)) return String::formatted("{}_{}", owner[0], identifier); return identifier.to_titlecase(); } static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData& locale_data) { StringBuilder builder; SourceGenerator generator { builder }; auto generate_enum = [&](StringView name, StringView default_, Vector& values) { quick_sort(values); generator.set("name", name); generator.set("underlying", ((values.size() + !default_.is_empty()) < 256) ? "u8"sv : "u16"sv); generator.append(R"~~~( enum class @name@ : @underlying@ {)~~~"); if (!default_.is_empty()) { generator.set("default", default_); generator.append(R"~~~( @default@,)~~~"); } for (auto const& value : values) { generator.set("value", format_identifier(name, value)); generator.append(R"~~~( @value@,)~~~"); } generator.append(R"~~~( }; )~~~"); }; generator.append(R"~~~( #pragma once #include #include #include #include namespace Unicode { )~~~"); auto locales = locale_data.locales.keys(); generate_enum("Locale"sv, "None"sv, locales); generate_enum("Language"sv, {}, locale_data.languages); generate_enum("Territory"sv, {}, locale_data.territories); generate_enum("ScriptTag"sv, {}, locale_data.scripts); generate_enum("Currency"sv, {}, locale_data.currencies); generate_enum("Variant"sv, {}, locale_data.variants); generate_enum("ListPatternType"sv, {}, locale_data.list_pattern_types); generate_enum("ListPatternStyle"sv, {}, locale_data.list_pattern_styles); generator.append(R"~~~( namespace Detail { Optional locale_from_string(StringView const& locale); Optional get_locale_language_mapping(StringView locale, StringView language); Optional language_from_string(StringView const& language); Optional resolve_language_alias(StringView const& language); Optional get_locale_territory_mapping(StringView locale, StringView territory); Optional territory_from_string(StringView const& territory); Optional resolve_territory_alias(StringView const& territory); Optional get_locale_script_tag_mapping(StringView locale, StringView script_tag); Optional script_tag_from_string(StringView const& script_tag); Optional resolve_script_tag_alias(StringView const& script_tag); Optional get_locale_currency_mapping(StringView locale, StringView currency); Optional currency_from_string(StringView const& currency); Optional get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style); Optional list_pattern_type_from_string(StringView const& list_pattern_type); Optional list_pattern_style_from_string(StringView const& list_pattern_style); Optional resolve_variant_alias(StringView const& variant); Optional resolve_subdivision_alias(StringView const& subdivision); void resolve_complex_language_aliases(Unicode::LanguageID& language_id); Optional add_likely_subtags(Unicode::LanguageID const& language_id); Optional resolve_most_likely_territory(Unicode::LanguageID const& language_id); } } )~~~"); write_to_file_if_different(file, generator.as_string_view()); } static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data) { StringBuilder builder; SourceGenerator generator { builder }; generator.set("locales_size"sv, String::number(locale_data.locales.size())); generator.set("territories_size", String::number(locale_data.territories.size())); generator.set("variants_size", String::number(locale_data.max_variant_size)); generator.append(R"~~~( #include #include #include #include #include namespace Unicode { struct Patterns { ListPatternType type; ListPatternStyle style; StringView start; StringView middle; StringView end; StringView pair; }; )~~~"); auto format_mapping_name = [](StringView format, StringView name) { auto mapping_name = name.to_lowercase_string(); mapping_name.replace("-"sv, "_"sv, true); return String::formatted(format, mapping_name); }; auto append_string = [&](StringView value) { if (value.is_empty()) generator.append(", {}"sv); else generator.append(String::formatted(", \"{}\"sv", value)); }; auto append_list_and_size = [&](auto const& list) { if (list.is_empty()) { generator.append(", {}, 0"); return; } bool first = true; generator.append(", {"); for (auto const& item : list) { generator.append(first ? " " : ", "); generator.append(String::formatted("\"{}\"sv", item)); first = false; } generator.append(String::formatted(" }}, {}", list.size())); }; auto append_string_list = [&](String name, auto const& keys, auto const& mappings) { generator.set("name", name); generator.set("size", String::number(keys.size())); generator.append(R"~~~( static constexpr Array @name@ { { )~~~"); constexpr size_t max_values_per_row = 10; size_t values_in_current_row = 0; for (auto const& key : keys) { if (values_in_current_row++ > 0) generator.append(" "); if (auto it = mappings.find(key); it != mappings.end()) generator.set("mapping"sv, String::formatted("\"{}\"sv", it->value)); else generator.set("mapping"sv, "{}"sv); generator.append("@mapping@,"); if (values_in_current_row == max_values_per_row) { values_in_current_row = 0; generator.append("\n "); } } generator.append(R"~~~( } }; )~~~"); }; auto append_list_patterns = [&](StringView name, Vector const& list_patterns) { generator.set("name", name); generator.set("size", String::number(list_patterns.size())); generator.append(R"~~~( static constexpr Array @name@ { {)~~~"); for (auto const& list_pattern : list_patterns) { generator.set("type"sv, String::formatted("ListPatternType::{}", format_identifier({}, list_pattern.type))); generator.set("style"sv, String::formatted("ListPatternStyle::{}", format_identifier({}, list_pattern.style))); generator.set("start"sv, String::formatted("\"{}\"sv", list_pattern.start)); generator.set("middle"sv, String::formatted("\"{}\"sv", list_pattern.middle)); generator.set("end"sv, String::formatted("\"{}\"sv", list_pattern.end)); generator.set("pair"sv, String::formatted("\"{}\"sv", list_pattern.pair)); generator.append(R"~~~( { @type@, @style@, @start@, @middle@, @end@, @pair@ },)~~~"); } generator.append(R"~~~( } }; )~~~"); }; auto append_mapping = [&](StringView type, StringView name, StringView format, auto format_list_callback) { Vector mapping_names; for (auto const& locale : locale_data.locales) { auto mapping_name = format_mapping_name(format, locale.key); format_list_callback(mapping_name, locale.value); mapping_names.append(move(mapping_name)); } quick_sort(mapping_names); generator.set("type", type); generator.set("name", name); generator.set("size", String::number(locale_data.locales.size())); generator.append(R"~~~( static constexpr Array, @size@> @name@ { { )~~~"); constexpr size_t max_values_per_row = 10; size_t values_in_current_row = 0; for (auto& mapping_name : mapping_names) { if (values_in_current_row++ > 0) generator.append(" "); generator.set("name", move(mapping_name)); generator.append("@name@.span(),"); if (values_in_current_row == max_values_per_row) { values_in_current_row = 0; generator.append("\n "); } } generator.append(R"~~~( } }; )~~~"); }; append_mapping("StringView"sv, "s_languages"sv, "s_languages_{}", [&](auto const& name, auto const& value) { append_string_list(name, locale_data.languages, value.languages); }); append_mapping("StringView"sv, "s_territories"sv, "s_territories_{}", [&](auto const& name, auto const& value) { append_string_list(name, locale_data.territories, value.territories); }); append_mapping("StringView"sv, "s_scripts"sv, "s_scripts_{}", [&](auto const& name, auto const& value) { append_string_list(name, locale_data.scripts, value.scripts); }); append_mapping("StringView"sv, "s_currencies"sv, "s_currencies_{}", [&](auto const& name, auto const& value) { append_string_list(name, locale_data.currencies, value.currencies); }); append_mapping("Patterns"sv, "s_list_patterns"sv, "s_list_patterns_{}", [&](auto const& name, auto const& value) { append_list_patterns(name, value.list_patterns); }); generator.append(R"~~~( struct CanonicalLanguageID { Unicode::LanguageID to_unicode_language_id() const { Unicode::LanguageID language_id {}; language_id.variants.ensure_capacity(variants_size); language_id.language = language.to_string(); if (!script.is_empty()) language_id.script = script.to_string(); if (!region.is_empty()) language_id.region = region.to_string(); for (size_t i = 0; i < variants_size; ++i) language_id.variants.append(variants[i].to_string()); return language_id; } bool matches_variants(Vector const& other_variants) const { if (variants_size == 0) return true; if (other_variants.size() != variants_size) return false; for (size_t i = 0; i < variants_size; ++i) { if (variants[i] != other_variants[i]) return false; } return true; }; StringView language {}; StringView script {}; StringView region {}; Array variants {}; size_t variants_size { 0 }; }; struct LanguageMapping { CanonicalLanguageID key; CanonicalLanguageID alias; }; )~~~"); auto append_complex_mapping = [&](StringView name, auto& mappings) { generator.set("size", String::number(mappings.size())); generator.set("name"sv, name); generator.append(R"~~~( static constexpr Array s_@name@ { { )~~~"); quick_sort(mappings, [](auto const& lhs, auto const& rhs) { auto const& lhs_language = lhs.key.language; auto const& rhs_language = rhs.key.language; // Sort the keys such that "und" language tags are at the end, as those are less specific. if (lhs_language.starts_with("und"sv) && !rhs_language.starts_with("und"sv)) return false; if (!lhs_language.starts_with("und"sv) && rhs_language.starts_with("und"sv)) return true; return lhs_language < rhs_language; }); for (auto const& mapping : mappings) { generator.set("language"sv, mapping.key.language); generator.append(" { { \"@language@\"sv"); append_string(mapping.key.script); append_string(mapping.key.region); append_list_and_size(mapping.key.variants); generator.set("language"sv, mapping.alias.language); generator.append(" }, { \"@language@\"sv"); append_string(mapping.alias.script); append_string(mapping.alias.region); append_list_and_size(mapping.alias.variants); generator.append(" } },\n"); } generator.append("} };\n"); }; append_complex_mapping("complex_alias"sv, locale_data.complex_mappings); append_complex_mapping("likely_subtags"sv, locale_data.likely_subtags); generator.append(R"~~~( static LanguageMapping const* resolve_likely_subtag(Unicode::LanguageID const& language_id) { // https://unicode.org/reports/tr35/#Likely_Subtags enum class State { LanguageScriptRegion, LanguageRegion, LanguageScript, Language, UndScript, Done, }; auto state = State::LanguageScriptRegion; while (state != State::Done) { CanonicalLanguageID search_key; switch (state) { case State::LanguageScriptRegion: state = State::LanguageRegion; if (!language_id.script.has_value() || !language_id.region.has_value()) continue; search_key.language = *language_id.language; search_key.script = *language_id.script; search_key.region = *language_id.region; break; case State::LanguageRegion: state = State::LanguageScript; if (!language_id.region.has_value()) continue; search_key.language = *language_id.language; search_key.region = *language_id.region; break; case State::LanguageScript: state = State::Language; if (!language_id.script.has_value()) continue; search_key.language = *language_id.language; search_key.script = *language_id.script; break; case State::Language: state = State::UndScript; search_key.language = *language_id.language; break; case State::UndScript: state = State::Done; if (!language_id.script.has_value()) continue; search_key.language = "und"sv; search_key.script = *language_id.script; break; default: VERIFY_NOT_REACHED(); } for (auto const& map : s_likely_subtags) { if (map.key.language != search_key.language) continue; if (map.key.script != search_key.script) continue; if (map.key.region != search_key.region) continue; return ↦ } } return nullptr; } namespace Detail { )~~~"); auto append_mapping_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) { generator.set("enum_title", enum_title); generator.set("enum_snake", enum_snake); generator.set("collection_name", collection_name); generator.append(R"~~~( Optional get_locale_@enum_snake@_mapping(StringView locale, StringView @enum_snake@) { auto locale_value = locale_from_string(locale); if (!locale_value.has_value()) return {}; auto @enum_snake@_value = @enum_snake@_from_string(@enum_snake@); if (!@enum_snake@_value.has_value()) return {}; auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. auto @enum_snake@_index = to_underlying(*@enum_snake@_value); auto const& mappings = @collection_name@.at(locale_index); auto @enum_snake@_mapping = mappings.at(@enum_snake@_index); if (@enum_snake@_mapping.is_empty()) return {}; return @enum_snake@_mapping; } )~~~"); }; auto append_from_string = [&](StringView enum_title, StringView enum_snake, Vector const& values) { generator.set("enum_title", enum_title); generator.set("enum_snake", enum_snake); generator.append(R"~~~( Optional<@enum_title@> @enum_snake@_from_string(StringView const& @enum_snake@) { static HashMap @enum_snake@_values { {)~~~"); for (auto const& value : values) { generator.set("key"sv, value); generator.set("value"sv, format_identifier(enum_title, value)); generator.append(R"~~~( { "@key@"sv, @enum_title@::@value@ },)~~~"); } generator.append(R"~~~( } }; if (auto value = @enum_snake@_values.get(@enum_snake@); value.has_value()) return value.value(); return {}; } )~~~"); }; auto append_alias_search = [&](StringView enum_snake, HashMap const& aliases) { generator.set("enum_snake", enum_snake); generator.append(R"~~~( Optional resolve_@enum_snake@_alias(StringView const& @enum_snake@) { static HashMap @enum_snake@_aliases { { )~~~"); constexpr size_t max_values_per_row = 10; size_t values_in_current_row = 0; for (auto const& alias : aliases) { if (values_in_current_row++ > 0) generator.append(" "); generator.set("key"sv, alias.key); generator.set("alias"sv, alias.value); generator.append("{ \"@key@\"sv, \"@alias@\"sv },"); if (values_in_current_row == max_values_per_row) { generator.append("\n "); values_in_current_row = 0; } } generator.append(R"~~~( } }; if (auto alias = @enum_snake@_aliases.get(@enum_snake@); alias.has_value()) return alias.value(); return {}; } )~~~"); }; append_from_string("Locale"sv, "locale"sv, locale_data.locales.keys()); append_mapping_search("Language"sv, "language"sv, "s_languages"sv); append_from_string("Language"sv, "language"sv, locale_data.languages); append_alias_search("language"sv, locale_data.language_aliases); append_mapping_search("Territory"sv, "territory"sv, "s_territories"sv); append_from_string("Territory"sv, "territory"sv, locale_data.territories); append_alias_search("territory"sv, locale_data.territory_aliases); append_mapping_search("ScriptTag"sv, "script_tag"sv, "s_scripts"sv); append_from_string("ScriptTag"sv, "script_tag"sv, locale_data.scripts); append_alias_search("script_tag"sv, locale_data.script_aliases); append_mapping_search("Currency"sv, "currency"sv, "s_currencies"sv); append_from_string("Currency"sv, "currency"sv, locale_data.currencies); append_alias_search("variant"sv, locale_data.variant_aliases); append_alias_search("subdivision"sv, locale_data.subdivision_aliases); append_from_string("ListPatternType"sv, "list_pattern_type"sv, locale_data.list_pattern_types); append_from_string("ListPatternStyle"sv, "list_pattern_style"sv, locale_data.list_pattern_styles); generator.append(R"~~~( Optional get_locale_list_pattern_mapping(StringView locale, StringView list_pattern_type, StringView list_pattern_style) { auto locale_value = locale_from_string(locale); if (!locale_value.has_value()) return {}; auto type_value = list_pattern_type_from_string(list_pattern_type); if (!type_value.has_value()) return {}; auto style_value = list_pattern_style_from_string(list_pattern_style); if (!style_value.has_value()) return {}; auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. auto const& locale_list_patterns = s_list_patterns.at(locale_index); for (auto const& list_patterns : locale_list_patterns) { if ((list_patterns.type == type_value) && (list_patterns.style == style_value)) return ListPatterns { list_patterns.start, list_patterns.middle, list_patterns.end, list_patterns.pair }; } return {}; } void resolve_complex_language_aliases(Unicode::LanguageID& language_id) { for (auto const& map : s_complex_alias) { if ((map.key.language != language_id.language) && (map.key.language != "und"sv)) continue; if (!map.key.script.is_empty() && (map.key.script != language_id.script)) continue; if (!map.key.region.is_empty() && (map.key.region != language_id.region)) continue; if (!map.key.matches_variants(language_id.variants)) continue; auto alias = map.alias.to_unicode_language_id(); if (alias.language == "und"sv) alias.language = move(language_id.language); if (map.key.script.is_empty() && !alias.script.has_value()) alias.script = move(language_id.script); if (map.key.region.is_empty() && !alias.region.has_value()) alias.region = move(language_id.region); if (map.key.variants_size == 0 && alias.variants.is_empty()) alias.variants = move(language_id.variants); language_id = move(alias); break; } } Optional add_likely_subtags(Unicode::LanguageID const& language_id) { // https://www.unicode.org/reports/tr35/#Likely_Subtags auto const* likely_subtag = resolve_likely_subtag(language_id); if (likely_subtag == nullptr) return {}; auto maximized = language_id; auto const& key = likely_subtag->key; auto const& alias = likely_subtag->alias; if (maximized.language == "und"sv) maximized.language = alias.language; if (!maximized.script.has_value() || (!key.script.is_empty() && !alias.script.is_empty())) maximized.script = alias.script; if (!maximized.region.has_value() || (!key.region.is_empty() && !alias.region.is_empty())) maximized.region = alias.region; return maximized; } Optional resolve_most_likely_territory(Unicode::LanguageID const& language_id) { if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr) return likely_subtag->alias.region; return {}; } } } )~~~"); write_to_file_if_different(file, generator.as_string_view()); } int main(int argc, char** argv) { char const* generated_header_path = nullptr; char const* generated_implementation_path = nullptr; char const* core_path = nullptr; char const* locale_names_path = nullptr; char const* misc_path = nullptr; char const* numbers_path = nullptr; Core::ArgsParser args_parser; args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path"); args_parser.add_option(misc_path, "Path to cldr-misc directory", "misc-path", 'm', "misc-path"); args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path"); args_parser.parse(argc, argv); auto open_file = [&](StringView path, StringView flags, Core::OpenMode mode = Core::OpenMode::ReadOnly) { if (path.is_empty()) { warnln("{} is required", flags); args_parser.print_usage(stderr, argv[0]); exit(1); } auto file_or_error = Core::File::open(path, mode); if (file_or_error.is_error()) { warnln("Failed to open {}: {}", path, file_or_error.release_error()); exit(1); } return file_or_error.release_value(); }; auto generated_header_file = open_file(generated_header_path, "-h/--generated-header-path", Core::OpenMode::ReadWrite); auto generated_implementation_file = open_file(generated_implementation_path, "-c/--generated-implementation-path", Core::OpenMode::ReadWrite); UnicodeLocaleData locale_data; parse_all_locales(core_path, locale_names_path, misc_path, numbers_path, locale_data); generate_unicode_locale_header(generated_header_file, locale_data); generate_unicode_locale_implementation(generated_implementation_file, locale_data); return 0; }