diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-07-27 22:21:53 -0400 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-07-28 23:42:29 +0200 |
commit | dff156b7c679624734f54f1aa0e2cd26d78a8a56 (patch) | |
tree | a4d1cf8375f481910b5ccd116de96d001bf6a18c /Userland/Libraries/LibUnicode/CodeGenerators | |
parent | 7827aede6fd8f113e95f7d7225d77039accd4cfa (diff) | |
download | serenity-dff156b7c679624734f54f1aa0e2cd26d78a8a56.zip |
LibUnicode: Reduce Unicode data generator boilerplate
There's a fair amount of boilerplate when e.g. adding a new UCD file to
parse or a new enumeration to generate. Reduce the overhead by adding
helper lambdas. Also add a couple of missing spec links that describe the
UCD fields.
Diffstat (limited to 'Userland/Libraries/LibUnicode/CodeGenerators')
-rw-r--r-- | Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp | 155 |
1 file changed, 47 insertions, 108 deletions
diff --git a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp index 986723cdda..7b0c6f0fcb 100644 --- a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp +++ b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp @@ -42,10 +42,12 @@ struct SpecialCasing { // PropList source: https://www.unicode.org/Public/13.0.0/ucd/PropList.txt // Property descriptions: https://www.unicode.org/reports/tr44/tr44-13.html#PropList.txt +// https://www.unicode.org/reports/tr44/tr44-13.html#WordBreakProperty.txt using PropList = HashMap<String, Vector<CodePointRange>>; // UnicodeData source: https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt // Field descriptions: https://www.unicode.org/reports/tr44/tr44-13.html#UnicodeData.txt +// https://www.unicode.org/reports/tr44/#General_Category_Values struct CodePointData { u32 index { 0 }; u32 code_point { 0 }; @@ -150,9 +152,6 @@ static void parse_special_casing(Core::File& file, UnicodeData& unicode_data) unicode_data.special_casing.append(move(casing)); } - - quick_sort(unicode_data.locales); - quick_sort(unicode_data.conditions); } static void parse_prop_list(Core::File& file, PropList& prop_list) @@ -279,11 +278,10 @@ static void parse_unicode_data(Core::File& file, UnicodeData& unicode_data) unicode_data.code_point_data.append(move(data)); } - quick_sort(unicode_data.general_categories); unicode_data.last_contiguous_code_point = *last_contiguous_code_point; } -static void generate_unicode_data_header(UnicodeData const& unicode_data) +static void generate_unicode_data_header(UnicodeData& unicode_data) { StringBuilder builder; SourceGenerator generator { builder }; @@ -291,79 +289,43 @@ static void generate_unicode_data_header(UnicodeData const& unicode_data) generator.set("special_casing_size", String::number(unicode_data.largest_special_casing_size)); generator.set("prop_list_size", 
String::number(unicode_data.largest_prop_list_size)); - generator.append(R"~~~( -#pragma once - -#include <AK/Optional.h> -#include <AK/Types.h> - -namespace Unicode { - -enum class Locale { - None,)~~~"); - - for (auto const& locale : unicode_data.locales) { - generator.set("locale", locale); - generator.append(R"~~~( - @locale@,)~~~"); - } - - generator.append(R"~~~( -}; - -enum class Condition { - None,)~~~"); + auto generate_enum = [&](StringView name, StringView default_, Vector<String> values) { + quick_sort(values); - for (auto const& condition : unicode_data.conditions) { - generator.set("condition", condition); + generator.set("name", name); generator.append(R"~~~( - @condition@,)~~~"); - } +enum class @name@ {)~~~"); - generator.append(R"~~~( -}; + if (!default_.is_empty()) + values.prepend(default_); -// https://www.unicode.org/reports/tr44/#General_Category_Values -enum class GeneralCategory {)~~~"); + for (auto const& value : values) { + generator.set("value", value); + generator.append(R"~~~( + @value@,)~~~"); + } - for (auto const& general_category : unicode_data.general_categories) { - generator.set("general_category", general_category); generator.append(R"~~~( - @general_category@,)~~~"); - } - - generator.append(R"~~~( }; - -enum class Property {)~~~"); - - auto properties = unicode_data.prop_list.keys(); - quick_sort(properties); - - for (auto const& property : properties) { - generator.set("property", property); - generator.append(R"~~~( - @property@,)~~~"); - } +)~~~"); + }; generator.append(R"~~~( -}; +#pragma once -enum class WordBreakProperty { - Other,)~~~"); +#include <AK/Optional.h> +#include <AK/Types.h> - properties = unicode_data.word_break_prop_list.keys(); - quick_sort(properties); +namespace Unicode { +)~~~"); - for (auto const& property : properties) { - generator.set("property", property); - generator.append(R"~~~( - @property@,)~~~"); - } + generate_enum("Locale"sv, "None"sv, move(unicode_data.locales)); + 
generate_enum("Condition"sv, "None"sv, move(unicode_data.conditions)); + generate_enum("GeneralCategory"sv, {}, move(unicode_data.general_categories)); + generate_enum("Property"sv, {}, unicode_data.prop_list.keys()); + generate_enum("WordBreakProperty"sv, "Other"sv, unicode_data.word_break_prop_list.keys()); generator.append(R"~~~( -}; - struct SpecialCasing { u32 code_point { 0 }; @@ -590,56 +552,33 @@ int main(int argc, char** argv) args_parser.print_usage(stderr, argv[0]); return 1; } - if (!unicode_data_path) { - warnln("-u/--unicode-data-path is required"); - args_parser.print_usage(stderr, argv[0]); - return 1; - } - if (!special_casing_path) { - warnln("-s/--special-casing-path is required"); - args_parser.print_usage(stderr, argv[0]); - return 1; - } - if (!prop_list_path) { - warnln("-p/--prop-list-path is required"); - args_parser.print_usage(stderr, argv[0]); - return 1; - } - if (!word_break_path) { - warnln("-w/--word-break-path is required"); - args_parser.print_usage(stderr, argv[0]); - return 1; - } - auto unicode_data_file_or_error = Core::File::open(unicode_data_path, Core::OpenMode::ReadOnly); - if (unicode_data_file_or_error.is_error()) { - warnln("Failed to open {}: {}", unicode_data_path, unicode_data_file_or_error.release_error()); - return 1; - } + auto open_file = [&](StringView path, StringView flags) { + if (path.is_empty()) { + warnln("{} is required", flags); + args_parser.print_usage(stderr, argv[0]); + exit(1); + } - auto special_casing_file_or_error = Core::File::open(special_casing_path, Core::OpenMode::ReadOnly); - if (special_casing_file_or_error.is_error()) { - warnln("Failed to open {}: {}", special_casing_path, special_casing_file_or_error.release_error()); - return 1; - } + auto file_or_error = Core::File::open(path, Core::OpenMode::ReadOnly); + if (file_or_error.is_error()) { + warnln("Failed to open {}: {}", path, file_or_error.release_error()); + exit(1); + } - auto prop_list_file_or_error = 
Core::File::open(prop_list_path, Core::OpenMode::ReadOnly); - if (prop_list_file_or_error.is_error()) { - warnln("Failed to open {}: {}", prop_list_path, prop_list_file_or_error.release_error()); - return 1; - } + return file_or_error.release_value(); + }; - auto word_break_file_or_error = Core::File::open(word_break_path, Core::OpenMode::ReadOnly); - if (word_break_file_or_error.is_error()) { - warnln("Failed to open {}: {}", word_break_path, word_break_file_or_error.release_error()); - return 1; - } + auto unicode_data_file = open_file(unicode_data_path, "-u/--unicode-data-path"); + auto special_casing_file = open_file(special_casing_path, "-s/--special-casing-path"); + auto prop_list_file = open_file(prop_list_path, "-p/--prop-list-path"); + auto word_break_file = open_file(word_break_path, "-w/--word-break-path"); UnicodeData unicode_data {}; - parse_special_casing(special_casing_file_or_error.value(), unicode_data); - parse_prop_list(prop_list_file_or_error.value(), unicode_data.prop_list); - parse_prop_list(word_break_file_or_error.value(), unicode_data.word_break_prop_list); - parse_unicode_data(unicode_data_file_or_error.value(), unicode_data); + parse_special_casing(special_casing_file, unicode_data); + parse_prop_list(prop_list_file, unicode_data.prop_list); + parse_prop_list(word_break_file, unicode_data.word_break_prop_list); + parse_unicode_data(unicode_data_file, unicode_data); if (generate_header) generate_unicode_data_header(unicode_data); |