diff options
author | Timothy Flynn <trflynn89@pm.me> | 2022-10-26 16:04:38 -0400 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2022-10-27 12:59:56 +0200 |
commit | bd592480e467c9f06cc4abc4297af8a5ad977b95 (patch) | |
tree | 839ce12266cce557ed37c780f98d090bc1ca3701 /Meta | |
parent | b5a876e606be7133d2728758df6a158f543fac73 (diff) | |
download | serenity-bd592480e467c9f06cc4abc4297af8a5ad977b95.zip |
Meta: Replace Bash script for generating emoji.txt with C++ generator
We currently have two build-time parsers for the UCD's emoji-test.txt
file. To prepare for future changes, this removes the Bash parser and
moves its functionality to the newer C++ parser.
Diffstat (limited to 'Meta')
-rw-r--r-- | Meta/CMake/unicode_data.cmake | 27 | ||||
-rw-r--r-- | Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp | 91 | ||||
-rwxr-xr-x | Meta/generate-emoji-txt.sh | 77 |
3 files changed, 101 insertions, 94 deletions
diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake index 4af4325b0b..f003b5eb71 100644 --- a/Meta/CMake/unicode_data.cmake +++ b/Meta/CMake/unicode_data.cmake @@ -61,7 +61,6 @@ set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/${SENTENCE_BREAK_PROP_SOURCE}") string(REGEX REPLACE "([0-9]+\\.[0-9]+)\\.[0-9]+" "\\1" EMOJI_VERSION "${UCD_VERSION}") set(EMOJI_TEST_URL "https://unicode.org/Public/emoji/${EMOJI_VERSION}/emoji-test.txt") set(EMOJI_TEST_PATH "${UCD_PATH}/emoji-test.txt") -set(EMOJI_GENERATOR_PATH "${SerenityOS_SOURCE_DIR}/Meta/generate-emoji-txt.sh") set(EMOJI_RES_PATH "${SerenityOS_SOURCE_DIR}/Base/res/emoji") set(EMOJI_SERENITY_PATH "${SerenityOS_SOURCE_DIR}/Base/home/anon/Documents/emoji-serenity.txt") set(EMOJI_INSTALL_PATH "${CMAKE_BINARY_DIR}/Root/home/anon/Documents/emoji.txt") @@ -96,6 +95,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) set(EMOJI_DATA_HEADER EmojiData.h) set(EMOJI_DATA_IMPLEMENTATION EmojiData.cpp) + if (SERENITYOS) + set(EMOJI_INSTALL_ARG -i "${EMOJI_INSTALL_PATH}") + endif() + invoke_generator( "UnicodeData" Lagom::GenerateUnicodeData @@ -110,22 +113,14 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${UCD_VERSION_FILE}" "${EMOJI_DATA_HEADER}" "${EMOJI_DATA_IMPLEMENTATION}" - arguments -e "${EMOJI_TEST_PATH}" -s "${EMOJI_SERENITY_PATH}" - ) + arguments "${EMOJI_INSTALL_ARG}" -e "${EMOJI_TEST_PATH}" -s "${EMOJI_SERENITY_PATH}" -r "${EMOJI_RES_PATH}" - if (SERENITYOS) - add_custom_command( - OUTPUT "${EMOJI_INSTALL_PATH}" - COMMAND "${EMOJI_GENERATOR_PATH}" "${EMOJI_TEST_PATH}" "${EMOJI_RES_PATH}" "${EMOJI_INSTALL_PATH}" - # This will make this command only run when the modified time of the directory changes, - # which only happens if files within it are added or deleted, but not when a file is modified. - # This is fine for this use-case, because the contents of a file changing should not affect - # the generated emoji.txt file. - DEPENDS "${EMOJI_GENERATOR_PATH}" "${EMOJI_RES_PATH}" "${EMOJI_TEST_PATH}" - USES_TERMINAL - ) - add_custom_target(generate_emoji_txt ALL DEPENDS "${EMOJI_INSTALL_PATH}") - endif() + # This will make this command only run when the modified time of the directory changes, + # which only happens if files within it are added or deleted, but not when a file is modified. + # This is fine for this use-case, because the contents of a file changing should not affect + # the generated emoji.txt file. + dependencies "${EMOJI_RES_PATH}" "${EMOJI_SERENITY_PATH}" + ) set(UNICODE_DATA_SOURCES ${UNICODE_DATA_HEADER} diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp index 444fd5f475..012e8cc985 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp @@ -11,6 +11,7 @@ #include <AK/StringUtils.h> #include <AK/Types.h> #include <LibCore/ArgsParser.h> +#include <LibCore/Directory.h> #include <LibCore/Stream.h> #include <LibUnicode/Emoji.h> @@ -19,10 +20,14 @@ constexpr auto s_string_index_type = "u16"sv; struct Emoji { StringIndexType name { 0 }; + Optional<String> image_path; Unicode::EmojiGroup group; + String subgroup; u32 display_order { 0 }; - String code_points_name; Vector<u32> code_points; + String code_points_name; + String encoded_code_points; + String status; }; struct EmojiData { @@ -30,13 +35,32 @@ struct EmojiData { Vector<Emoji> emojis; }; +static void set_image_path_for_emoji(StringView emoji_resource_path, Emoji& emoji) +{ + StringBuilder builder; + + for (auto code_point : emoji.code_points) { + if (code_point == 0xfe0f) + continue; + if (!builder.is_empty()) + builder.append('_'); + builder.appendff("U+{:X}", code_point); + } + + auto path = String::formatted("{}/{}.png", emoji_resource_path, builder.build()); + if (Core::Stream::File::exists(path)) + emoji.image_path = move(path); +} + static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, EmojiData& emoji_data) { static constexpr auto group_header = "# group: "sv; + static constexpr auto subgroup_header = "# subgroup: "sv; Array<u8, 1024> buffer; Unicode::EmojiGroup group; + String subgroup; u32 display_order { 0 }; while (TRY(file.can_read_line())) { @@ -48,6 +72,8 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo if (line.starts_with(group_header)) { auto name = line.substring_view(group_header.length()); group = Unicode::emoji_group_from_string(name); + } else if (line.starts_with(subgroup_header)) { + subgroup = line.substring_view(subgroup_header.length()); } continue; @@ -61,6 +87,7 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo Emoji emoji {}; emoji.group = group; + emoji.subgroup = subgroup; emoji.display_order = display_order++; auto code_points = line.substring_view(0, *status_index).split_view(' '); @@ -81,6 +108,8 @@ static ErrorOr<void> parse_emoji_test_data(Core::Stream::BufferedFile& file, Emo auto name = emoji_and_name.substring_view(emoji_and_name_spaces[2]).trim_whitespace(); emoji.name = emoji_data.unique_strings.ensure(name.to_titlecase_string()); emoji.code_points_name = String::join('_', code_points); + emoji.encoded_code_points = emoji_and_name.substring_view(0, emoji_and_name_spaces[1]).trim_whitespace(); + emoji.status = line.substring_view(*status_index + 1, *emoji_and_name_index - *status_index - 1).trim_whitespace(); TRY(emoji_data.emojis.try_append(move(emoji))); } @@ -241,24 +270,74 @@ Optional<Emoji> find_emoji_for_code_points(Span<u32 const> code_points) return {}; } +static ErrorOr<void> generate_emoji_installation(Core::Stream::BufferedFile& file, EmojiData const& emoji_data) +{ + StringBuilder builder; + SourceGenerator generator { builder }; + + auto current_group = Unicode::EmojiGroup::Unknown; + StringView current_subgroup; + + for (auto const& emoji : emoji_data.emojis) { + if (!emoji.image_path.has_value()) + continue; + if (emoji.group == Unicode::EmojiGroup::SerenityOS) + continue; // SerenityOS emojis are in emoji-serenity.txt + + if (current_group != emoji.group) { + if (!builder.is_empty()) + generator.append("\n"sv); + + generator.set("group"sv, Unicode::emoji_group_to_string(emoji.group)); + generator.append("# group: @group@\n"); + + current_group = emoji.group; + } + + if (current_subgroup != emoji.subgroup) { + generator.set("subgroup"sv, emoji.subgroup); + generator.append("\n# subgroup: @subgroup@\n"); + + current_subgroup = emoji.subgroup; + } + + generator.set("emoji"sv, emoji.encoded_code_points); + generator.set("name"sv, emoji_data.unique_strings.get(emoji.name)); + generator.set("status"sv, emoji.status); + + generator.append("@emoji@"sv); + generator.append(" - "sv); + generator.append(String::join(" "sv, emoji.code_points, "U+{:X}"sv)); + generator.append(" @name@ (@status@)\n"sv); + } + + TRY(file.write(generator.as_string_view().bytes())); + return {}; +} + ErrorOr<int> serenity_main(Main::Arguments arguments) { StringView generated_header_path; StringView generated_implementation_path; + StringView generated_installation_path; StringView emoji_test_path; StringView emoji_serenity_path; + StringView emoji_resource_path; Core::ArgsParser args_parser; args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path"); args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); + args_parser.add_option(generated_installation_path, "Path to the emoji.txt file to generate", "generated-installation-path", 'i', "generated-installation-path"); args_parser.add_option(emoji_test_path, "Path to emoji-test.txt file", "emoji-test-path", 'e', "emoji-test-path"); args_parser.add_option(emoji_serenity_path, "Path to emoji-serenity.txt file", "emoji-serenity-path", 's', "emoji-serenity-path"); + args_parser.add_option(emoji_resource_path, "Path to the /res/emoji directory", "emoji-resource-path", 'r', "emoji-resource-path"); args_parser.parse(arguments); auto generated_header_file = TRY(open_file(generated_header_path, Core::Stream::OpenMode::Write)); auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::Stream::OpenMode::Write)); auto emoji_test_file = TRY(open_file(emoji_test_path, Core::Stream::OpenMode::Read)); auto emoji_serenity_file = TRY(open_file(emoji_serenity_path, Core::Stream::OpenMode::Read)); + VERIFY(Core::Stream::File::exists(emoji_resource_path)); EmojiData emoji_data {}; TRY(parse_emoji_test_data(*emoji_test_file, emoji_data)); @@ -267,5 +346,15 @@ ErrorOr<int> serenity_main(Main::Arguments arguments) TRY(generate_emoji_data_header(*generated_header_file, emoji_data)); TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data)); + if (!generated_installation_path.is_empty()) { + TRY(Core::Directory::create(LexicalPath { generated_installation_path }.parent(), Core::Directory::CreateDirectories::Yes)); + + for (auto& emoji : emoji_data.emojis) + set_image_path_for_emoji(emoji_resource_path, emoji); + + auto generated_installation_file = TRY(open_file(generated_installation_path, Core::Stream::OpenMode::Write)); + TRY(generate_emoji_installation(*generated_installation_file, emoji_data)); + } + return 0; } diff --git a/Meta/generate-emoji-txt.sh b/Meta/generate-emoji-txt.sh deleted file mode 100755 index f195aa4d0d..0000000000 --- a/Meta/generate-emoji-txt.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash - -set -e - -if [ $# -ne 3 ]; then - echo "Usage: $0 <input emoji-test.txt file> <emoji image directory> <output path>" - exit 1 -fi - -INPUT_FILE="$1" -EMOJI_DIR="$2" -OUTPUT_PATH="$3" - -# empty the generated file first -:>| "$OUTPUT_PATH" - -first_heading=true -printed_group_header=false -printed_subgroup_header=false -while IFS= read -r line -do - if [[ $line == \#\ group:\ * ]]; then - current_group="$line" - printed_group_header=false - elif [[ $line == \#\ subgroup:\ * ]]; then - current_subgroup="$line" - printed_subgroup_header=false - elif [[ ${#line} -ne 0 && $line != \#* ]]; then - codepoints_string=${line%%;*} - IFS=" " read -r -a codepoints <<< "$codepoints_string" - for i in "${!codepoints[@]}"; do - # strip leading zeros - codepoints[$i]="${codepoints[$i]#"${codepoints[$i]%%[!0]*}"}" - # add U+ prefix - codepoints[$i]="U+${codepoints[$i]}" - done - - # when doing a lookup we want to remove all U+FE0F (emoji presentation specifier) codepoints - lookup_filename_parts=() - for codepoint in "${codepoints[@]}"; do - if [[ $codepoint != "U+FE0F" ]]; then - lookup_filename_parts+=("$codepoint") - fi - done - - IFS=_ - lookup_filename="${lookup_filename_parts[*]}.png" - - if [ -f "$EMOJI_DIR/$lookup_filename" ]; then - if [ $printed_group_header = false ]; then - if [ $first_heading = false ]; then - echo "" >> "$OUTPUT_PATH" - fi - echo "$current_group" >> "$OUTPUT_PATH" - first_heading=false - printed_group_header=true - fi - if [ $printed_subgroup_header = false ]; then - echo "" >> "$OUTPUT_PATH" - echo "$current_subgroup" >> "$OUTPUT_PATH" - printed_subgroup_header=true - fi - - emoji_and_name=${line#*# } - emoji=${emoji_and_name%% E*} - name_with_version=${emoji_and_name#* } - name=${name_with_version#* } - qualification=${line#*; } - qualification=${qualification%%#*} - # remove trailing whitespace characters - qualification="${qualification%"${qualification##*[![:space:]]}"}" - - IFS=" " - echo "$emoji - ${codepoints[*]} ${name^^} ($qualification)" >> "$OUTPUT_PATH" - fi - fi -done < "$INPUT_FILE" |