summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibUnicode
diff options
context:
space:
mode:
authorTimothy Flynn <trflynn89@pm.me>2021-08-04 07:46:36 -0400
committerLinus Groh <mail@linusgroh.de>2021-08-04 13:50:32 +0100
commit9113f892a77237df2504d5a994dd4aec81434909 (patch)
tree540a4d3b4ddf958ec462ce8b9386973a514bd73d /Userland/Libraries/LibUnicode
parent484ccfadc366545d6969947a6d9fb48cb88be3cf (diff)
downloadserenity-9113f892a77237df2504d5a994dd4aec81434909.zip
LibUnicode: Parse UCD emoji-data.txt and generate Unicode property
Diffstat (limited to 'Userland/Libraries/LibUnicode')
-rw-r--r--Userland/Libraries/LibUnicode/CharacterTypes.cpp12
-rw-r--r--Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp4
-rw-r--r--Userland/Libraries/LibUnicode/unicode_data.cmake11
3 files changed, 19 insertions, 8 deletions
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index 114cfb1c61..cf866f2ebc 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -275,12 +275,12 @@ bool is_ecma262_property([[maybe_unused]] Property property)
case Unicode::Property::Default_Ignorable_Code_Point:
case Unicode::Property::Deprecated:
case Unicode::Property::Diacritic:
- // case Unicode::Property::Emoji:
- // case Unicode::Property::Emoji_Component:
- // case Unicode::Property::Emoji_Modifier:
- // case Unicode::Property::Emoji_Modifier_Base:
- // case Unicode::Property::Emoji_Presentation:
- // case Unicode::Property::Extended_Pictographic:
+ case Unicode::Property::Emoji:
+ case Unicode::Property::Emoji_Component:
+ case Unicode::Property::Emoji_Modifier:
+ case Unicode::Property::Emoji_Modifier_Base:
+ case Unicode::Property::Emoji_Presentation:
+ case Unicode::Property::Extended_Pictographic:
case Unicode::Property::Extender:
case Unicode::Property::Grapheme_Base:
case Unicode::Property::Grapheme_Extend:
diff --git a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp
index 6d3b30308c..619ed6ebd1 100644
--- a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp
+++ b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp
@@ -828,6 +828,7 @@ int main(int argc, char** argv)
char const* scripts_path = nullptr;
char const* script_extensions_path = nullptr;
char const* word_break_path = nullptr;
+ char const* emoji_data_path = nullptr;
Core::ArgsParser args_parser;
args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
@@ -841,6 +842,7 @@ int main(int argc, char** argv)
args_parser.add_option(scripts_path, "Path to Scripts.txt file", "scripts-path", 'r', "scripts-path");
args_parser.add_option(script_extensions_path, "Path to ScriptExtensions.txt file", "script-extensions-path", 'x', "script-extensions-path");
args_parser.add_option(word_break_path, "Path to WordBreakProperty.txt file", "word-break-path", 'w', "word-break-path");
+ args_parser.add_option(emoji_data_path, "Path to emoji-data.txt file", "emoji-data-path", 'e', "emoji-data-path");
args_parser.parse(argc, argv);
auto open_file = [&](StringView path, StringView flags, Core::OpenMode mode = Core::OpenMode::ReadOnly) {
@@ -870,11 +872,13 @@ int main(int argc, char** argv)
auto scripts_file = open_file(scripts_path, "-r/--scripts-path");
auto script_extensions_file = open_file(script_extensions_path, "-x/--script-extensions-path");
auto word_break_file = open_file(word_break_path, "-w/--word-break-path");
+ auto emoji_data_file = open_file(emoji_data_path, "-e/--emoji-data-path");
UnicodeData unicode_data {};
parse_special_casing(special_casing_file, unicode_data);
parse_prop_list(prop_list_file, unicode_data.prop_list);
parse_prop_list(derived_core_prop_file, unicode_data.prop_list);
+ parse_prop_list(emoji_data_file, unicode_data.prop_list);
parse_alias_list(prop_alias_file, unicode_data.prop_list, unicode_data.prop_aliases);
parse_prop_list(scripts_file, unicode_data.script_list);
parse_prop_list(script_extensions_file, unicode_data.script_extensions, true);
diff --git a/Userland/Libraries/LibUnicode/unicode_data.cmake b/Userland/Libraries/LibUnicode/unicode_data.cmake
index dbaeb907b1..2c547654c5 100644
--- a/Userland/Libraries/LibUnicode/unicode_data.cmake
+++ b/Userland/Libraries/LibUnicode/unicode_data.cmake
@@ -27,6 +27,9 @@ set(SCRIPT_EXTENSIONS_PATH ${CMAKE_BINARY_DIR}/UCD/ScriptExtensions.txt)
set(WORD_BREAK_URL https://www.unicode.org/Public/13.0.0/ucd/auxiliary/WordBreakProperty.txt)
set(WORD_BREAK_PATH ${CMAKE_BINARY_DIR}/UCD/WordBreakProperty.txt)
+set(EMOJI_DATA_URL https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt)
+set(EMOJI_DATA_PATH ${CMAKE_BINARY_DIR}/UCD/emoji-data.txt)
+
if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
if (NOT EXISTS ${UNICODE_DATA_PATH})
message(STATUS "Downloading UCD UnicodeData.txt from ${UNICODE_DATA_URL}...")
@@ -64,6 +67,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
message(STATUS "Downloading UCD WordBreakProperty.txt from ${WORD_BREAK_URL}...")
file(DOWNLOAD ${WORD_BREAK_URL} ${WORD_BREAK_PATH} INACTIVITY_TIMEOUT 10)
endif()
+ if (NOT EXISTS ${EMOJI_DATA_PATH})
+ message(STATUS "Downloading UCD emoji-data.txt from ${EMOJI_DATA_URL}...")
+ file(DOWNLOAD ${EMOJI_DATA_URL} ${EMOJI_DATA_PATH} INACTIVITY_TIMEOUT 10)
+ endif()
set(UNICODE_DATA_HEADER LibUnicode/UnicodeData.h)
set(UNICODE_DATA_IMPLEMENTATION LibUnicode/UnicodeData.cpp)
@@ -75,9 +82,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
add_custom_command(
OUTPUT ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION}
- COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -x ${SCRIPT_EXTENSIONS_PATH} -w ${WORD_BREAK_PATH}
+ COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -x ${SCRIPT_EXTENSIONS_PATH} -w ${WORD_BREAK_PATH} -e ${EMOJI_DATA_PATH}
VERBATIM
- DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${SCRIPT_EXTENSIONS_PATH} ${WORD_BREAK_PATH}
+ DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${SCRIPT_EXTENSIONS_PATH} ${WORD_BREAK_PATH} ${EMOJI_DATA_PATH}
)
set(UNICODE_DATA_SOURCES ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION})