summaryrefslogtreecommitdiff
path: root/Meta/CMake/unicode_data.cmake
diff options
context:
space:
mode:
author: Timothy Flynn <trflynn89@pm.me> 2022-04-06 14:32:26 -0400
committer: Brian Gianforcaro <b.gianfo@gmail.com> 2022-04-06 17:12:08 -0700
commit: 8a46794ff8b7696c524d7162a6b7581cf05ab1e1 (patch)
tree: 71f275ece6ba39283455218d604f03d708aece86 /Meta/CMake/unicode_data.cmake
parent: 8545e2dec0b7760ee0b49023d5e94881a6364126 (diff)
download: serenity-8a46794ff8b7696c524d7162a6b7581cf05ab1e1.zip
LibUnicode: Replace individual UCD file downloads with single UCD.zip
Instead of downloading nearly 20 files individually, we can download a single .zip file, similar to how we download a single CLDR .zip. This reduces the number of connections to, and downloads from, unicode.org.
Diffstat (limited to 'Meta/CMake/unicode_data.cmake')
-rw-r--r-- Meta/CMake/unicode_data.cmake 130
1 files changed, 67 insertions, 63 deletions
diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake
index c4965a473a..dfce43fe92 100644
--- a/Meta/CMake/unicode_data.cmake
+++ b/Meta/CMake/unicode_data.cmake
@@ -13,56 +13,59 @@ set(CLDR_PATH "${CMAKE_BINARY_DIR}/CLDR" CACHE PATH "Download location for CLDR
set(UCD_VERSION_FILE "${UCD_PATH}/version.txt")
set(CLDR_VERSION_FILE "${CLDR_PATH}/version.txt")
-set(UNICODE_DATA_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/UnicodeData.txt")
-set(UNICODE_DATA_PATH "${UCD_PATH}/UnicodeData.txt")
+set(UCD_ZIP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/UCD.zip")
+set(UCD_ZIP_PATH "${UCD_PATH}/UCD.zip")
-set(SPECIAL_CASING_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/SpecialCasing.txt")
-set(SPECIAL_CASING_PATH "${UCD_PATH}/SpecialCasing.txt")
+set(UNICODE_DATA_SOURCE "UnicodeData.txt")
+set(UNICODE_DATA_PATH "${UCD_PATH}/${UNICODE_DATA_SOURCE}")
-set(DERIVED_GENERAL_CATEGORY_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/extracted/DerivedGeneralCategory.txt")
-set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/DerivedGeneralCategory.txt")
+set(SPECIAL_CASING_SOURCE "SpecialCasing.txt")
+set(SPECIAL_CASING_PATH "${UCD_PATH}/${SPECIAL_CASING_SOURCE}")
-set(PROP_LIST_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropList.txt")
-set(PROP_LIST_PATH "${UCD_PATH}/PropList.txt")
+set(DERIVED_GENERAL_CATEGORY_SOURCE "extracted/DerivedGeneralCategory.txt")
+set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/${DERIVED_GENERAL_CATEGORY_SOURCE}")
-set(DERIVED_CORE_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/DerivedCoreProperties.txt")
-set(DERIVED_CORE_PROP_PATH "${UCD_PATH}/DerivedCoreProperties.txt")
+set(PROP_LIST_SOURCE "PropList.txt")
+set(PROP_LIST_PATH "${UCD_PATH}/${PROP_LIST_SOURCE}")
-set(DERIVED_BINARY_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/extracted/DerivedBinaryProperties.txt")
-set(DERIVED_BINARY_PROP_PATH "${UCD_PATH}/DerivedBinaryProperties.txt")
+set(DERIVED_CORE_PROP_SOURCE "DerivedCoreProperties.txt")
+set(DERIVED_CORE_PROP_PATH "${UCD_PATH}/${DERIVED_CORE_PROP_SOURCE}")
-set(PROP_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropertyAliases.txt")
-set(PROP_ALIAS_PATH "${UCD_PATH}/PropertyAliases.txt")
+set(DERIVED_BINARY_PROP_SOURCE "extracted/DerivedBinaryProperties.txt")
+set(DERIVED_BINARY_PROP_PATH "${UCD_PATH}/${DERIVED_BINARY_PROP_SOURCE}")
-set(PROP_VALUE_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropertyValueAliases.txt")
-set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/PropertyValueAliases.txt")
+set(PROP_ALIAS_SOURCE "PropertyAliases.txt")
+set(PROP_ALIAS_PATH "${UCD_PATH}/${PROP_ALIAS_SOURCE}")
-set(NAME_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/NameAliases.txt")
-set(NAME_ALIAS_PATH "${UCD_PATH}/NameAliases.txt")
+set(PROP_VALUE_ALIAS_SOURCE "PropertyValueAliases.txt")
+set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/${PROP_VALUE_ALIAS_SOURCE}")
-set(SCRIPTS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/Scripts.txt")
-set(SCRIPTS_PATH "${UCD_PATH}/Scripts.txt")
+set(NAME_ALIAS_SOURCE "NameAliases.txt")
+set(NAME_ALIAS_PATH "${UCD_PATH}/${NAME_ALIAS_SOURCE}")
-set(SCRIPT_EXTENSIONS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/ScriptExtensions.txt")
-set(SCRIPT_EXTENSIONS_PATH "${UCD_PATH}/ScriptExtensions.txt")
+set(SCRIPTS_SOURCE "Scripts.txt")
+set(SCRIPTS_PATH "${UCD_PATH}/${SCRIPTS_SOURCE}")
-set(BLOCKS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/Blocks.txt")
-set(BLOCKS_PATH "${UCD_PATH}/Blocks.txt")
+set(SCRIPT_EXTENSIONS_SOURCE "ScriptExtensions.txt")
+set(SCRIPT_EXTENSIONS_PATH "${UCD_PATH}/${SCRIPT_EXTENSIONS_SOURCE}")
-set(EMOJI_DATA_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/emoji/emoji-data.txt")
-set(EMOJI_DATA_PATH "${UCD_PATH}/emoji-data.txt")
+set(BLOCKS_SOURCE "Blocks.txt")
+set(BLOCKS_PATH "${UCD_PATH}/${BLOCKS_SOURCE}")
-set(NORM_PROPS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/DerivedNormalizationProps.txt")
-set(NORM_PROPS_PATH "${UCD_PATH}/DerivedNormalizationProps.txt")
+set(EMOJI_DATA_SOURCE "emoji/emoji-data.txt")
+set(EMOJI_DATA_PATH "${UCD_PATH}/${EMOJI_DATA_SOURCE}")
-set(GRAPHEME_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/GraphemeBreakProperty.txt")
-set(GRAPHEME_BREAK_PROP_PATH "${UCD_PATH}/GraphemeBreakProperty.txt")
+set(NORM_PROPS_SOURCE "DerivedNormalizationProps.txt")
+set(NORM_PROPS_PATH "${UCD_PATH}/${NORM_PROPS_SOURCE}")
-set(WORD_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/WordBreakProperty.txt")
-set(WORD_BREAK_PROP_PATH "${UCD_PATH}/WordBreakProperty.txt")
+set(GRAPHEME_BREAK_PROP_SOURCE "auxiliary/GraphemeBreakProperty.txt")
+set(GRAPHEME_BREAK_PROP_PATH "${UCD_PATH}/${GRAPHEME_BREAK_PROP_SOURCE}")
-set(SENTENCE_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/SentenceBreakProperty.txt")
-set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/SentenceBreakProperty.txt")
+set(WORD_BREAK_PROP_SOURCE "auxiliary/WordBreakProperty.txt")
+set(WORD_BREAK_PROP_PATH "${UCD_PATH}/${WORD_BREAK_PROP_SOURCE}")
+
+set(SENTENCE_BREAK_PROP_SOURCE "auxiliary/SentenceBreakProperty.txt")
+set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/${SENTENCE_BREAK_PROP_SOURCE}")
set(CLDR_ZIP_URL "https://github.com/unicode-org/cldr-json/releases/download/${CLDR_REAL_VERSION}/cldr-${CLDR_REAL_VERSION}-json-modern.zip")
set(CLDR_ZIP_PATH "${CLDR_PATH}/cldr.zip")
@@ -88,12 +91,12 @@ set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}")
set(CLDR_UNITS_SOURCE cldr-units-modern)
set(CLDR_UNITS_PATH "${CLDR_PATH}/${CLDR_UNITS_SOURCE}")
-function(extract_cldr_file source path)
- if(EXISTS "${CLDR_ZIP_PATH}" AND NOT EXISTS "${path}")
- message(STATUS "Extracting CLDR ${source} from ${CLDR_ZIP_PATH}...")
- execute_process(COMMAND "${UNZIP_TOOL}" -q "${CLDR_ZIP_PATH}" "${source}/**" -d "${CLDR_PATH}" RESULT_VARIABLE unzip_result)
+function(extract_path dest_dir zip_path source_path dest_path)
+ if (EXISTS "${zip_path}" AND NOT EXISTS "${dest_path}")
+ message(STATUS "Extracting ${source_path} from ${zip_path}")
+ execute_process(COMMAND "${UNZIP_TOOL}" -q "${zip_path}" "${source_path}" -d "${dest_dir}" RESULT_VARIABLE unzip_result)
if (NOT unzip_result EQUAL 0)
- message(FATAL_ERROR "Failed to unzip ${source} from ${CLDR_ZIP_PATH} with status ${unzip_result}")
+ message(FATAL_ERROR "Failed to unzip ${source_path} from ${zip_path} with status ${unzip_result}")
endif()
endif()
endfunction()
@@ -102,32 +105,33 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
remove_path_if_version_changed("${UCD_VERSION}" "${UCD_VERSION_FILE}" "${UCD_PATH}")
remove_path_if_version_changed("${CLDR_VERSION}" "${CLDR_VERSION_FILE}" "${CLDR_PATH}")
- download_file("${UNICODE_DATA_URL}" "${UNICODE_DATA_PATH}")
- download_file("${SPECIAL_CASING_URL}" "${SPECIAL_CASING_PATH}")
- download_file("${DERIVED_GENERAL_CATEGORY_URL}" "${DERIVED_GENERAL_CATEGORY_PATH}")
- download_file("${PROP_LIST_URL}" "${PROP_LIST_PATH}")
- download_file("${DERIVED_CORE_PROP_URL}" "${DERIVED_CORE_PROP_PATH}")
- download_file("${DERIVED_BINARY_PROP_URL}" "${DERIVED_BINARY_PROP_PATH}")
- download_file("${PROP_ALIAS_URL}" "${PROP_ALIAS_PATH}")
- download_file("${PROP_VALUE_ALIAS_URL}" "${PROP_VALUE_ALIAS_PATH}")
- download_file("${NAME_ALIAS_URL}" "${NAME_ALIAS_PATH}")
- download_file("${SCRIPTS_URL}" "${SCRIPTS_PATH}")
- download_file("${SCRIPT_EXTENSIONS_URL}" "${SCRIPT_EXTENSIONS_PATH}")
- download_file("${BLOCKS_URL}" "${BLOCKS_PATH}")
- download_file("${EMOJI_DATA_URL}" "${EMOJI_DATA_PATH}")
- download_file("${NORM_PROPS_URL}" "${NORM_PROPS_PATH}")
- download_file("${GRAPHEME_BREAK_PROP_URL}" "${GRAPHEME_BREAK_PROP_PATH}")
- download_file("${WORD_BREAK_PROP_URL}" "${WORD_BREAK_PROP_PATH}")
- download_file("${SENTENCE_BREAK_PROP_URL}" "${SENTENCE_BREAK_PROP_PATH}")
+ download_file("${UCD_ZIP_URL}" "${UCD_ZIP_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${UNICODE_DATA_SOURCE}" "${UNICODE_DATA_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SPECIAL_CASING_SOURCE}" "${SPECIAL_CASING_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_GENERAL_CATEGORY_SOURCE}" "${DERIVED_GENERAL_CATEGORY_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_LIST_SOURCE}" "${PROP_LIST_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_CORE_PROP_SOURCE}" "${DERIVED_CORE_PROP_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_BINARY_PROP_SOURCE}" "${DERIVED_BINARY_PROP_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_ALIAS_SOURCE}" "${PROP_ALIAS_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_VALUE_ALIAS_SOURCE}" "${PROP_VALUE_ALIAS_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NAME_ALIAS_SOURCE}" "${NAME_ALIAS_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPTS_SOURCE}" "${SCRIPTS_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPT_EXTENSIONS_SOURCE}" "${SCRIPT_EXTENSIONS_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${BLOCKS_SOURCE}" "${BLOCKS_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${EMOJI_DATA_SOURCE}" "${EMOJI_DATA_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NORM_PROPS_SOURCE}" "${NORM_PROPS_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${GRAPHEME_BREAK_PROP_SOURCE}" "${GRAPHEME_BREAK_PROP_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${WORD_BREAK_PROP_SOURCE}" "${WORD_BREAK_PROP_PATH}")
+ extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SENTENCE_BREAK_PROP_SOURCE}" "${SENTENCE_BREAK_PROP_PATH}")
download_file("${CLDR_ZIP_URL}" "${CLDR_ZIP_PATH}")
- extract_cldr_file("${CLDR_BCP47_SOURCE}" "${CLDR_BCP47_PATH}")
- extract_cldr_file("${CLDR_CORE_SOURCE}" "${CLDR_CORE_PATH}")
- extract_cldr_file("${CLDR_DATES_SOURCE}" "${CLDR_DATES_PATH}")
- extract_cldr_file("${CLDR_LOCALES_SOURCE}" "${CLDR_LOCALES_PATH}")
- extract_cldr_file("${CLDR_MISC_SOURCE}" "${CLDR_MISC_PATH}")
- extract_cldr_file("${CLDR_NUMBERS_SOURCE}" "${CLDR_NUMBERS_PATH}")
- extract_cldr_file("${CLDR_UNITS_SOURCE}" "${CLDR_UNITS_PATH}")
+ extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_BCP47_SOURCE}/**" "${CLDR_BCP47_PATH}")
+ extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}")
+ extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_DATES_SOURCE}/**" "${CLDR_DATES_PATH}")
+ extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_LOCALES_SOURCE}/**" "${CLDR_LOCALES_PATH}")
+ extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_MISC_SOURCE}/**" "${CLDR_MISC_PATH}")
+ extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}")
+ extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_UNITS_SOURCE}/**" "${CLDR_UNITS_PATH}")
set(UNICODE_DATA_HEADER LibUnicode/UnicodeData.h)
set(UNICODE_DATA_IMPLEMENTATION LibUnicode/UnicodeData.cpp)