diff options
author | Timothy Flynn <trflynn89@pm.me> | 2022-04-06 14:32:26 -0400 |
---|---|---|
committer | Brian Gianforcaro <b.gianfo@gmail.com> | 2022-04-06 17:12:08 -0700 |
commit | 8a46794ff8b7696c524d7162a6b7581cf05ab1e1 (patch) | |
tree | 71f275ece6ba39283455218d604f03d708aece86 /Meta/CMake/unicode_data.cmake | |
parent | 8545e2dec0b7760ee0b49023d5e94881a6364126 (diff) | |
download | serenity-8a46794ff8b7696c524d7162a6b7581cf05ab1e1.zip |
LibUnicode: Replace individual UCD file downloads with single UCD.zip
Instead of downloading nearly 20 files individually, we can download a
single .zip file similar to how we download a single CLDR .zip. This is
to reduce the number of connections/downloads to/from unicode.org.
Diffstat (limited to 'Meta/CMake/unicode_data.cmake')
-rw-r--r-- | Meta/CMake/unicode_data.cmake | 130 |
1 file changed, 67 insertions, 63 deletions
diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake index c4965a473a..dfce43fe92 100644 --- a/Meta/CMake/unicode_data.cmake +++ b/Meta/CMake/unicode_data.cmake @@ -13,56 +13,59 @@ set(CLDR_PATH "${CMAKE_BINARY_DIR}/CLDR" CACHE PATH "Download location for CLDR set(UCD_VERSION_FILE "${UCD_PATH}/version.txt") set(CLDR_VERSION_FILE "${CLDR_PATH}/version.txt") -set(UNICODE_DATA_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/UnicodeData.txt") -set(UNICODE_DATA_PATH "${UCD_PATH}/UnicodeData.txt") +set(UCD_ZIP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/UCD.zip") +set(UCD_ZIP_PATH "${UCD_PATH}/UCD.zip") -set(SPECIAL_CASING_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/SpecialCasing.txt") -set(SPECIAL_CASING_PATH "${UCD_PATH}/SpecialCasing.txt") +set(UNICODE_DATA_SOURCE "UnicodeData.txt") +set(UNICODE_DATA_PATH "${UCD_PATH}/${UNICODE_DATA_SOURCE}") -set(DERIVED_GENERAL_CATEGORY_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/extracted/DerivedGeneralCategory.txt") -set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/DerivedGeneralCategory.txt") +set(SPECIAL_CASING_SOURCE "SpecialCasing.txt") +set(SPECIAL_CASING_PATH "${UCD_PATH}/${SPECIAL_CASING_SOURCE}") -set(PROP_LIST_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropList.txt") -set(PROP_LIST_PATH "${UCD_PATH}/PropList.txt") +set(DERIVED_GENERAL_CATEGORY_SOURCE "extracted/DerivedGeneralCategory.txt") +set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/${DERIVED_GENERAL_CATEGORY_SOURCE}") -set(DERIVED_CORE_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/DerivedCoreProperties.txt") -set(DERIVED_CORE_PROP_PATH "${UCD_PATH}/DerivedCoreProperties.txt") +set(PROP_LIST_SOURCE "PropList.txt") +set(PROP_LIST_PATH "${UCD_PATH}/${PROP_LIST_SOURCE}") -set(DERIVED_BINARY_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/extracted/DerivedBinaryProperties.txt") -set(DERIVED_BINARY_PROP_PATH "${UCD_PATH}/DerivedBinaryProperties.txt") 
+set(DERIVED_CORE_PROP_SOURCE "DerivedCoreProperties.txt") +set(DERIVED_CORE_PROP_PATH "${UCD_PATH}/${DERIVED_CORE_PROP_SOURCE}") -set(PROP_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropertyAliases.txt") -set(PROP_ALIAS_PATH "${UCD_PATH}/PropertyAliases.txt") +set(DERIVED_BINARY_PROP_SOURCE "extracted/DerivedBinaryProperties.txt") +set(DERIVED_BINARY_PROP_PATH "${UCD_PATH}/${DERIVED_BINARY_PROP_SOURCE}") -set(PROP_VALUE_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropertyValueAliases.txt") -set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/PropertyValueAliases.txt") +set(PROP_ALIAS_SOURCE "PropertyAliases.txt") +set(PROP_ALIAS_PATH "${UCD_PATH}/${PROP_ALIAS_SOURCE}") -set(NAME_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/NameAliases.txt") -set(NAME_ALIAS_PATH "${UCD_PATH}/NameAliases.txt") +set(PROP_VALUE_ALIAS_SOURCE "PropertyValueAliases.txt") +set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/${PROP_VALUE_ALIAS_SOURCE}") -set(SCRIPTS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/Scripts.txt") -set(SCRIPTS_PATH "${UCD_PATH}/Scripts.txt") +set(NAME_ALIAS_SOURCE "NameAliases.txt") +set(NAME_ALIAS_PATH "${UCD_PATH}/${NAME_ALIAS_SOURCE}") -set(SCRIPT_EXTENSIONS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/ScriptExtensions.txt") -set(SCRIPT_EXTENSIONS_PATH "${UCD_PATH}/ScriptExtensions.txt") +set(SCRIPTS_SOURCE "Scripts.txt") +set(SCRIPTS_PATH "${UCD_PATH}/${SCRIPTS_SOURCE}") -set(BLOCKS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/Blocks.txt") -set(BLOCKS_PATH "${UCD_PATH}/Blocks.txt") +set(SCRIPT_EXTENSIONS_SOURCE "ScriptExtensions.txt") +set(SCRIPT_EXTENSIONS_PATH "${UCD_PATH}/${SCRIPT_EXTENSIONS_SOURCE}") -set(EMOJI_DATA_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/emoji/emoji-data.txt") -set(EMOJI_DATA_PATH "${UCD_PATH}/emoji-data.txt") +set(BLOCKS_SOURCE "Blocks.txt") +set(BLOCKS_PATH "${UCD_PATH}/${BLOCKS_SOURCE}") -set(NORM_PROPS_URL 
"https://www.unicode.org/Public/${UCD_VERSION}/ucd/DerivedNormalizationProps.txt") -set(NORM_PROPS_PATH "${UCD_PATH}/DerivedNormalizationProps.txt") +set(EMOJI_DATA_SOURCE "emoji/emoji-data.txt") +set(EMOJI_DATA_PATH "${UCD_PATH}/${EMOJI_DATA_SOURCE}") -set(GRAPHEME_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/GraphemeBreakProperty.txt") -set(GRAPHEME_BREAK_PROP_PATH "${UCD_PATH}/GraphemeBreakProperty.txt") +set(NORM_PROPS_SOURCE "DerivedNormalizationProps.txt") +set(NORM_PROPS_PATH "${UCD_PATH}/${NORM_PROPS_SOURCE}") -set(WORD_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/WordBreakProperty.txt") -set(WORD_BREAK_PROP_PATH "${UCD_PATH}/WordBreakProperty.txt") +set(GRAPHEME_BREAK_PROP_SOURCE "auxiliary/GraphemeBreakProperty.txt") +set(GRAPHEME_BREAK_PROP_PATH "${UCD_PATH}/${GRAPHEME_BREAK_PROP_SOURCE}") -set(SENTENCE_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/SentenceBreakProperty.txt") -set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/SentenceBreakProperty.txt") +set(WORD_BREAK_PROP_SOURCE "auxiliary/WordBreakProperty.txt") +set(WORD_BREAK_PROP_PATH "${UCD_PATH}/${WORD_BREAK_PROP_SOURCE}") + +set(SENTENCE_BREAK_PROP_SOURCE "auxiliary/SentenceBreakProperty.txt") +set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/${SENTENCE_BREAK_PROP_SOURCE}") set(CLDR_ZIP_URL "https://github.com/unicode-org/cldr-json/releases/download/${CLDR_REAL_VERSION}/cldr-${CLDR_REAL_VERSION}-json-modern.zip") set(CLDR_ZIP_PATH "${CLDR_PATH}/cldr.zip") @@ -88,12 +91,12 @@ set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}") set(CLDR_UNITS_SOURCE cldr-units-modern) set(CLDR_UNITS_PATH "${CLDR_PATH}/${CLDR_UNITS_SOURCE}") -function(extract_cldr_file source path) - if(EXISTS "${CLDR_ZIP_PATH}" AND NOT EXISTS "${path}") - message(STATUS "Extracting CLDR ${source} from ${CLDR_ZIP_PATH}...") - execute_process(COMMAND "${UNZIP_TOOL}" -q "${CLDR_ZIP_PATH}" "${source}/**" -d "${CLDR_PATH}" RESULT_VARIABLE 
unzip_result) +function(extract_path dest_dir zip_path source_path dest_path) + if (EXISTS "${zip_path}" AND NOT EXISTS "${dest_path}") + message(STATUS "Extracting ${source_path} from ${zip_path}") + execute_process(COMMAND "${UNZIP_TOOL}" -q "${zip_path}" "${source_path}" -d "${dest_dir}" RESULT_VARIABLE unzip_result) if (NOT unzip_result EQUAL 0) - message(FATAL_ERROR "Failed to unzip ${source} from ${CLDR_ZIP_PATH} with status ${unzip_result}") + message(FATAL_ERROR "Failed to unzip ${source_path} from ${zip_path} with status ${unzip_result}") endif() endif() endfunction() @@ -102,32 +105,33 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) remove_path_if_version_changed("${UCD_VERSION}" "${UCD_VERSION_FILE}" "${UCD_PATH}") remove_path_if_version_changed("${CLDR_VERSION}" "${CLDR_VERSION_FILE}" "${CLDR_PATH}") - download_file("${UNICODE_DATA_URL}" "${UNICODE_DATA_PATH}") - download_file("${SPECIAL_CASING_URL}" "${SPECIAL_CASING_PATH}") - download_file("${DERIVED_GENERAL_CATEGORY_URL}" "${DERIVED_GENERAL_CATEGORY_PATH}") - download_file("${PROP_LIST_URL}" "${PROP_LIST_PATH}") - download_file("${DERIVED_CORE_PROP_URL}" "${DERIVED_CORE_PROP_PATH}") - download_file("${DERIVED_BINARY_PROP_URL}" "${DERIVED_BINARY_PROP_PATH}") - download_file("${PROP_ALIAS_URL}" "${PROP_ALIAS_PATH}") - download_file("${PROP_VALUE_ALIAS_URL}" "${PROP_VALUE_ALIAS_PATH}") - download_file("${NAME_ALIAS_URL}" "${NAME_ALIAS_PATH}") - download_file("${SCRIPTS_URL}" "${SCRIPTS_PATH}") - download_file("${SCRIPT_EXTENSIONS_URL}" "${SCRIPT_EXTENSIONS_PATH}") - download_file("${BLOCKS_URL}" "${BLOCKS_PATH}") - download_file("${EMOJI_DATA_URL}" "${EMOJI_DATA_PATH}") - download_file("${NORM_PROPS_URL}" "${NORM_PROPS_PATH}") - download_file("${GRAPHEME_BREAK_PROP_URL}" "${GRAPHEME_BREAK_PROP_PATH}") - download_file("${WORD_BREAK_PROP_URL}" "${WORD_BREAK_PROP_PATH}") - download_file("${SENTENCE_BREAK_PROP_URL}" "${SENTENCE_BREAK_PROP_PATH}") + download_file("${UCD_ZIP_URL}" "${UCD_ZIP_PATH}") + 
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${UNICODE_DATA_SOURCE}" "${UNICODE_DATA_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SPECIAL_CASING_SOURCE}" "${SPECIAL_CASING_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_GENERAL_CATEGORY_SOURCE}" "${DERIVED_GENERAL_CATEGORY_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_LIST_SOURCE}" "${PROP_LIST_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_CORE_PROP_SOURCE}" "${DERIVED_CORE_PROP_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_BINARY_PROP_SOURCE}" "${DERIVED_BINARY_PROP_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_ALIAS_SOURCE}" "${PROP_ALIAS_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_VALUE_ALIAS_SOURCE}" "${PROP_VALUE_ALIAS_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NAME_ALIAS_SOURCE}" "${NAME_ALIAS_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPTS_SOURCE}" "${SCRIPTS_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPT_EXTENSIONS_SOURCE}" "${SCRIPT_EXTENSIONS_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${BLOCKS_SOURCE}" "${BLOCKS_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${EMOJI_DATA_SOURCE}" "${EMOJI_DATA_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NORM_PROPS_SOURCE}" "${NORM_PROPS_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${GRAPHEME_BREAK_PROP_SOURCE}" "${GRAPHEME_BREAK_PROP_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${WORD_BREAK_PROP_SOURCE}" "${WORD_BREAK_PROP_PATH}") + extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SENTENCE_BREAK_PROP_SOURCE}" "${SENTENCE_BREAK_PROP_PATH}") download_file("${CLDR_ZIP_URL}" "${CLDR_ZIP_PATH}") - extract_cldr_file("${CLDR_BCP47_SOURCE}" "${CLDR_BCP47_PATH}") - extract_cldr_file("${CLDR_CORE_SOURCE}" "${CLDR_CORE_PATH}") - extract_cldr_file("${CLDR_DATES_SOURCE}" "${CLDR_DATES_PATH}") - extract_cldr_file("${CLDR_LOCALES_SOURCE}" 
"${CLDR_LOCALES_PATH}") - extract_cldr_file("${CLDR_MISC_SOURCE}" "${CLDR_MISC_PATH}") - extract_cldr_file("${CLDR_NUMBERS_SOURCE}" "${CLDR_NUMBERS_PATH}") - extract_cldr_file("${CLDR_UNITS_SOURCE}" "${CLDR_UNITS_PATH}") + extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_BCP47_SOURCE}/**" "${CLDR_BCP47_PATH}") + extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}") + extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_DATES_SOURCE}/**" "${CLDR_DATES_PATH}") + extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_LOCALES_SOURCE}/**" "${CLDR_LOCALES_PATH}") + extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_MISC_SOURCE}/**" "${CLDR_MISC_PATH}") + extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}") + extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_UNITS_SOURCE}/**" "${CLDR_UNITS_PATH}") set(UNICODE_DATA_HEADER LibUnicode/UnicodeData.h) set(UNICODE_DATA_IMPLEMENTATION LibUnicode/UnicodeData.cpp) |