summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Meta/Lagom/CMakeLists.txt1
-rw-r--r--Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp38
-rw-r--r--Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h54
-rw-r--r--Userland/Libraries/LibUnicode/CMakeLists.txt1
-rw-r--r--Userland/Libraries/LibUnicode/CharacterTypes.cpp125
-rw-r--r--Userland/Libraries/LibUnicode/UnicodeSymbols.cpp94
-rw-r--r--Userland/Libraries/LibUnicode/UnicodeSymbols.h44
7 files changed, 256 insertions, 101 deletions
diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt
index 174110e07d..6fe3516a01 100644
--- a/Meta/Lagom/CMakeLists.txt
+++ b/Meta/Lagom/CMakeLists.txt
@@ -427,6 +427,7 @@ if (BUILD_LAGOM)
SOURCES ${LIBUNICODE_SOURCES} ${UNICODE_DATA_SOURCES}
)
target_compile_definitions(LagomUnicode PRIVATE ENABLE_UNICODE_DATA=$<BOOL:${ENABLE_UNICODE_DATABASE_DOWNLOAD}>)
+ target_link_libraries(LagomUnicode -ldl)
# WASM
file(GLOB LIBWASM_SOURCES CONFIGURE_DEPENDS "../../Userland/Libraries/LibWasm/*/*.cpp")
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
index c34ecf4b10..53cc9fb2e1 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
@@ -574,8 +574,6 @@ enum class @name@ : @underlying@ {)~~~");
generator.append(R"~~~(
#pragma once
-#include <AK/Optional.h>
-#include <AK/Span.h>
#include <AK/Types.h>
#include <LibUnicode/Forward.h>
#include <LibUnicode/UnicodeLocale.h>
@@ -605,28 +603,6 @@ struct SpecialCasing {
Condition condition { Condition::None };
};
-namespace Detail {
-
-Optional<String> code_point_display_name(u32 code_point);
-
-u32 canonical_combining_class(u32 code_point);
-
-u32 simple_uppercase_mapping(u32 code_point);
-u32 simple_lowercase_mapping(u32 code_point);
-Span<SpecialCasing const* const> special_case_mapping(u32 code_point);
-
-bool code_point_has_general_category(u32 code_point, GeneralCategory general_category);
-Optional<GeneralCategory> general_category_from_string(StringView general_category);
-
-bool code_point_has_property(u32 code_point, Property property);
-Optional<Property> property_from_string(StringView property);
-
-bool code_point_has_script(u32 code_point, Script script);
-bool code_point_has_script_extension(u32 code_point, Script script);
-Optional<Script> script_from_string(StringView script);
-
-}
-
}
)~~~");
@@ -645,11 +621,13 @@ static void generate_unicode_data_implementation(Core::File& file, UnicodeData c
#include <AK/Array.h>
#include <AK/BinarySearch.h>
#include <AK/CharacterTypes.h>
+#include <AK/Optional.h>
+#include <AK/Span.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <LibUnicode/UnicodeData.h>
-namespace Unicode {
+namespace Unicode::Detail {
)~~~");
auto append_list_and_size = [&](auto const& list, StringView format) {
@@ -873,8 +851,7 @@ static constexpr Array<CodePointName, @code_point_display_names_size@> s_code_po
)~~~");
generator.append(R"~~~(
-namespace Detail {
-
+Optional<String> code_point_display_name(u32 code_point) asm("unicode_code_point_display_name");
Optional<String> code_point_display_name(u32 code_point)
{
if (auto const* entry = binary_search(s_code_point_display_names, code_point, nullptr, CodePointNameComparator {})) {
@@ -893,6 +870,7 @@ Optional<String> code_point_display_name(u32 code_point)
generator.set("mappings", mappings);
generator.set("fallback", fallback);
generator.append(R"~~~(
+u32 @method@(u32 code_point) asm("unicode_@method@");
u32 @method@(u32 code_point)
{
auto const* mapping = binary_search(@mappings@, code_point, nullptr, CodePointComparator<CodePointMapping> {});
@@ -906,6 +884,7 @@ u32 @method@(u32 code_point)
append_code_point_mapping_search("simple_lowercase_mapping"sv, "s_lowercase_mappings"sv, "code_point"sv);
generator.append(R"~~~(
+Span<SpecialCasing const* const> special_case_mapping(u32 code_point) asm("unicode_special_case_mapping");
Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
{
auto const* mapping = binary_search(s_special_case_mappings, code_point, nullptr, CodePointComparator<SpecialCaseMapping> {});
@@ -921,6 +900,7 @@ Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
generator.set("enum_snake", enum_snake);
generator.set("collection_name", collection_name);
generator.append(R"~~~(
+bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@) asm("unicode_code_point_has_@enum_snake@");
bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
{
auto index = static_cast<@enum_title@UnderlyingType>(@enum_snake@);
@@ -941,7 +921,7 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
for (auto const& alias : aliases)
hashes.set(alias.alias.hash(), alias.alias);
- generate_value_from_string(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
+ generate_value_from_string_for_dynamic_loading(generator, "{}_from_string"sv, enum_title, enum_snake, move(hashes));
};
append_prop_search("GeneralCategory"sv, "general_category"sv, "s_general_categories"sv);
@@ -956,8 +936,6 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
generator.append(R"~~~(
}
-
-}
)~~~");
VERIFY(file.write(generator.as_string_view()));
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h
index 74621fbbc2..e4b21bb814 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h
+++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GeneratorUtil.h
@@ -345,6 +345,60 @@ Optional<@return_type@> @method_name@(StringView key)
)~~~");
}
+// This is a temporary duplicate of generate_value_from_string() until all generators support dynamic loading. It emits a lookup function Optional<@return_type@> @method_name@(StringView key), preceded by a declaration of that function carrying the asm label unicode_@method_name@.
+template<typename ValueType>
+void generate_value_from_string_for_dynamic_loading(SourceGenerator& generator, StringView method_name_format, StringView value_type, StringView value_name, HashValueMap<ValueType> hashes, Optional<StringView> return_type = {}, StringView return_format = "{}"sv) // return_type falls back to value_type when not given
+{
+ ensure_from_string_types_are_generated(generator);
+
+ generator.set("method_name", String::formatted(method_name_format, value_name)); // method_name_format is formatted with value_name to produce the emitted function name
+ generator.set("value_type", value_type);
+ generator.set("value_name", value_name);
+ generator.set("return_type", return_type.has_value() ? *return_type : value_type);
+ generator.set("size", String::number(hashes.size())); // number of entries in the emitted hash_pairs array
+
+ generator.append(R"~~~(
+Optional<@return_type@> @method_name@(StringView key) asm("unicode_@method_name@");
+Optional<@return_type@> @method_name@(StringView key)
+{
+ constexpr Array<HashValuePair<@value_type@>, @size@> hash_pairs { {
+ )~~~");
+
+ auto hash_keys = hashes.keys();
+ quick_sort(hash_keys); // keys are sorted before emission; the generated function binary-searches hash_pairs by key.hash()
+
+ constexpr size_t max_values_per_row = 10; // the emitted initializer list is wrapped after this many entries
+ size_t values_in_current_row = 0;
+
+ for (auto hash_key : hash_keys) {
+ if (values_in_current_row++ > 0) // entries after the first one in a row are preceded by a separator
+ generator.append(" ");
+
+ if constexpr (IsIntegral<ValueType>) // integral values are emitted as plain numbers, anything else as value_type::value
+ generator.set("value"sv, String::number(hashes.get(hash_key).value()));
+ else
+ generator.set("value"sv, String::formatted("{}::{}", value_type, hashes.get(hash_key).value()));
+
+ generator.set("hash"sv, String::number(hash_key));
+ generator.append("{ @hash@U, @value@ },"sv);
+
+ if (values_in_current_row == max_values_per_row) { // row is full: start a new line and reset the counter
+ generator.append("\n ");
+ values_in_current_row = 0;
+ }
+ }
+
+ generator.set("return_statement", String::formatted(return_format, "value->value"sv)); // return_format is applied to the expression value->value used in the emitted return
+ generator.append(R"~~~(
+ } };
+
+ if (auto const* value = binary_search(hash_pairs, key.hash(), nullptr, HashValueComparator<@value_type@> {}))
+ return @return_statement@;
+ return {};
+}
+)~~~");
+}
+
template<typename IdentifierFormatter>
void generate_enum(SourceGenerator& generator, IdentifierFormatter&& format_identifier, StringView name, StringView default_, Vector<String>& values, Vector<Alias> aliases = {})
{
diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt
index 6634f4b504..9249cdc373 100644
--- a/Userland/Libraries/LibUnicode/CMakeLists.txt
+++ b/Userland/Libraries/LibUnicode/CMakeLists.txt
@@ -13,6 +13,7 @@ set(SOURCES
DateTimeFormat.cpp
Locale.cpp
NumberFormat.cpp
+ UnicodeSymbols.cpp
)
serenity_lib(LibUnicode unicode)
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index 520f94fc4b..c394650e2c 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -11,6 +11,7 @@
#include <AK/Utf8View.h>
#include <LibUnicode/CharacterTypes.h>
#include <LibUnicode/Locale.h>
+#include <LibUnicode/UnicodeSymbols.h>
#if ENABLE_UNICODE_DATA
# include <LibUnicode/UnicodeData.h>
@@ -23,6 +24,18 @@ namespace Unicode {
#if ENABLE_UNICODE_DATA
+static u32 canonical_combining_class(u32 code_point) // File-local forwarder to the canonical_combining_class pointer held by Detail::Symbols.
+{
+ static auto const& symbols = Detail::Symbols::ensure_loaded(); // function-local static: ensure_loaded() is invoked only when this initializer first runs
+ return symbols.canonical_combining_class(code_point);
+}
+
+static Span<Unicode::SpecialCasing const* const> special_case_mapping(u32 code_point) // File-local forwarder to the special_case_mapping pointer held by Detail::Symbols.
+{
+ static auto const& symbols = Detail::Symbols::ensure_loaded(); // function-local static: ensure_loaded() is invoked only when this initializer first runs
+ return symbols.special_case_mapping(code_point);
+}
+
static bool is_after_uppercase_i(Utf8View const& string, size_t index)
{
// There is an uppercase I before C, and there is no intervening combining character class 230 (Above) or 0.
@@ -36,11 +49,11 @@ static bool is_after_uppercase_i(Utf8View const& string, size_t index)
continue;
}
- u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
+ u32 combining_class = canonical_combining_class(code_point);
- if (canonical_combining_class == 0)
+ if (combining_class == 0)
found_uppercase_i = false;
- else if (canonical_combining_class == 230)
+ else if (combining_class == 230)
found_uppercase_i = false;
}
@@ -60,11 +73,11 @@ static bool is_after_soft_dotted_code_point(Utf8View const& string, size_t index
continue;
}
- u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
+ u32 combining_class = canonical_combining_class(code_point);
- if (canonical_combining_class == 0)
+ if (combining_class == 0)
found_soft_dotted_code_point = false;
- else if (canonical_combining_class == 230)
+ else if (combining_class == 230)
found_soft_dotted_code_point = false;
}
@@ -119,11 +132,11 @@ static bool is_followed_by_combining_class_above(Utf8View const& string, size_t
: Utf8View {};
for (auto code_point : following_view) {
- u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
+ u32 combining_class = canonical_combining_class(code_point);
- if (canonical_combining_class == 0)
+ if (combining_class == 0)
return false;
- if (canonical_combining_class == 230)
+ if (combining_class == 230)
return true;
}
@@ -142,11 +155,11 @@ static bool is_followed_by_combining_dot_above(Utf8View const& string, size_t in
if (code_point == 0x307)
return true;
- u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
+ u32 combining_class = canonical_combining_class(code_point);
- if (canonical_combining_class == 0)
+ if (combining_class == 0)
return false;
- if (canonical_combining_class == 230)
+ if (combining_class == 230)
return false;
}
@@ -162,7 +175,7 @@ static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View
requested_locale = *maybe_locale;
}
- auto special_casings = Detail::special_case_mapping(code_point);
+ auto special_casings = special_case_mapping(code_point);
for (auto const* special_casing : special_casings) {
if (special_casing->locale != Locale::None && special_casing->locale != requested_locale)
@@ -206,29 +219,20 @@ static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View
u32 to_unicode_lowercase(u32 code_point)
{
-#if ENABLE_UNICODE_DATA
- return Detail::simple_lowercase_mapping(code_point);
-#else
- return AK::to_ascii_lowercase(code_point);
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.simple_lowercase_mapping(code_point);
}
u32 to_unicode_uppercase(u32 code_point)
{
-#if ENABLE_UNICODE_DATA
- return Detail::simple_uppercase_mapping(code_point);
-#else
- return AK::to_ascii_uppercase(code_point);
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.simple_uppercase_mapping(code_point);
}
-Optional<String> code_point_display_name([[maybe_unused]] u32 code_point)
+Optional<String> code_point_display_name(u32 code_point)
{
-#if ENABLE_UNICODE_DATA
- return Detail::code_point_display_name(code_point);
-#else
- return {};
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.code_point_display_name(code_point);
}
String to_unicode_lowercase_full(StringView string, [[maybe_unused]] Optional<StringView> locale)
@@ -289,40 +293,28 @@ String to_unicode_uppercase_full(StringView string, [[maybe_unused]] Optional<St
#endif
}
-Optional<GeneralCategory> general_category_from_string([[maybe_unused]] StringView general_category)
+Optional<GeneralCategory> general_category_from_string(StringView general_category)
{
-#if ENABLE_UNICODE_DATA
- return Detail::general_category_from_string(general_category);
-#else
- return {};
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.general_category_from_string(general_category);
}
-bool code_point_has_general_category([[maybe_unused]] u32 code_point, [[maybe_unused]] GeneralCategory general_category)
+bool code_point_has_general_category(u32 code_point, GeneralCategory general_category)
{
-#if ENABLE_UNICODE_DATA
- return Detail::code_point_has_general_category(code_point, general_category);
-#else
- return {};
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.code_point_has_general_category(code_point, general_category);
}
-Optional<Property> property_from_string([[maybe_unused]] StringView property)
+Optional<Property> property_from_string(StringView property)
{
-#if ENABLE_UNICODE_DATA
- return Detail::property_from_string(property);
-#else
- return {};
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.property_from_string(property);
}
-bool code_point_has_property([[maybe_unused]] u32 code_point, [[maybe_unused]] Property property)
+bool code_point_has_property(u32 code_point, Property property)
{
-#if ENABLE_UNICODE_DATA
- return Detail::code_point_has_property(code_point, property);
-#else
- return false;
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.code_point_has_property(code_point, property);
}
bool is_ecma262_property([[maybe_unused]] Property property)
@@ -392,31 +384,22 @@ bool is_ecma262_property([[maybe_unused]] Property property)
#endif
}
-Optional<Script> script_from_string([[maybe_unused]] StringView script)
+Optional<Script> script_from_string(StringView script)
{
-#if ENABLE_UNICODE_DATA
- return Detail::script_from_string(script);
-#else
- return {};
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.script_from_string(script);
}
-bool code_point_has_script([[maybe_unused]] u32 code_point, [[maybe_unused]] Script script)
+bool code_point_has_script(u32 code_point, Script script)
{
-#if ENABLE_UNICODE_DATA
- return Detail::code_point_has_script(code_point, script);
-#else
- return false;
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.code_point_has_script(code_point, script);
}
-bool code_point_has_script_extension([[maybe_unused]] u32 code_point, [[maybe_unused]] Script script)
+bool code_point_has_script_extension(u32 code_point, Script script)
{
-#if ENABLE_UNICODE_DATA
- return Detail::code_point_has_script_extension(code_point, script);
-#else
- return false;
-#endif
+ static auto const& symbols = Detail::Symbols::ensure_loaded();
+ return symbols.code_point_has_script_extension(code_point, script);
}
}
diff --git a/Userland/Libraries/LibUnicode/UnicodeSymbols.cpp b/Userland/Libraries/LibUnicode/UnicodeSymbols.cpp
new file mode 100644
index 0000000000..c0b594e679
--- /dev/null
+++ b/Userland/Libraries/LibUnicode/UnicodeSymbols.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibUnicode/UnicodeSymbols.h>
+
+#if ENABLE_UNICODE_DATA
+# if defined(__serenity__)
+# include <LibDl/dlfcn.h>
+# include <LibDl/dlfcn_integration.h>
+# else
+# include <dlfcn.h>
+# endif
+#else
+# include <AK/Function.h>
+#endif
+
+namespace Unicode::Detail {
+
+#if !ENABLE_UNICODE_DATA
+
+template<typename T>
+struct FunctionStub; // primary template is declared only; the Function<ReturnType(ParameterTypes...)> specialization below supplies make_stub()
+
+template<typename ReturnType, typename... ParameterTypes>
+struct FunctionStub<Function<ReturnType(ParameterTypes...)>> {
+ static constexpr auto make_stub() // Produces a placeholder callable with the same parameter and return types as the wrapped signature.
+ {
+ return [](ParameterTypes...) -> ReturnType { return {}; }; // captureless lambda: ignores its arguments and returns a {}-initialized ReturnType
+ }
+};
+
+#endif
+
+// Returns the function-local static Symbols instance, assigning all of its function pointers on
+// the first call. This loader supports 3 modes:
+// 1. When the Unicode data generators are enabled, and the target is Serenity, the symbols are
+// dynamically loaded from the shared library containing them (libunicodedata.so.serenity).
+//
+// 2. When the Unicode data generators are enabled, and the target is Lagom, the symbols are
+// dynamically loaded from the main program (dlopen is given a null path).
+//
+// 3. When the Unicode data generators are disabled, the symbols are stubbed out to empty lambdas.
+// This allows callers to remain agnostic as to whether the generators are enabled.
+Symbols const& Symbols::ensure_loaded()
+{
+ static Symbols symbols {};
+
+ static bool initialized = false; // NOTE(review): plain bool with no synchronization visible here; confirm concurrent first calls are not a concern
+ if (initialized)
+ return symbols;
+
+#if ENABLE_UNICODE_DATA
+# if defined(__serenity__)
+ static void* libunicodedata = MUST(__dlopen("libunicodedata.so.serenity", RTLD_NOW)); // static: the library handle is obtained once and kept
+
+ auto load_symbol = [&]<typename T>(T& dest, char const* name) {
+ dest = reinterpret_cast<T>(MUST(__dlsym(libunicodedata, name))); // T is the function pointer type of the destination member
+ };
+# else
+ static void* libunicodedata = dlopen(nullptr, RTLD_NOW); // null path: handle refers to the main program, as described in mode 2 above
+ VERIFY(libunicodedata);
+
+ auto load_symbol = [&]<typename T>(T& dest, char const* name) {
+ dest = reinterpret_cast<T>(dlsym(libunicodedata, name));
+ VERIFY(dest); // every symbol is required; a failed lookup trips VERIFY
+ };
+# endif
+#else
+ auto load_symbol = []<typename T>(T& dest, char const*) { // the symbol name is unused when stubbing
+ dest = +FunctionStub<Function<RemovePointer<T>>>::make_stub(); // unary + converts the captureless stub lambda to a plain function pointer
+ };
+#endif
+
+ load_symbol(symbols.code_point_display_name, "unicode_code_point_display_name"); // names below are expected to match the asm labels emitted by the Unicode data generator; keep in sync
+ load_symbol(symbols.canonical_combining_class, "unicode_canonical_combining_class");
+ load_symbol(symbols.simple_uppercase_mapping, "unicode_simple_uppercase_mapping");
+ load_symbol(symbols.simple_lowercase_mapping, "unicode_simple_lowercase_mapping");
+ load_symbol(symbols.special_case_mapping, "unicode_special_case_mapping");
+ load_symbol(symbols.general_category_from_string, "unicode_general_category_from_string");
+ load_symbol(symbols.code_point_has_general_category, "unicode_code_point_has_general_category");
+ load_symbol(symbols.property_from_string, "unicode_property_from_string");
+ load_symbol(symbols.code_point_has_property, "unicode_code_point_has_property");
+ load_symbol(symbols.script_from_string, "unicode_script_from_string");
+ load_symbol(symbols.code_point_has_script, "unicode_code_point_has_script");
+ load_symbol(symbols.code_point_has_script_extension, "unicode_code_point_has_script_extension");
+
+ initialized = true; // set only after every pointer has been assigned
+ return symbols;
+}
+
+}
diff --git a/Userland/Libraries/LibUnicode/UnicodeSymbols.h b/Userland/Libraries/LibUnicode/UnicodeSymbols.h
new file mode 100644
index 0000000000..e4babe8218
--- /dev/null
+++ b/Userland/Libraries/LibUnicode/UnicodeSymbols.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Optional.h>
+#include <AK/String.h>
+#include <AK/StringView.h>
+#include <AK/Types.h>
+#include <LibUnicode/Forward.h>
+
+namespace Unicode::Detail {
+
+struct Symbols { // Table of function pointers to the Unicode data lookups; obtained through ensure_loaded().
+ static Symbols const& ensure_loaded(); // returns a reference to the Symbols instance; the only public way to obtain one
+
+ // Loaded from UnicodeData.cpp (each pointer is null until it has been assigned):
+
+ Optional<String> (*code_point_display_name)(u32) { nullptr };
+
+ u32 (*canonical_combining_class)(u32 code_point) { nullptr };
+
+ u32 (*simple_uppercase_mapping)(u32) { nullptr };
+ u32 (*simple_lowercase_mapping)(u32) { nullptr };
+ Span<SpecialCasing const* const> (*special_case_mapping)(u32 code_point) { nullptr };
+
+ Optional<GeneralCategory> (*general_category_from_string)(StringView) { nullptr };
+ bool (*code_point_has_general_category)(u32, GeneralCategory) { nullptr };
+
+ Optional<Property> (*property_from_string)(StringView) { nullptr };
+ bool (*code_point_has_property)(u32, Property) { nullptr };
+
+ Optional<Script> (*script_from_string)(StringView) { nullptr };
+ bool (*code_point_has_script)(u32, Script) { nullptr };
+ bool (*code_point_has_script_extension)(u32, Script) { nullptr };
+
+private:
+ Symbols() = default; // private constructor: code outside this struct cannot create its own instance
+};
+
+}