diff options
author | Timothy Flynn <trflynn89@pm.me> | 2023-01-07 13:59:10 -0500 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2023-01-08 12:13:15 +0100 |
commit | d793262beba3a113bed4c728b572a78583b43277 (patch) | |
tree | e30bd93b43361cc563406dcd934e95f264d24bc9 | |
parent | 1edb96376b51519fe9a7aff2d281f243ca19fd45 (diff) | |
download | serenity-d793262beba3a113bed4c728b572a78583b43277.zip |
AK+Everywhere: Make UTF-16 to UTF-8 converter fallible
This could fail to allocate the underlying storage needed to store the
UTF-8 data. Propagate this error.
-rw-r--r-- | AK/Utf16View.cpp | 8 | ||||
-rw-r--r-- | AK/Utf16View.h | 2 | ||||
-rw-r--r-- | Tests/AK/TestUtf16.cpp | 14 | ||||
-rw-r--r-- | Userland/Applications/HexEditor/HexEditorWidget.cpp | 2 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp | 6 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp | 2 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp | 3 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Utf16String.cpp | 5 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Utf16String.h | 3 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatch.h | 2 |
10 files changed, 25 insertions, 22 deletions
```diff
diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp
index 4c23484bee..2c049f5e39 100644
--- a/AK/Utf16View.cpp
+++ b/AK/Utf16View.cpp
@@ -81,7 +81,7 @@ u32 Utf16View::decode_surrogate_pair(u16 high_surrogate, u16 low_surrogate)
     return ((high_surrogate - high_surrogate_min) << 10) + (low_surrogate - low_surrogate_min) + first_supplementary_plane_code_point;
 }
 
-DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
+ErrorOr<DeprecatedString> Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
 {
     StringBuilder builder;
 
@@ -92,17 +92,17 @@ DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_uni
 
                 if ((next < end_ptr()) && is_low_surrogate(*next)) {
                     auto code_point = decode_surrogate_pair(*ptr, *next);
-                    builder.append_code_point(code_point);
+                    TRY(builder.try_append_code_point(code_point));
                     ++ptr;
                     continue;
                 }
             }
 
-            builder.append_code_point(static_cast<u32>(*ptr));
+            TRY(builder.try_append_code_point(static_cast<u32>(*ptr)));
         }
     } else {
         for (auto code_point : *this)
-            builder.append_code_point(code_point);
+            TRY(builder.try_append_code_point(code_point));
     }
 
     return builder.build();
diff --git a/AK/Utf16View.h b/AK/Utf16View.h
index 101f4e4892..1a46546da1 100644
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -75,7 +75,7 @@ public:
         No,
     };
 
-    DeprecatedString to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
+    ErrorOr<DeprecatedString> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
 
     bool is_null() const { return m_code_units.is_null(); }
     bool is_empty() const { return m_code_units.is_empty(); }
diff --git a/Tests/AK/TestUtf16.cpp b/Tests/AK/TestUtf16.cpp
index 56b2057e17..174c959e7a 100644
--- a/Tests/AK/TestUtf16.cpp
+++ b/Tests/AK/TestUtf16.cpp
@@ -56,14 +56,14 @@ TEST_CASE(encode_utf8)
         DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
         auto string = MUST(AK::utf8_to_utf16(utf8_string));
         Utf16View view { string };
-        EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
-        EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
+        EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), utf8_string);
+        EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), utf8_string);
     }
     {
         auto encoded = Array { (u16)0xd83d };
         Utf16View view { encoded };
-        EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv);
-        EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv);
+        EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
+        EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
     }
 }
 
@@ -269,14 +269,14 @@ TEST_CASE(substring_view)
         view = view.substring_view(7, 2);
 
         EXPECT(view.length_in_code_units() == 2);
-        EXPECT_EQ(view.to_utf8(), "😀"sv);
+        EXPECT_EQ(MUST(view.to_utf8()), "😀"sv);
     }
     {
         Utf16View view { string };
         view = view.substring_view(7, 1);
 
         EXPECT(view.length_in_code_units() == 1);
-        EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv);
-        EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv);
+        EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
+        EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
     }
 }
diff --git a/Userland/Applications/HexEditor/HexEditorWidget.cpp b/Userland/Applications/HexEditor/HexEditorWidget.cpp
index 0e363be188..122d377020 100644
--- a/Userland/Applications/HexEditor/HexEditorWidget.cpp
+++ b/Userland/Applications/HexEditor/HexEditorWidget.cpp
@@ -372,7 +372,7 @@ void HexEditorWidget::update_inspector_values(size_t position)
         if (valid_code_units == 0)
             value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
         else
-            value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8());
+            value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8().release_value_but_fixme_should_propagate_errors());
     } else {
         value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
     }
diff --git a/Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp b/Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp
index ddf89aee18..cef3472432 100644
--- a/Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp
+++ b/Userland/Libraries/LibJS/Runtime/AbstractOperations.cpp
@@ -1265,7 +1265,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
         } else if (is_ascii_digit(next)) {
             bool is_two_digits = (i + 2 < replace_view.length_in_code_units()) && is_ascii_digit(replace_view.code_unit_at(i + 2));
 
-            auto capture_position_string = replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8();
+            auto capture_position_string = TRY_OR_THROW_OOM(vm, replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8());
             auto capture_position = capture_position_string.to_uint();
 
             if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) {
@@ -1295,7 +1295,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
                 result.append(curr);
             } else {
                 auto group_name_view = replace_view.substring_view(start_position, *end_position - start_position);
-                auto group_name = group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
+                auto group_name = TRY_OR_THROW_OOM(vm, group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
 
                 auto capture = TRY(named_captures.as_object().get(group_name));
 
@@ -1311,7 +1311,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
         }
     }
 
-    return Utf16String(move(result)).to_utf8();
+    return TRY_OR_THROW_OOM(vm, Utf16View { result }.to_utf8());
 }
 
 }
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp
index 57973e4711..8f1f527b77 100644
--- a/Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp
+++ b/Userland/Libraries/LibJS/Runtime/Intl/DateTimeFormat.cpp
@@ -718,7 +718,7 @@ ThrowCompletionOr<Vector<PatternPartition>> format_date_time_pattern(VM& vm, Dat
                 if (formatted_value.length() > 2) {
                     Utf16String utf16_formatted_value { formatted_value };
                     if (utf16_formatted_value.length_in_code_units() > 2)
-                        formatted_value = utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8();
+                        formatted_value = TRY_OR_THROW_OOM(vm, utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8());
                 }
 
                 break;
diff --git a/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp b/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp
index c29c4b0cc8..3d4f87de34 100644
--- a/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp
+++ b/Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp
@@ -68,7 +68,8 @@ DeprecatedString const& PrimitiveString::deprecated_string() const
 {
     resolve_rope_if_needed();
     if (!m_has_utf8_string) {
-        m_utf8_string = m_utf16_string.to_utf8();
+        // FIXME: Propagate this error.
+        m_utf8_string = MUST(m_utf16_string.to_utf8(vm()));
         m_has_utf8_string = true;
     }
     return m_utf8_string;
diff --git a/Userland/Libraries/LibJS/Runtime/Utf16String.cpp b/Userland/Libraries/LibJS/Runtime/Utf16String.cpp
index e6a4b4caf2..098d761e67 100644
--- a/Userland/Libraries/LibJS/Runtime/Utf16String.cpp
+++ b/Userland/Libraries/LibJS/Runtime/Utf16String.cpp
@@ -6,6 +6,7 @@
 
 #include <AK/StringView.h>
 #include <LibJS/Runtime/Utf16String.h>
+#include <LibJS/Runtime/VM.h>
 
 namespace JS {
 namespace Detail {
@@ -96,9 +97,9 @@ Utf16View Utf16String::substring_view(size_t code_unit_offset) const
     return view().substring_view(code_unit_offset);
 }
 
-DeprecatedString Utf16String::to_utf8() const
+ThrowCompletionOr<DeprecatedString> Utf16String::to_utf8(VM& vm) const
 {
-    return view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
+    return TRY_OR_THROW_OOM(vm, view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
 }
 
 u16 Utf16String::code_unit_at(size_t index) const
diff --git a/Userland/Libraries/LibJS/Runtime/Utf16String.h b/Userland/Libraries/LibJS/Runtime/Utf16String.h
index fdcc014252..4b101a8261 100644
--- a/Userland/Libraries/LibJS/Runtime/Utf16String.h
+++ b/Userland/Libraries/LibJS/Runtime/Utf16String.h
@@ -12,6 +12,7 @@
 #include <AK/Types.h>
 #include <AK/Utf16View.h>
 #include <AK/Vector.h>
+#include <LibJS/Runtime/Completion.h>
 
 namespace JS {
 namespace Detail {
@@ -49,7 +50,7 @@ public:
     Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const;
     Utf16View substring_view(size_t code_unit_offset) const;
 
-    DeprecatedString to_utf8() const;
+    ThrowCompletionOr<DeprecatedString> to_utf8(VM&) const;
     u16 code_unit_at(size_t index) const;
 
     size_t length_in_code_units() const;
diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h
index 9943d55909..47bcf7f39d 100644
--- a/Userland/Libraries/LibRegex/RegexMatch.h
+++ b/Userland/Libraries/LibRegex/RegexMatch.h
@@ -385,7 +385,7 @@ public:
     {
         return m_view.visit(
             [](StringView view) { return view.to_deprecated_string(); },
-            [](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); },
+            [](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes).release_value_but_fixme_should_propagate_errors(); },
             [](auto& view) {
                 StringBuilder builder;
                 for (auto it = view.begin(); it != view.end(); ++it)
```