diff options
-rw-r--r-- | AK/String.cpp | 30 | ||||
-rw-r--r-- | AK/String.h | 2 | ||||
-rw-r--r-- | Tests/AK/TestString.cpp | 20 |
3 files changed, 37 insertions, 15 deletions
diff --git a/AK/String.cpp b/AK/String.cpp index f5b3f69eaf..a03fcdefc4 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -11,7 +11,6 @@ #include <AK/MemMem.h> #include <AK/Stream.h> #include <AK/String.h> -#include <AK/Utf8View.h> #include <AK/Vector.h> #include <stdlib.h> @@ -132,10 +131,6 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data, // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization. VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT); - Utf8View view(StringView(utf8_data, byte_count)); - if (!view.validate()) - return Error::from_string_literal("StringData::from_utf8: Input was not valid UTF-8"); - VERIFY(utf8_data); u8* buffer = nullptr; auto new_string_data = TRY(create_uninitialized(byte_count, buffer)); @@ -143,6 +138,16 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data, return new_string_data; } +static ErrorOr<void> read_stream_into_buffer(Stream& stream, Bytes buffer) +{ + TRY(stream.read_entire_buffer(buffer)); + + if (!Utf8View { StringView { buffer } }.validate()) + return Error::from_string_literal("String::from_stream: Input was not valid UTF-8"); + + return {}; +} + ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_t byte_count) { // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization. @@ -150,12 +155,7 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_ u8* buffer = nullptr; auto new_string_data = TRY(create_uninitialized(byte_count, buffer)); - Bytes new_string_bytes = { buffer, byte_count }; - TRY(stream.read_entire_buffer(new_string_bytes)); - - Utf8View view(StringView { new_string_bytes }); - if (!view.validate()) - return Error::from_string_literal("StringData::from_stream: Input was not valid UTF-8"); + TRY(read_stream_into_buffer(stream, { buffer, byte_count })); return new_string_data; } @@ -230,6 +230,9 @@ void String::destroy_string() ErrorOr<String> String::from_utf8(StringView view) { + if (!Utf8View { view }.validate()) + return Error::from_string_literal("String::from_utf8: Input was not valid UTF-8"); + if (view.length() <= MAX_SHORT_STRING_BYTE_COUNT) { ShortString short_string; if (!view.is_empty()) @@ -246,7 +249,7 @@ ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count) if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) { ShortString short_string; if (byte_count > 0) - TRY(stream.read_entire_buffer({ short_string.storage, byte_count })); + TRY(Detail::read_stream_into_buffer(stream, { short_string.storage, byte_count })); short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG; return String { short_string }; } @@ -587,9 +590,6 @@ DeprecatedString String::to_deprecated_string() const ErrorOr<String> String::from_deprecated_string(DeprecatedString const& deprecated_string) { - Utf8View view(deprecated_string); - if (!view.validate()) - return Error::from_string_literal("String::from_deprecated_string: Input was not valid UTF-8"); return String::from_utf8(deprecated_string.view()); } diff --git a/AK/String.h b/AK/String.h index 45ac7a99fc..32959e26ab 100644 --- a/AK/String.h +++ b/AK/String.h @@ -20,6 +20,7 @@ #include <AK/Traits.h> #include <AK/Types.h> #include <AK/UnicodeUtils.h> +#include <AK/Utf8View.h> #include <AK/Vector.h> namespace AK { @@ -72,6 +73,7 @@ public: static AK_SHORT_STRING_CONSTEVAL String from_utf8_short_string(StringView string) { VERIFY(string.length() <= MAX_SHORT_STRING_BYTE_COUNT); + VERIFY(Utf8View { string }.validate()); ShortString short_string; for (size_t i = 0; i < string.length(); ++i) diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp index f992fa1bc0..d841bd56dd 100644 --- a/Tests/AK/TestString.cpp +++ b/Tests/AK/TestString.cpp @@ -140,6 +140,26 @@ TEST_CASE(long_streams) } } +TEST_CASE(invalid_utf8) +{ + auto string1 = String::from_utf8("long string \xf4\x8f\xbf\xc0"sv); // U+110000 + EXPECT(string1.is_error()); + EXPECT(string1.error().string_literal().contains("Input was not valid UTF-8"sv)); + + auto string2 = String::from_utf8("\xf4\xa1\xb0\xbd"sv); // U+121C3D + EXPECT(string2.is_error()); + EXPECT(string2.error().string_literal().contains("Input was not valid UTF-8"sv)); + + AllocatingMemoryStream stream; + MUST(stream.write_value<u8>(0xf4)); + MUST(stream.write_value<u8>(0xa1)); + MUST(stream.write_value<u8>(0xb0)); + MUST(stream.write_value<u8>(0xbd)); + auto string3 = String::from_stream(stream, stream.used_buffer_size()); + EXPECT_EQ(string3.is_error(), true); + EXPECT(string3.error().string_literal().contains("Input was not valid UTF-8"sv)); +} + TEST_CASE(from_code_points) { for (u32 code_point = 0; code_point < 0x80; ++code_point) { |