summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- AK/String.cpp 30
-rw-r--r-- AK/String.h 2
-rw-r--r-- Tests/AK/TestString.cpp 20
3 files changed, 37 insertions, 15 deletions
diff --git a/AK/String.cpp b/AK/String.cpp
index f5b3f69eaf..a03fcdefc4 100644
--- a/AK/String.cpp
+++ b/AK/String.cpp
@@ -11,7 +11,6 @@
#include <AK/MemMem.h>
#include <AK/Stream.h>
#include <AK/String.h>
-#include <AK/Utf8View.h>
#include <AK/Vector.h>
#include <stdlib.h>
@@ -132,10 +131,6 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data,
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT);
- Utf8View view(StringView(utf8_data, byte_count));
- if (!view.validate())
- return Error::from_string_literal("StringData::from_utf8: Input was not valid UTF-8");
-
VERIFY(utf8_data);
u8* buffer = nullptr;
auto new_string_data = TRY(create_uninitialized(byte_count, buffer));
@@ -143,6 +138,16 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data,
return new_string_data;
}
+static ErrorOr<void> read_stream_into_buffer(Stream& stream, Bytes buffer) // Reads from `stream` into `buffer`, then returns an error unless those bytes validate as UTF-8.
+{
+ TRY(stream.read_entire_buffer(buffer)); // A failed read is returned to the caller before any validation happens.
+
+ if (!Utf8View { StringView { buffer } }.validate()) // Validation covers exactly the span that was just passed to the read.
+ return Error::from_string_literal("String::from_stream: Input was not valid UTF-8");
+
+ return {}; // Success: the read completed and the bytes passed UTF-8 validation.
+}
+
ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_t byte_count)
{
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
@@ -150,12 +155,7 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_
u8* buffer = nullptr;
auto new_string_data = TRY(create_uninitialized(byte_count, buffer));
- Bytes new_string_bytes = { buffer, byte_count };
- TRY(stream.read_entire_buffer(new_string_bytes));
-
- Utf8View view(StringView { new_string_bytes });
- if (!view.validate())
- return Error::from_string_literal("StringData::from_stream: Input was not valid UTF-8");
+ TRY(read_stream_into_buffer(stream, { buffer, byte_count }));
return new_string_data;
}
@@ -230,6 +230,9 @@ void String::destroy_string()
ErrorOr<String> String::from_utf8(StringView view)
{
+ if (!Utf8View { view }.validate())
+ return Error::from_string_literal("String::from_utf8: Input was not valid UTF-8");
+
if (view.length() <= MAX_SHORT_STRING_BYTE_COUNT) {
ShortString short_string;
if (!view.is_empty())
@@ -246,7 +249,7 @@ ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)
if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) {
ShortString short_string;
if (byte_count > 0)
- TRY(stream.read_entire_buffer({ short_string.storage, byte_count }));
+ TRY(Detail::read_stream_into_buffer(stream, { short_string.storage, byte_count }));
short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG;
return String { short_string };
}
@@ -587,9 +590,6 @@ DeprecatedString String::to_deprecated_string() const
ErrorOr<String> String::from_deprecated_string(DeprecatedString const& deprecated_string)
{
- Utf8View view(deprecated_string);
- if (!view.validate())
- return Error::from_string_literal("String::from_deprecated_string: Input was not valid UTF-8");
return String::from_utf8(deprecated_string.view());
}
diff --git a/AK/String.h b/AK/String.h
index 45ac7a99fc..32959e26ab 100644
--- a/AK/String.h
+++ b/AK/String.h
@@ -20,6 +20,7 @@
#include <AK/Traits.h>
#include <AK/Types.h>
#include <AK/UnicodeUtils.h>
+#include <AK/Utf8View.h>
#include <AK/Vector.h>
namespace AK {
@@ -72,6 +73,7 @@ public:
static AK_SHORT_STRING_CONSTEVAL String from_utf8_short_string(StringView string)
{
VERIFY(string.length() <= MAX_SHORT_STRING_BYTE_COUNT);
+ VERIFY(Utf8View { string }.validate());
ShortString short_string;
for (size_t i = 0; i < string.length(); ++i)
diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp
index f992fa1bc0..d841bd56dd 100644
--- a/Tests/AK/TestString.cpp
+++ b/Tests/AK/TestString.cpp
@@ -140,6 +140,26 @@ TEST_CASE(long_streams)
}
}
+TEST_CASE(invalid_utf8) // Checks that String::from_utf8 and String::from_stream both yield an error whose message mentions invalid UTF-8.
+{
+ auto string1 = String::from_utf8("long string \xf4\x8f\xbf\xc0"sv); // U+110000 -- NOTE(review): trailing 0xC0 is not a continuation byte; the 4-byte form of U+110000 would be F4 90 80 80. Presumably long enough to skip the short-string path; confirm against MAX_SHORT_STRING_BYTE_COUNT.
+ EXPECT(string1.is_error());
+ EXPECT(string1.error().string_literal().contains("Input was not valid UTF-8"sv));
+
+ auto string2 = String::from_utf8("\xf4\xa1\xb0\xbd"sv); // U+121C3D, which lies above the U+10FFFF Unicode maximum
+ EXPECT(string2.is_error());
+ EXPECT(string2.error().string_literal().contains("Input was not valid UTF-8"sv));
+
+ AllocatingMemoryStream stream; // Same four bytes as string2, this time supplied through the stream-based constructor.
+ MUST(stream.write_value<u8>(0xf4));
+ MUST(stream.write_value<u8>(0xa1));
+ MUST(stream.write_value<u8>(0xb0));
+ MUST(stream.write_value<u8>(0xbd));
+ auto string3 = String::from_stream(stream, stream.used_buffer_size()); // Requests every byte written above.
+ EXPECT_EQ(string3.is_error(), true);
+ EXPECT(string3.error().string_literal().contains("Input was not valid UTF-8"sv));
+}
+
TEST_CASE(from_code_points)
{
for (u32 code_point = 0; code_point < 0x80; ++code_point) {