From c020ee8bfa33e27ef438a65d7dc933660eb9be87 Mon Sep 17 00:00:00 2001
From: Tim Schumacher <timschumi@gmx.de>
Date: Wed, 29 Mar 2023 15:21:03 +0200
Subject: LibCompress: Avoid overflowing the size of uncompressed LZMA2 chunks

---
 Tests/LibCompress/TestXz.cpp             | 52 ++++++++++++++++++++++++++++++++
 Userland/Libraries/LibCompress/Lzma2.cpp |  2 +-
 2 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/Tests/LibCompress/TestXz.cpp b/Tests/LibCompress/TestXz.cpp
index c66d31330b..73db7532cb 100644
--- a/Tests/LibCompress/TestXz.cpp
+++ b/Tests/LibCompress/TestXz.cpp
@@ -68,6 +68,58 @@ TEST_CASE(lzma2_compressed_without_settings_after_uncompressed)
     EXPECT_EQ("\x00\x00\x00"sv.bytes(), buffer.span());
 }
 
+TEST_CASE(lzma2_uncompressed_size_overflow)
+{
+    Array<u8, 48> const compressed {
+        // Stream Header
+        0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00, // Magic
+        0x00, 0x00,                         // Stream Flags (Check: None)
+        0xFF, 0x12, 0xD9, 0x41,             // CRC32
+
+        // Block Header
+        0x02, // Block Header Size [(0x02 + 1) * 4, i.e. 12 bytes]
+        0x00, // Block Flags (one filter, no compressed or uncompressed size present)
+        //   Filter 0 Flags
+        0x21,                   // Filter ID (0x21 for LZMA2, encoded as a multibyte integer)
+        0x01,                   // Size of Properties (0x01, encoded as a multibyte integer)
+        0x00,                   // Filter Properties (LZMA2 encoded dictionary size byte; 0x00 = 4 KiB)
+        0x00, 0x00, 0x00,       // Header Padding
+        0x37, 0x27, 0x97, 0xD6, // CRC32
+
+        // Compressed Data (LZMA2)
+        //  Uncompressed chunk with dictionary reset
+        0x01,       // Control Byte
+        0xFF, 0xFF, // 16-bit data size minus one (big-endian)
+                    // Note: The following data should be handled as verbatim data if handled correctly.
+                    //       If the size overflows, no bytes will be copied and it will be interpreted as the remainder of the XZ stream.
+        //  End of LZMA2 stream
+        0x00,
+
+        // Index
+        0x00, // Index Indicator
+        0x01, // Number of Records (multibyte integer)
+        //   Record 0
+        0x10, // Unpadded Size (multibyte integer)
+        0x00, // Uncompressed Size (multibyte integer)
+        //   CRC32
+        0x7A, 0xA7, 0x44, 0x6A,
+
+        // Stream Footer
+        0x06, 0x72, 0x9E, 0x7A, // CRC32
+        0x01, 0x00, 0x00, 0x00, // Backward Size
+        0x00, 0x00,             // Stream Flags
+        0x59, 0x5A,             // Footer Magic Bytes
+    };
+
+    auto stream = MUST(try_make<FixedMemoryStream>(compressed));
+    auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
+    auto buffer_or_error = decompressor->read_until_eof(PAGE_SIZE);
+
+    // If the size overflows (and the uncompressed size being 0 doesn't get caught otherwise), the data payload will be interpreted as stream metadata, resulting in a valid stream.
+    // If the size doesn't overflow, the stream ends in the middle unexpectedly, which should result in an error.
+    EXPECT(buffer_or_error.is_error());
+}
+
 // The following tests are based on test files from the XZ utils package, which have been placed in the public domain:
 // https://tukaani.org/xz/xz-5.4.1.tar.xz (subdirectory /xz-5.4.1/tests/files)
 // Test descriptions have been taken from the README file in the test files directory.
diff --git a/Userland/Libraries/LibCompress/Lzma2.cpp b/Userland/Libraries/LibCompress/Lzma2.cpp
index 912a0c8968..244c7c5534 100644
--- a/Userland/Libraries/LibCompress/Lzma2.cpp
+++ b/Userland/Libraries/LibCompress/Lzma2.cpp
@@ -54,7 +54,7 @@ ErrorOr<Bytes> Lzma2Decompressor::read_some(Bytes bytes)
             // "Uncompressed chunks consist of:
             //   - A 16-bit big-endian value encoding the data size minus one
             //   - The data to be copied verbatim into the dictionary and the output"
-            u16 data_size = TRY(m_stream->read_value<BigEndian<u16>>()) + 1;
+            u32 data_size = TRY(m_stream->read_value<BigEndian<u16>>()) + 1;
 
             m_in_uncompressed_chunk = true;
             m_current_chunk_stream = TRY(try_make<ConstrainedStream>(MaybeOwned { *m_stream }, data_size));
-- 
cgit v1.2.3