summary | refs | log | tree | commit | diff
path: root/Userland
diff options
context:
space:
mode:
author: Tim Schumacher <timschumi@gmx.de> 2023-03-30 00:39:36 +0200
committer: Andreas Kling <kling@serenityos.org> 2023-03-30 08:45:35 +0200
commit: 8ff36e5910445ad59ceb5d1d2acb84381c15036e (patch)
tree: e8935d1d64c53d767f343773207d081adf94cf4d /Userland
parent: b6f3b2f1161a3c98c947511b3452e878cae449c7 (diff)
download: serenity-8ff36e5910445ad59ceb5d1d2acb84381c15036e.zip
LibCompress: Implement proper handling of LZMA end-of-stream markers
Diffstat (limited to 'Userland')
-rw-r--r-- Userland/Libraries/LibCompress/Lzma.cpp | 37
-rw-r--r-- Userland/Libraries/LibCompress/Lzma.h | 1
-rw-r--r-- Userland/Libraries/LibCompress/Lzma2.cpp | 3
3 files changed, 32 insertions, 9 deletions
diff --git a/Userland/Libraries/LibCompress/Lzma.cpp b/Userland/Libraries/LibCompress/Lzma.cpp
index c858b39879..5deafa2c0a 100644
--- a/Userland/Libraries/LibCompress/Lzma.cpp
+++ b/Userland/Libraries/LibCompress/Lzma.cpp
@@ -81,6 +81,7 @@ ErrorOr<LzmaDecompressorOptions> LzmaHeader::as_decompressor_options() const
.position_bits = model_properties.position_bits,
.dictionary_size = dictionary_size(),
.uncompressed_size = uncompressed_size(),
+ .reject_end_of_stream_marker = false,
};
}
@@ -457,17 +458,15 @@ ErrorOr<u32> LzmaDecompressor::decode_normalized_match_distance(u16 normalized_m
ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
{
while (m_dictionary->used_space() < bytes.size() && m_dictionary->empty_space() != 0) {
- if (m_found_end_of_stream_marker) {
- if (m_options.uncompressed_size.has_value() && m_total_decoded_bytes < m_options.uncompressed_size.value())
- return Error::from_string_literal("Found end-of-stream marker earlier than expected");
-
+ if (m_found_end_of_stream_marker)
break;
- }
if (has_reached_expected_data_size()) {
- // FIXME: This should validate that either EOF or the 'end of stream' marker follow immediately.
- // Both of those cases count as the 'end of stream' marker being found and should check for a clean decoder state.
- break;
+ // If the decoder is in a clean state, we assume that this is fine.
+ if (is_range_decoder_in_clean_state())
+ break;
+
+ // Otherwise, we give it one last try to find the end marker in the remaining data.
}
// "The decoder calculates "state2" variable value to select exact variable from
@@ -543,6 +542,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
// IsMatch[state2] decode
// 0 - the Literal"
if (TRY(decode_bit_with_probability(m_is_match_probabilities[state2])) == 0) {
+ // If we are already past the expected uncompressed size, we are already in "look for EOS only" mode.
+ if (has_reached_expected_data_size())
+ return Error::from_string_literal("Found literal after reaching expected uncompressed size");
+
// "At first the LZMA decoder must check that it doesn't exceed
// specified uncompressed size."
// This is already checked for at the beginning of the loop.
@@ -577,11 +580,20 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
// "End of stream" marker, so we can stop decoding and check finishing
// condition in Range Decoder"
if (m_rep0 == 0xFFFFFFFF) {
+ // If we should reject end-of-stream markers, do so now.
+ // Note that this is not part of LZMA, as LZMA allows end-of-stream markers in all contexts, so pure LZMA should never set this option.
+ if (m_options.reject_end_of_stream_marker)
+ return Error::from_string_literal("An end-of-stream marker was found, but the LZMA stream is configured to reject them");
+
// The range decoder condition is checked after breaking out of the loop.
m_found_end_of_stream_marker = true;
continue;
}
+ // If we are looking for EOS, but haven't found it here, the stream is corrupted.
+ if (has_reached_expected_data_size())
+ return Error::from_string_literal("First simple match after the expected uncompressed size is not the EOS marker");
+
// "If uncompressed size is defined, LZMA decoder must check that it doesn't
// exceed that specified uncompressed size."
// This is being checked for in the common "copy to buffer" implementation.
@@ -598,6 +610,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
continue;
}
+ // If we are looking for EOS, but find another match type, the stream is also corrupted.
+ if (has_reached_expected_data_size())
+ return Error::from_string_literal("First match type after the expected uncompressed size is not a simple match");
+
// " 1 - Rep Match
// IsRepG0[state] decode
// 0 - the distance is rep0"
@@ -666,7 +682,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
TRY(copy_match_to_buffer(normalized_length + normalized_to_real_match_length_offset));
}
- if (m_found_end_of_stream_marker) {
+ if (m_found_end_of_stream_marker || has_reached_expected_data_size()) {
+ if (m_options.uncompressed_size.has_value() && m_total_decoded_bytes < m_options.uncompressed_size.value())
+ return Error::from_string_literal("Found end-of-stream marker earlier than expected");
+
if (!is_range_decoder_in_clean_state())
return Error::from_string_literal("LZMA stream ends in an unclean state");
}
diff --git a/Userland/Libraries/LibCompress/Lzma.h b/Userland/Libraries/LibCompress/Lzma.h
index bfd9cc7070..b476118032 100644
--- a/Userland/Libraries/LibCompress/Lzma.h
+++ b/Userland/Libraries/LibCompress/Lzma.h
@@ -29,6 +29,7 @@ struct LzmaDecompressorOptions {
u8 position_bits { 0 };
u32 dictionary_size { 0 };
Optional<u64> uncompressed_size;
+ bool reject_end_of_stream_marker { false };
};
// Described in section "lzma file format".
diff --git a/Userland/Libraries/LibCompress/Lzma2.cpp b/Userland/Libraries/LibCompress/Lzma2.cpp
index 3d31c8cab0..92aa8c7faa 100644
--- a/Userland/Libraries/LibCompress/Lzma2.cpp
+++ b/Userland/Libraries/LibCompress/Lzma2.cpp
@@ -106,6 +106,9 @@ ErrorOr<Bytes> Lzma2Decompressor::read_some(Bytes bytes)
.position_bits = properties.position_bits,
.dictionary_size = static_cast<u32>(dictionary_size),
.uncompressed_size = uncompressed_size,
+
+ // Note: This is not specified anywhere. However, it is apparently tested by bad-1-lzma2-7.xz from the XZ utils test files.
+ .reject_end_of_stream_marker = true,
};
[[fallthrough]];
}