diff options
Diffstat (limited to 'Libraries')
-rw-r--r-- | Libraries/LibCompress/Deflate.cpp | 98 | ||||
-rw-r--r-- | Libraries/LibCompress/Deflate.h | 127 | ||||
-rw-r--r-- | Libraries/LibCompress/Zlib.cpp | 2 | ||||
-rw-r--r-- | Libraries/LibCompress/Zlib.h | 1 |
4 files changed, 159 insertions, 69 deletions
diff --git a/Libraries/LibCompress/Deflate.cpp b/Libraries/LibCompress/Deflate.cpp index 9bd05f00da..c321ac775b 100644 --- a/Libraries/LibCompress/Deflate.cpp +++ b/Libraries/LibCompress/Deflate.cpp @@ -33,39 +33,38 @@ namespace Compress { -Vector<u8> Deflate::decompress() +bool DeflateStream::read_next_block() const { - bool is_final_block = false; - - do { - is_final_block = m_reader.read(); - auto block_type = m_reader.read_bits(2); - - switch (block_type) { - case 0: - decompress_uncompressed_block(); - break; - case 1: - decompress_static_block(); - break; - case 2: - decompress_dynamic_block(); - break; - case 3: - dbg() << "Block contains reserved block type..."; - ASSERT_NOT_REACHED(); - break; - default: - dbg() << "Invalid block type was read..."; - ASSERT_NOT_REACHED(); - break; - } - } while (!is_final_block); + if (m_read_last_block) + return false; + + m_read_last_block = m_reader.read_bits(1); + auto block_type = m_reader.read_bits(2); + + switch (block_type) { + case 0: + decompress_uncompressed_block(); + break; + case 1: + decompress_static_block(); + break; + case 2: + decompress_dynamic_block(); + break; + case 3: + dbg() << "Block contains reserved block type..."; + ASSERT_NOT_REACHED(); + break; + default: + dbg() << "Invalid block type was read..."; + ASSERT_NOT_REACHED(); + break; + } - return m_output_buffer; + return true; } -void Deflate::decompress_uncompressed_block() +void DeflateStream::decompress_uncompressed_block() const { // Align to the next byte boundary. while (m_reader.get_bit_byte_offset() != 0) { @@ -87,17 +86,16 @@ void Deflate::decompress_uncompressed_block() ASSERT_NOT_REACHED(); } - m_output_buffer.append(byte); - m_history_buffer.enqueue(byte); + m_intermediate_stream << byte; } } -void Deflate::decompress_static_block() +void DeflateStream::decompress_static_block() const { decompress_huffman_block(m_literal_length_codes, &m_fixed_distance_codes); } -void Deflate::decompress_dynamic_block() +void DeflateStream::decompress_dynamic_block() const { auto codes = decode_huffman_codes(); if (codes.size() == 2) { @@ -107,7 +105,7 @@ void Deflate::decompress_dynamic_block() } } -void Deflate::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCode* distance_codes) +void DeflateStream::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCode* distance_codes) const { for (;;) { u32 symbol = length_codes.next_symbol(m_reader); @@ -119,8 +117,7 @@ void Deflate::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCod // literal byte. if (symbol < 256) { - m_history_buffer.enqueue(symbol); - m_output_buffer.append(symbol); + m_intermediate_stream << static_cast<u8>(symbol); continue; } @@ -144,7 +141,7 @@ void Deflate::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCod } } -Vector<CanonicalCode> Deflate::decode_huffman_codes() +Vector<CanonicalCode> DeflateStream::decode_huffman_codes() const { // FIXME: This path is not tested. Vector<CanonicalCode> result; @@ -244,7 +241,7 @@ Vector<CanonicalCode> Deflate::decode_huffman_codes() return result; } -u32 Deflate::decode_run_length(u32 symbol) +u32 DeflateStream::decode_run_length(u32 symbol) const { if (symbol <= 264) { return symbol - 254; @@ -263,7 +260,7 @@ u32 Deflate::decode_run_length(u32 symbol) ASSERT_NOT_REACHED(); } -u32 Deflate::decode_distance(u32 symbol) +u32 DeflateStream::decode_distance(u32 symbol) const { if (symbol <= 3) { return symbol + 1; @@ -278,15 +275,19 @@ u32 Deflate::decode_distance(u32 symbol) ASSERT_NOT_REACHED(); } -void Deflate::copy_from_history(u32 distance, u32 run) +void DeflateStream::copy_from_history(u32 distance, u32 run) const { - auto head_index = (m_history_buffer.head_index() + m_history_buffer.size()) % m_history_buffer.capacity(); - auto read_index = (head_index - distance + m_history_buffer.capacity()) % m_history_buffer.capacity(); - for (size_t i = 0; i < run; i++) { - auto data = m_history_buffer.at(read_index++); - m_output_buffer.append(data); - m_history_buffer.enqueue(data); + u8 byte; + + // FIXME: In many cases we can read more than one byte at a time, this should + // be refactored into a while loop. Beware, edge case: + // + // // The first four bytes are on the stream already, the other four + // // are written by copy_from_history() itself. + // copy_from_history(4, 8); + m_intermediate_stream.read({ &byte, sizeof(byte) }, m_intermediate_stream.woffset() - distance); + m_intermediate_stream << byte; } } @@ -335,7 +336,7 @@ u32 BitStreamReader::read_bits(u8 count) return result; } -Vector<u8> Deflate::generate_literal_length_codes() +Vector<u8> DeflateStream::generate_literal_length_codes() const { Vector<u8> ll_codes; ll_codes.resize(288); @@ -346,7 +347,7 @@ Vector<u8> Deflate::generate_literal_length_codes() return ll_codes; } -Vector<u8> Deflate::generate_fixed_distance_codes() +Vector<u8> DeflateStream::generate_fixed_distance_codes() const { Vector<u8> fd_codes; fd_codes.resize(32); @@ -423,4 +424,5 @@ u32 CanonicalCode::next_symbol(BitStreamReader& reader) } } } + } diff --git a/Libraries/LibCompress/Deflate.h b/Libraries/LibCompress/Deflate.h index 54e053155b..fc138f52df 100644 --- a/Libraries/LibCompress/Deflate.h +++ b/Libraries/LibCompress/Deflate.h @@ -28,6 +28,7 @@ #include <AK/CircularQueue.h> #include <AK/Span.h> +#include <AK/Stream.h> #include <AK/Types.h> #include <AK/Vector.h> #include <cstring> @@ -65,34 +66,120 @@ private: Vector<u32> m_symbol_values; }; -class Deflate { +// Implements a DEFLATE decompressor according to RFC 1951. +class DeflateStream final : public InputStream { public: - Deflate(ReadonlyBytes data) + // FIXME: This should really return a ByteBuffer. + static Vector<u8> decompress_all(ReadonlyBytes bytes) + { + DeflateStream stream { bytes }; + while (stream.read_next_block()) { + } + + Vector<u8> vector; + vector.resize(stream.m_intermediate_stream.remaining()); + stream >> vector; + + return vector; + } + + DeflateStream(ReadonlyBytes data) : m_reader(data) , m_literal_length_codes(generate_literal_length_codes()) , m_fixed_distance_codes(generate_fixed_distance_codes()) { } - Vector<u8> decompress(); + // FIXME: Accept an InputStream. + + size_t read(Bytes bytes) override + { + if (m_intermediate_stream.remaining() >= bytes.size()) + return m_intermediate_stream.read_or_error(bytes); + + while (read_next_block()) { + if (m_intermediate_stream.remaining() >= bytes.size()) + return m_intermediate_stream.read_or_error(bytes); + } + + return m_intermediate_stream.read(bytes); + } + + bool read_or_error(Bytes bytes) override + { + if (m_intermediate_stream.remaining() >= bytes.size()) { + m_intermediate_stream.read_or_error(bytes); + return true; + } + + while (read_next_block()) { + if (m_intermediate_stream.remaining() >= bytes.size()) { + m_intermediate_stream.read_or_error(bytes); + return true; + } + } + + m_error = true; + return false; + } + + bool eof() const override + { + if (!m_intermediate_stream.eof()) + return false; + + while (read_next_block()) { + if (!m_intermediate_stream.eof()) + return false; + } + + return true; + } + + bool discard_or_error(size_t count) override + { + if (m_intermediate_stream.remaining() >= count) { + m_intermediate_stream.discard_or_error(count); + return true; + } + + while (read_next_block()) { + if (m_intermediate_stream.remaining() >= count) { + m_intermediate_stream.discard_or_error(count); + return true; + } + } + + m_error = true; + return false; + } private: - void decompress_uncompressed_block(); - void decompress_static_block(); - void decompress_dynamic_block(); - void decompress_huffman_block(CanonicalCode&, CanonicalCode*); - Vector<CanonicalCode> decode_huffman_codes(); - void copy_from_history(u32, u32); - u32 decode_run_length(u32); - u32 decode_distance(u32); - Vector<u8> generate_literal_length_codes(); - Vector<u8> generate_fixed_distance_codes(); - - BitStreamReader m_reader; - CircularQueue<u8, 32 * 1024> m_history_buffer; - Vector<u8, 256> m_output_buffer; - - CanonicalCode m_literal_length_codes; - CanonicalCode m_fixed_distance_codes; + void decompress_uncompressed_block() const; + void decompress_static_block() const; + void decompress_dynamic_block() const; + void decompress_huffman_block(CanonicalCode&, CanonicalCode*) const; + + Vector<CanonicalCode> decode_huffman_codes() const; + u32 decode_run_length(u32) const; + u32 decode_distance(u32) const; + + void copy_from_history(u32, u32) const; + + Vector<u8> generate_literal_length_codes() const; + Vector<u8> generate_fixed_distance_codes() const; + + mutable BitStreamReader m_reader; + + mutable CanonicalCode m_literal_length_codes; + mutable CanonicalCode m_fixed_distance_codes; + + // FIXME: Theoretically, blocks can be extremly large, reading a single block could + // exhaust memory. Maybe wait for C++20 coroutines? + bool read_next_block() const; + + mutable bool m_read_last_block { false }; + mutable DuplexMemoryStream m_intermediate_stream; }; + } diff --git a/Libraries/LibCompress/Zlib.cpp b/Libraries/LibCompress/Zlib.cpp index 2ce9a415b6..15eb13f8bd 100644 --- a/Libraries/LibCompress/Zlib.cpp +++ b/Libraries/LibCompress/Zlib.cpp @@ -57,7 +57,7 @@ Zlib::Zlib(ReadonlyBytes data) Vector<u8> Zlib::decompress() { - return Deflate(m_data_bytes).decompress(); + return DeflateStream::decompress_all(m_data_bytes); } u32 Zlib::checksum() diff --git a/Libraries/LibCompress/Zlib.h b/Libraries/LibCompress/Zlib.h index cd99bbb0f0..8336527e39 100644 --- a/Libraries/LibCompress/Zlib.h +++ b/Libraries/LibCompress/Zlib.h @@ -49,4 +49,5 @@ private: ReadonlyBytes m_input_data; ReadonlyBytes m_data_bytes; }; + } |