diff options
author | asynts <asynts@gmail.com> | 2020-08-28 17:53:57 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-08-30 09:56:10 +0200 |
commit | 3f1dfc2e978c9b7c3f07a62ba5f52e4012560101 (patch) | |
tree | 44ced4e680e2b3f0b82001397799c8985c220e52 | |
parent | 966445373964c0b730d9eea902faf8a59a1758fd (diff) | |
download | serenity-3f1dfc2e978c9b7c3f07a62ba5f52e4012560101.zip |
LibCompress: Implement gzip.
-rw-r--r-- | Libraries/LibCompress/CMakeLists.txt | 3 | ||||
-rw-r--r-- | Libraries/LibCompress/Gzip.cpp | 186 | ||||
-rw-r--r-- | Libraries/LibCompress/Gzip.h | 92 | ||||
-rw-r--r-- | Userland/test-compress.cpp | 34 |
4 files changed, 314 insertions, 1 deletions
diff --git a/Libraries/LibCompress/CMakeLists.txt b/Libraries/LibCompress/CMakeLists.txt
index a61ff5a486..f76ba8d5b5 100644
--- a/Libraries/LibCompress/CMakeLists.txt
+++ b/Libraries/LibCompress/CMakeLists.txt
@@ -1,7 +1,8 @@
 set(SOURCES
     Deflate.cpp
     Zlib.cpp
+    Gzip.cpp
 )
 
 serenity_lib(LibCompress compression)
-target_link_libraries(LibCompress LibC)
+target_link_libraries(LibCompress LibC LibCrypto)
diff --git a/Libraries/LibCompress/Gzip.cpp b/Libraries/LibCompress/Gzip.cpp
new file mode 100644
index 0000000000..3e7170342c
--- /dev/null
+++ b/Libraries/LibCompress/Gzip.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2020, the SerenityOS developers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <LibCompress/Gzip.h>
+
+#include <AK/String.h>
+
+namespace Compress {
+
+bool GzipDecompressor::BlockHeader::valid_magic_number() const
+{
+    return identification_1 == 0x1f && identification_2 == 0x8b;
+}
+
+bool GzipDecompressor::BlockHeader::supported_by_implementation() const
+{
+    if (compression_method != 0x08) {
+        // RFC 1952 does not define any compression methods other than deflate.
+        return false;
+    }
+
+    if (flags > Flags::MAX) {
+        // RFC 1952 does not define any more flags.
+        return false;
+    }
+
+    // FIXME: Verify the header CRC16 (FHCRC); until then report such members as unsupported.
+    if (flags & Flags::FHCRC)
+        return false;
+
+    return true;
+}
+
+GzipDecompressor::GzipDecompressor(InputStream& stream)
+    : m_input_stream(stream)
+{
+}
+
+GzipDecompressor::~GzipDecompressor()
+{
+    m_current_member.clear();
+}
+
+// FIXME: Again, there are surely a ton of bugs because the code doesn't check for read errors.
+size_t GzipDecompressor::read(Bytes bytes)
+{
+    if (m_current_member.has_value()) {
+        size_t nread = current_member().m_stream.read(bytes);
+        current_member().m_checksum.update(bytes.trim(nread));
+        current_member().m_nread += nread;
+
+        if (nread < bytes.size()) {
+            LittleEndian<u32> crc32, input_size;
+            m_input_stream >> crc32 >> input_size;
+
+            if (crc32 != current_member().m_checksum.digest()) {
+                m_error = true;
+                return 0;
+            }
+
+            if (input_size != current_member().m_nread) {
+                m_error = true;
+                return 0;
+            }
+
+            m_current_member.clear();
+
+            return nread + read(bytes.slice(nread));
+        }
+
+        return nread;
+    } else {
+        if (m_input_stream.eof())
+            return 0;
+
+        // FIXME: This fails with the new changes?
+        BlockHeader header;
+        m_input_stream >> Bytes { &header, sizeof(header) };
+
+        if (!header.valid_magic_number() || !header.supported_by_implementation()) {
+            m_error = true;
+            return 0;
+        }
+
+        if (header.flags & Flags::FEXTRA) {
+            LittleEndian<u16> extra_length; // XLEN: size in bytes of the whole extra field (RFC 1952).
+            m_input_stream >> extra_length;
+            m_input_stream.discard_or_error(extra_length); // Skip every subfield; none are interpreted.
+        }
+
+        if (header.flags & Flags::FNAME) {
+            String original_filename;
+            m_input_stream >> original_filename;
+        }
+
+        if (header.flags & Flags::FCOMMENT) {
+            String comment;
+            m_input_stream >> comment;
+        }
+
+        m_current_member.emplace(header, m_input_stream);
+        return read(bytes);
+    }
+}
+
+bool GzipDecompressor::read_or_error(Bytes bytes)
+{
+    if (read(bytes) < bytes.size()) {
+        m_error = true;
+        return false;
+    }
+
+    return true;
+}
+
+bool GzipDecompressor::discard_or_error(size_t count)
+{
+    u8 buffer[4096];
+
+    size_t ndiscarded = 0;
+    while (ndiscarded < count) {
+        if (eof()) {
+            m_error = true;
+            return false;
+        }
+
+        ndiscarded += read({ buffer, min<size_t>(count - ndiscarded, sizeof(buffer)) });
+    }
+
+    return true;
+}
+
+ByteBuffer GzipDecompressor::decompress_all(ReadonlyBytes bytes)
+{
+    InputMemoryStream memory_stream { bytes };
+    GzipDecompressor gzip_stream { memory_stream };
+
+    auto buffer = ByteBuffer::create_uninitialized(4096);
+
+    size_t nread = 0;
+    while (!gzip_stream.eof()) {
+        nread += gzip_stream.read(buffer.bytes().slice(nread));
+
+        if (buffer.size() - nread < 4096)
+            buffer.grow(buffer.size() + 4096);
+    }
+
+    buffer.trim(nread);
+    return buffer;
+}
+
+bool GzipDecompressor::eof() const
+{
+    if (m_current_member.has_value()) {
+        // FIXME: There is an ugly edge case where we read the whole deflate block
+        //        but haven't read CRC32 and ISIZE.
+        return current_member().m_stream.eof() && m_input_stream.eof();
+    } else {
+        return m_input_stream.eof();
+    }
+}
+
+}
diff --git a/Libraries/LibCompress/Gzip.h b/Libraries/LibCompress/Gzip.h
new file mode 100644
index 0000000000..72e5e49aba
--- /dev/null
+++ b/Libraries/LibCompress/Gzip.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020, the SerenityOS developers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <LibCompress/Deflate.h>
+#include <LibCrypto/Checksum/CRC32.h>
+
+namespace Compress {
+
+class GzipDecompressor final : public InputStream {
+public:
+    GzipDecompressor(InputStream&);
+    ~GzipDecompressor();
+
+    size_t read(Bytes) override;
+    bool read_or_error(Bytes) override;
+    bool discard_or_error(size_t) override;
+    bool eof() const override;
+
+    static ByteBuffer decompress_all(ReadonlyBytes);
+
+private:
+    struct [[gnu::packed]] BlockHeader
+    {
+        u8 identification_1;
+        u8 identification_2;
+        u8 compression_method;
+        u8 flags;
+        LittleEndian<u32> modification_time;
+        u8 extra_flags;
+        u8 operating_system;
+
+        bool valid_magic_number() const;
+        bool supported_by_implementation() const;
+    };
+
+    struct Flags {
+        static constexpr u8 FTEXT = 1 << 0;
+        static constexpr u8 FHCRC = 1 << 1;
+        static constexpr u8 FEXTRA = 1 << 2;
+        static constexpr u8 FNAME = 1 << 3;
+        static constexpr u8 FCOMMENT = 1 << 4;
+
+        static constexpr u8 MAX = FTEXT | FHCRC | FEXTRA | FNAME | FCOMMENT;
+    };
+
+    class Member {
+    public:
+        Member(BlockHeader header, InputStream& stream)
+            : m_header(header)
+            , m_stream(stream)
+        {
+        }
+
+        BlockHeader m_header;
+        DeflateDecompressor m_stream;
+        Crypto::Checksum::CRC32 m_checksum;
+        size_t m_nread { 0 };
+    };
+
+    const Member& current_member() const { return m_current_member.value(); }
+    Member& current_member() { return m_current_member.value(); }
+
+    InputStream& m_input_stream;
+    Optional<Member> m_current_member;
+};
+
+}
diff --git a/Userland/test-compress.cpp b/Userland/test-compress.cpp
index 87473970c6..bf674fc4fd 100644
--- a/Userland/test-compress.cpp
+++ b/Userland/test-compress.cpp
@@ -27,6 +27,7 @@
 #include <AK/TestSuite.h>
 
 #include <LibCompress/Deflate.h>
+#include <LibCompress/Gzip.h>
 #include <LibCompress/Zlib.h>
 
 static bool compare(ReadonlyBytes lhs, ReadonlyBytes rhs)
@@ -119,4 +120,37 @@ TEST_CASE(zlib_decompress_simple)
     EXPECT(compare({ uncompressed, sizeof(uncompressed) - 1 }, decompressed.bytes()));
 }
 
+TEST_CASE(gzip_decompress_simple)
+{
+    const u8 compressed[] = {
+        0x1f, 0x8b, 0x08, 0x00, 0x77, 0xff, 0x47, 0x5f, 0x02, 0xff, 0x2b, 0xcf,
+        0x2f, 0x4a, 0x31, 0x54, 0x48, 0x4c, 0x4a, 0x56, 0x28, 0x07, 0xb2, 0x8c,
+        0x00, 0xc2, 0x1d, 0x22, 0x15, 0x0f, 0x00, 0x00, 0x00
+    };
+
+    const u8 uncompressed[] = "word1 abc word2";
+
+    const auto decompressed = Compress::GzipDecompressor::decompress_all({ compressed, sizeof(compressed) });
+
+    EXPECT(compare({ uncompressed, sizeof(uncompressed) - 1 }, decompressed.bytes()));
+}
+
+TEST_CASE(gzip_multiple_members)
+{
+
+    const u8 compressed[] = {
+        0x1f, 0x8b, 0x08, 0x00, 0xe0, 0x03, 0x48, 0x5f, 0x02, 0xff, 0x4b, 0x4c,
+        0x4a, 0x4e, 0x4c, 0x4a, 0x06, 0x00, 0x4c, 0x99, 0x6e, 0x72, 0x06, 0x00,
+        0x00, 0x00, 0x1f, 0x8b, 0x08, 0x00, 0xe0, 0x03, 0x48, 0x5f, 0x02, 0xff,
+        0x4b, 0x4c, 0x4a, 0x4e, 0x4c, 0x4a, 0x06, 0x00, 0x4c, 0x99, 0x6e, 0x72,
+        0x06, 0x00, 0x00, 0x00
+    };
+
+    const u8 uncompressed[] = "abcabcabcabc";
+
+    const auto decompressed = Compress::GzipDecompressor::decompress_all({ compressed, sizeof(compressed) });
+
+    EXPECT(compare({ uncompressed, sizeof(uncompressed) - 1 }, decompressed.bytes()));
+}
+
 TEST_MAIN(Compress)