summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorasynts <asynts@gmail.com>2020-08-28 17:53:57 +0200
committerAndreas Kling <kling@serenityos.org>2020-08-30 09:56:10 +0200
commit3f1dfc2e978c9b7c3f07a62ba5f52e4012560101 (patch)
tree44ced4e680e2b3f0b82001397799c8985c220e52
parent966445373964c0b730d9eea902faf8a59a1758fd (diff)
downloadserenity-3f1dfc2e978c9b7c3f07a62ba5f52e4012560101.zip
LibCompress: Implement gzip.
-rw-r--r--Libraries/LibCompress/CMakeLists.txt3
-rw-r--r--Libraries/LibCompress/Gzip.cpp186
-rw-r--r--Libraries/LibCompress/Gzip.h92
-rw-r--r--Userland/test-compress.cpp34
4 files changed, 314 insertions, 1 deletions
diff --git a/Libraries/LibCompress/CMakeLists.txt b/Libraries/LibCompress/CMakeLists.txt
index a61ff5a486..f76ba8d5b5 100644
--- a/Libraries/LibCompress/CMakeLists.txt
+++ b/Libraries/LibCompress/CMakeLists.txt
@@ -1,7 +1,8 @@
set(SOURCES
Deflate.cpp
Zlib.cpp
+ Gzip.cpp
)
serenity_lib(LibCompress compression)
-target_link_libraries(LibCompress LibC)
+target_link_libraries(LibCompress LibC LibCrypto)
diff --git a/Libraries/LibCompress/Gzip.cpp b/Libraries/LibCompress/Gzip.cpp
new file mode 100644
index 0000000000..3e7170342c
--- /dev/null
+++ b/Libraries/LibCompress/Gzip.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2020, the SerenityOS developers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <LibCompress/Gzip.h>
+
+#include <AK/String.h>
+
+namespace Compress {
+
+// Checks the two identification bytes at the start of the header against the
+// gzip magic number (0x1f, 0x8b).
+bool GzipDecompressor::BlockHeader::valid_magic_number() const
+{
+    if (identification_1 != 0x1f)
+        return false;
+
+    return identification_2 == 0x8b;
+}
+
+// Reports whether this implementation is able to decode a member that starts
+// with this header.
+//
+// Returns false when the compression method is not deflate (0x08), when a flag
+// bit outside of the ones declared in Flags is set, or when the member carries
+// a header CRC (FHCRC), which is not handled yet. Returns true otherwise.
+bool GzipDecompressor::BlockHeader::supported_by_implementation() const
+{
+    if (compression_method != 0x08) {
+        // RFC 1952 does not define any compression methods other than deflate.
+        return false;
+    }
+
+    // Flags::MAX is the OR of every defined flag, so a larger value means that
+    // at least one undefined bit is set.
+    if (flags > Flags::MAX) {
+        // RFC 1952 does not define any more flags.
+        return false;
+    }
+
+    if (flags & Flags::FHCRC) {
+        // FIXME: The header CRC16 is not implemented. Report the member as
+        //        unsupported so that the caller can flag a stream error instead
+        //        of hitting TODO() on untrusted input.
+        return false;
+    }
+
+    return true;
+}
+
+// Stores a reference to the stream that holds the compressed gzip data.
+// Nothing is consumed from the stream here; parsing starts with the first read.
+GzipDecompressor::GzipDecompressor(InputStream& stream)
+ : m_input_stream(stream)
+{
+}
+
+// Explicitly drops the member that is currently being decompressed, if any.
+GzipDecompressor::~GzipDecompressor()
+{
+ m_current_member.clear();
+}
+
+// Decompresses data into `bytes` and returns the number of bytes written.
+//
+// While a member is active, data is pulled from its deflate stream and fed into
+// the running CRC32 and byte count. A short read from the deflate stream is
+// treated as the end of the member: the trailer (CRC32, ISIZE) is read from the
+// underlying stream and verified, and reading continues with the next member to
+// fill the rest of `bytes`.
+//
+// Without an active member, the next member header and its optional fields are
+// parsed first. Returns 0 if the underlying stream is at its end.
+//
+// On an invalid header, an unsupported member or a trailer mismatch, m_error is
+// set and 0 is returned.
+//
+// FIXME: Again, there are surely a ton of bugs because the code doesn't check for read errors.
+size_t GzipDecompressor::read(Bytes bytes)
+{
+    if (m_current_member.has_value()) {
+        size_t nread = current_member().m_stream.read(bytes);
+        current_member().m_checksum.update(bytes.trim(nread));
+        current_member().m_nread += nread;
+
+        if (nread < bytes.size()) {
+            // The deflate stream ran dry, so the member trailer comes next.
+            LittleEndian<u32> crc32, input_size;
+            m_input_stream >> crc32 >> input_size;
+
+            if (crc32 != current_member().m_checksum.digest()) {
+                m_error = true;
+                return 0;
+            }
+
+            // RFC 1952 defines ISIZE as the uncompressed size modulo 2^32, so
+            // the running count has to be truncated before comparing.
+            if (input_size != static_cast<u32>(current_member().m_nread)) {
+                m_error = true;
+                return 0;
+            }
+
+            m_current_member.clear();
+
+            // Fill the remainder of the buffer from the next member, if any.
+            return nread + read(bytes.slice(nread));
+        }
+
+        return nread;
+    } else {
+        if (m_input_stream.eof())
+            return 0;
+
+        // FIXME: This fails with the new changes?
+        BlockHeader header;
+        m_input_stream >> Bytes { &header, sizeof(header) };
+
+        if (!header.valid_magic_number() || !header.supported_by_implementation()) {
+            m_error = true;
+            return 0;
+        }
+
+        if (header.flags & Flags::FEXTRA) {
+            // RFC 1952: the extra field is a two byte XLEN followed by XLEN
+            // bytes. The subfields live inside those bytes, so skipping XLEN
+            // bytes skips all of them.
+            LittleEndian<u16> extra_length;
+            m_input_stream >> extra_length;
+
+            if (!m_input_stream.discard_or_error(extra_length)) {
+                m_error = true;
+                return 0;
+            }
+        }
+
+        // The original file name and the comment are read and thrown away.
+        if (header.flags & Flags::FNAME) {
+            String original_filename;
+            m_input_stream >> original_filename;
+        }
+
+        if (header.flags & Flags::FCOMMENT) {
+            String comment;
+            m_input_stream >> comment;
+        }
+
+        m_current_member.emplace(header, m_input_stream);
+        return read(bytes);
+    }
+}
+
+// Tries to fill `bytes` completely. If read() delivers fewer bytes than
+// requested, m_error is set and false is returned; otherwise returns true.
+bool GzipDecompressor::read_or_error(Bytes bytes)
+{
+    const size_t nread = read(bytes);
+    if (nread >= bytes.size())
+        return true;
+
+    m_error = true;
+    return false;
+}
+
+// Skips `count` bytes of decompressed data by reading them into a scratch
+// buffer. If the stream reports eof() before enough bytes were skipped, m_error
+// is set and false is returned; otherwise returns true.
+bool GzipDecompressor::discard_or_error(size_t count)
+{
+    u8 scratch[4096];
+
+    for (size_t remaining = count; remaining > 0;) {
+        if (eof()) {
+            m_error = true;
+            return false;
+        }
+
+        // Never request more than what is left to skip or what fits the buffer.
+        const size_t chunk_size = min<size_t>(remaining, sizeof(scratch));
+        remaining -= read({ scratch, chunk_size });
+    }
+
+    return true;
+}
+
+// Decompresses the gzip data in `bytes` in one go and returns the result as a
+// ByteBuffer trimmed to the number of bytes that were produced.
+ByteBuffer GzipDecompressor::decompress_all(ReadonlyBytes bytes)
+{
+    constexpr size_t chunk_size = 4096;
+
+    InputMemoryStream compressed_stream { bytes };
+    GzipDecompressor decompressor { compressed_stream };
+
+    auto output = ByteBuffer::create_uninitialized(chunk_size);
+    size_t total_read = 0;
+
+    for (;;) {
+        if (decompressor.eof())
+            break;
+
+        // Decompress into whatever space is left behind the data read so far.
+        auto free_space = output.bytes().slice(total_read);
+        total_read += decompressor.read(free_space);
+
+        // Keep at least one chunk of free space around for the next read.
+        const size_t remaining_capacity = output.size() - total_read;
+        if (remaining_capacity < chunk_size)
+            output.grow(output.size() + chunk_size);
+    }
+
+    output.trim(total_read);
+    return output;
+}
+
+// Returns true once the underlying stream is at its end and, if a member is
+// currently being decompressed, its deflate stream is at its end as well.
+bool GzipDecompressor::eof() const
+{
+    // FIXME: There is an ugly edge case where we read the whole deflate block
+    //        but haven't read CRC32 and ISIZE.
+    if (m_current_member.has_value() && !current_member().m_stream.eof())
+        return false;
+
+    return m_input_stream.eof();
+}
+
+}
diff --git a/Libraries/LibCompress/Gzip.h b/Libraries/LibCompress/Gzip.h
new file mode 100644
index 0000000000..72e5e49aba
--- /dev/null
+++ b/Libraries/LibCompress/Gzip.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020, the SerenityOS developers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <LibCompress/Deflate.h>
+#include <LibCrypto/Checksum/CRC32.h>
+
+namespace Compress {
+
+// Input stream that decompresses gzip data read from another InputStream.
+// Member headers are parsed on demand, the payload of each member is run
+// through DeflateDecompressor, and the CRC32 and size stored after each member
+// are compared against the decompressed data.
+class GzipDecompressor final : public InputStream {
+public:
+ // Only a reference to the given stream is stored.
+ GzipDecompressor(InputStream&);
+ ~GzipDecompressor();
+
+ // Reads decompressed bytes; returns how many bytes were written.
+ size_t read(Bytes) override;
+ // Like read(), but a short read sets the error flag and returns false.
+ bool read_or_error(Bytes) override;
+ // Skips the given number of decompressed bytes; returns false on failure.
+ bool discard_or_error(size_t) override;
+ bool eof() const override;
+
+ // Convenience helper that decompresses a complete in-memory gzip buffer.
+ static ByteBuffer decompress_all(ReadonlyBytes);
+
+private:
+ // Fixed-size fields at the start of every gzip member. The struct is packed
+ // because it is filled directly from the raw bytes of the input stream.
+ struct [[gnu::packed]] BlockHeader
+ {
+ // Magic number, expected to be 0x1f 0x8b.
+ u8 identification_1;
+ u8 identification_2;
+ // Only 0x08 (deflate) is accepted.
+ u8 compression_method;
+ // Bitwise OR of the values declared in Flags.
+ u8 flags;
+ LittleEndian<u32> modification_time;
+ u8 extra_flags;
+ u8 operating_system;
+
+ bool valid_magic_number() const;
+ bool supported_by_implementation() const;
+ };
+
+ // Bits of BlockHeader::flags.
+ struct Flags {
+ static constexpr u8 FTEXT = 1 << 0;
+ static constexpr u8 FHCRC = 1 << 1;
+ static constexpr u8 FEXTRA = 1 << 2;
+ static constexpr u8 FNAME = 1 << 3;
+ static constexpr u8 FCOMMENT = 1 << 4;
+
+ // All defined flags combined; any larger value has an undefined bit set.
+ static constexpr u8 MAX = FTEXT | FHCRC | FEXTRA | FNAME | FCOMMENT;
+ };
+
+ // State of the member that is currently being decompressed.
+ class Member {
+ public:
+ Member(BlockHeader header, InputStream& stream)
+ : m_header(header)
+ , m_stream(stream)
+ {
+ }
+
+ BlockHeader m_header;
+ // Deflate decompressor constructed from the compressed input stream.
+ DeflateDecompressor m_stream;
+ // Running CRC32 over the decompressed bytes of this member.
+ Crypto::Checksum::CRC32 m_checksum;
+ // Number of decompressed bytes produced by this member so far.
+ size_t m_nread { 0 };
+ };
+
+ // Accessors for the active member; only valid while m_current_member has a value.
+ const Member& current_member() const { return m_current_member.value(); }
+ Member& current_member() { return m_current_member.value(); }
+
+ // Stream that the compressed data is read from.
+ InputStream& m_input_stream;
+ // Empty until a member header was parsed and again after its trailer was verified.
+ Optional<Member> m_current_member;
+};
+
+}
diff --git a/Userland/test-compress.cpp b/Userland/test-compress.cpp
index 87473970c6..bf674fc4fd 100644
--- a/Userland/test-compress.cpp
+++ b/Userland/test-compress.cpp
@@ -27,6 +27,7 @@
#include <AK/TestSuite.h>
#include <LibCompress/Deflate.h>
+#include <LibCompress/Gzip.h>
#include <LibCompress/Zlib.h>
static bool compare(ReadonlyBytes lhs, ReadonlyBytes rhs)
@@ -119,4 +120,37 @@ TEST_CASE(zlib_decompress_simple)
EXPECT(compare({ uncompressed, sizeof(uncompressed) - 1 }, decompressed.bytes()));
}
+// Decompresses a gzip buffer that consists of a single member and compares the
+// result against the expected plain text.
+TEST_CASE(gzip_decompress_simple)
+{
+ // 10 byte header (no optional fields), deflate data, CRC32, ISIZE = 15.
+ const u8 compressed[] = {
+ 0x1f, 0x8b, 0x08, 0x00, 0x77, 0xff, 0x47, 0x5f, 0x02, 0xff, 0x2b, 0xcf,
+ 0x2f, 0x4a, 0x31, 0x54, 0x48, 0x4c, 0x4a, 0x56, 0x28, 0x07, 0xb2, 0x8c,
+ 0x00, 0xc2, 0x1d, 0x22, 0x15, 0x0f, 0x00, 0x00, 0x00
+ };
+
+ const u8 uncompressed[] = "word1 abc word2";
+
+ const auto decompressed = Compress::GzipDecompressor::decompress_all({ compressed, sizeof(compressed) });
+
+ // sizeof() - 1 drops the terminating NUL of the string literal.
+ EXPECT(compare({ uncompressed, sizeof(uncompressed) - 1 }, decompressed.bytes()));
+}
+
+// Decompresses a gzip buffer made of two back-to-back members and expects the
+// concatenation of both payloads.
+TEST_CASE(gzip_multiple_members)
+{
+
+ // The same member twice; each one ends with ISIZE = 6.
+ const u8 compressed[] = {
+ 0x1f, 0x8b, 0x08, 0x00, 0xe0, 0x03, 0x48, 0x5f, 0x02, 0xff, 0x4b, 0x4c,
+ 0x4a, 0x4e, 0x4c, 0x4a, 0x06, 0x00, 0x4c, 0x99, 0x6e, 0x72, 0x06, 0x00,
+ 0x00, 0x00, 0x1f, 0x8b, 0x08, 0x00, 0xe0, 0x03, 0x48, 0x5f, 0x02, 0xff,
+ 0x4b, 0x4c, 0x4a, 0x4e, 0x4c, 0x4a, 0x06, 0x00, 0x4c, 0x99, 0x6e, 0x72,
+ 0x06, 0x00, 0x00, 0x00
+ };
+
+ const u8 uncompressed[] = "abcabcabcabc";
+
+ const auto decompressed = Compress::GzipDecompressor::decompress_all({ compressed, sizeof(compressed) });
+
+ // sizeof() - 1 drops the terminating NUL of the string literal.
+ EXPECT(compare({ uncompressed, sizeof(uncompressed) - 1 }, decompressed.bytes()));
+}
+
TEST_MAIN(Compress)