summaryrefslogtreecommitdiff
path: root/Userland/Libraries
diff options
context:
space:
mode:
authorIdan Horowitz <idan.horowitz@gmail.com>2021-03-18 22:35:14 +0200
committerAndreas Kling <kling@serenityos.org>2021-03-23 16:09:36 +0100
commitcaf4bde3a9396193b28d134fc7ad5a0c92faace6 (patch)
treec4ca2771009b7507a4288b814cc26e909113e03e /Userland/Libraries
parenta809db90ddff280fd523e924789ed907b2b950b9 (diff)
downloadserenity-caf4bde3a9396193b28d134fc7ad5a0c92faace6.zip
LibArchive: Add Zip file parser
This is based on the zip specification on PKWARE's zip specification (https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) and will be used in the unzip utility and eventually in the zip utility.
Diffstat (limited to 'Userland/Libraries')
-rw-r--r--Userland/Libraries/LibArchive/CMakeLists.txt5
-rw-r--r--Userland/Libraries/LibArchive/Zip.cpp120
-rw-r--r--Userland/Libraries/LibArchive/Zip.h234
3 files changed, 357 insertions, 2 deletions
diff --git a/Userland/Libraries/LibArchive/CMakeLists.txt b/Userland/Libraries/LibArchive/CMakeLists.txt
index 29dce137b0..b6fef50a50 100644
--- a/Userland/Libraries/LibArchive/CMakeLists.txt
+++ b/Userland/Libraries/LibArchive/CMakeLists.txt
@@ -1,6 +1,7 @@
set(SOURCES
- TarStream.cpp
-)
+ TarStream.cpp
+ Zip.cpp
+ )
serenity_lib(LibArchive archive)
target_link_libraries(LibArchive LibCore)
diff --git a/Userland/Libraries/LibArchive/Zip.cpp b/Userland/Libraries/LibArchive/Zip.cpp
new file mode 100644
index 0000000000..4c54622085
--- /dev/null
+++ b/Userland/Libraries/LibArchive/Zip.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <LibArchive/Zip.h>
+
+namespace Archive {
+
+bool Zip::find_end_of_central_directory_offset(const ReadonlyBytes& buffer, size_t& offset)
+{
+ for (size_t backwards_offset = 0; backwards_offset <= UINT16_MAX; backwards_offset++) // the file may have a trailing comment of an arbitrary 16 bit length
+ {
+ if (buffer.size() < (sizeof(EndOfCentralDirectory) - sizeof(u8*)) + backwards_offset)
+ return false;
+
+ auto signature_offset = (buffer.size() - (sizeof(EndOfCentralDirectory) - sizeof(u8*)) - backwards_offset);
+ if (memcmp(buffer.data() + signature_offset, end_of_central_directory_signature, sizeof(end_of_central_directory_signature)) == 0) {
+ offset = signature_offset;
+ return true;
+ }
+ }
+ return false;
+}
+
+Optional<Zip> Zip::try_create(const ReadonlyBytes& buffer)
+{
+ size_t end_of_central_directory_offset;
+ if (!find_end_of_central_directory_offset(buffer, end_of_central_directory_offset))
+ return {};
+
+ EndOfCentralDirectory end_of_central_directory {};
+ if (!end_of_central_directory.read(buffer.slice(end_of_central_directory_offset)))
+ return {};
+
+ if (end_of_central_directory.disk_number != 0 || end_of_central_directory.central_directory_start_disk != 0 || end_of_central_directory.disk_records_count != end_of_central_directory.total_records_count)
+ return {}; // TODO: support multi-volume zip archives
+
+ size_t member_offset = end_of_central_directory.central_directory_offset;
+ for (size_t i = 0; i < end_of_central_directory.total_records_count; i++) {
+ CentralDirectoryRecord central_directory_record {};
+ if (!central_directory_record.read(buffer.slice(member_offset)))
+ return {};
+ if (central_directory_record.general_purpose_flags & 1)
+ return {}; // TODO: support encrypted zip members
+ if (central_directory_record.general_purpose_flags & 3)
+ return {}; // TODO: support zip data descriptors
+ if (central_directory_record.compression_method != ZipCompressionMethod::Store && central_directory_record.compression_method != ZipCompressionMethod::Deflate)
+ return {}; // TODO: support obsolete zip compression methods
+ if (central_directory_record.compression_method == ZipCompressionMethod::Store && central_directory_record.uncompressed_size != central_directory_record.compressed_size)
+ return {};
+ if (central_directory_record.start_disk != 0)
+ return {}; // TODO: support multi-volume zip archives
+ if (memchr(central_directory_record.name, 0, central_directory_record.name_length) != nullptr)
+ return {};
+ LocalFileHeader local_file_header {};
+ if (!local_file_header.read(buffer.slice(central_directory_record.local_file_header_offset)))
+ return {};
+ if (buffer.size() - (local_file_header.compressed_data - buffer.data()) < central_directory_record.compressed_size)
+ return {};
+ member_offset += central_directory_record.size();
+ }
+
+ Zip zip;
+ zip.m_input_data = buffer;
+ zip.member_count = end_of_central_directory.total_records_count;
+ zip.members_start_offset = end_of_central_directory.central_directory_offset;
+ return zip;
+}
+
+bool Zip::for_each_member(Function<IterationDecision(const ZipMember&)> callback)
+{
+ size_t member_offset = members_start_offset;
+ for (size_t i = 0; i < member_count; i++) {
+ CentralDirectoryRecord central_directory_record {};
+ VERIFY(central_directory_record.read(m_input_data.slice(member_offset)));
+ LocalFileHeader local_file_header {};
+ VERIFY(local_file_header.read(m_input_data.slice(central_directory_record.local_file_header_offset)));
+
+ ZipMember member;
+ char null_terminated_name[central_directory_record.name_length + 1];
+ memcpy(null_terminated_name, central_directory_record.name, central_directory_record.name_length);
+ null_terminated_name[central_directory_record.name_length] = 0;
+ member.name = String { null_terminated_name };
+ member.compressed_data = { local_file_header.compressed_data, central_directory_record.compressed_size };
+ member.compression_method = static_cast<ZipCompressionMethod>(central_directory_record.compression_method);
+ member.uncompressed_size = central_directory_record.uncompressed_size;
+ member.crc32 = central_directory_record.crc32;
+ member.is_directory = central_directory_record.external_attributes & zip_directory_external_attribute || member.name.ends_with('/'); // FIXME: better directory detection
+
+ if (callback(member) == IterationDecision::Break)
+ return false;
+
+ member_offset += central_directory_record.size();
+ }
+ return true;
+}
+
+}
diff --git a/Userland/Libraries/LibArchive/Zip.h b/Userland/Libraries/LibArchive/Zip.h
new file mode 100644
index 0000000000..dd8c632ec2
--- /dev/null
+++ b/Userland/Libraries/LibArchive/Zip.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <AK/Function.h>
+#include <AK/IterationDecision.h>
+#include <AK/Span.h>
+#include <AK/Stream.h>
+#include <AK/String.h>
+#include <string.h>
+
+namespace Archive {
+
+// NOTE: Due to the format of zip files compression is streamed and decompression is random access.
+
+static constexpr u8 end_of_central_directory_signature[] = { 0x50, 0x4b, 0x05, 0x06 }; // 'PK\x05\x06'
+struct [[gnu::packed]] EndOfCentralDirectory {
+ u16 disk_number;
+ u16 central_directory_start_disk;
+ u16 disk_records_count;
+ u16 total_records_count;
+ u32 central_directory_size;
+ u32 central_directory_offset;
+ u16 comment_length;
+ const u8* comment;
+
+ bool read(ReadonlyBytes buffer)
+ {
+ auto fields_size = sizeof(EndOfCentralDirectory) - sizeof(u8*);
+ if (buffer.size() < fields_size)
+ return false;
+ if (memcmp(buffer.data(), end_of_central_directory_signature, sizeof(end_of_central_directory_signature)) != 0)
+ return false;
+ memcpy(reinterpret_cast<void*>(&disk_number), buffer.data() + sizeof(end_of_central_directory_signature), fields_size);
+ comment = buffer.data() + sizeof(end_of_central_directory_signature) + fields_size;
+ return true;
+ }
+
+ void write(OutputStream& stream) const
+ {
+ stream.write_or_error({ end_of_central_directory_signature, sizeof(end_of_central_directory_signature) });
+ stream << disk_number;
+ stream << central_directory_start_disk;
+ stream << disk_records_count;
+ stream << total_records_count;
+ stream << central_directory_size;
+ stream << central_directory_offset;
+ stream << comment_length;
+ if (comment_length > 0)
+ stream.write_or_error({ comment, comment_length });
+ }
+};
+
+static constexpr u8 central_directory_record_signature[] = { 0x50, 0x4b, 0x01, 0x02 }; // 'PK\x01\x02'
+struct [[gnu::packed]] CentralDirectoryRecord {
+ u16 made_by_version;
+ u16 minimum_version;
+ u16 general_purpose_flags;
+ u16 compression_method;
+ u16 modification_time;
+ u16 modification_date;
+ u32 crc32;
+ u32 compressed_size;
+ u32 uncompressed_size;
+ u16 name_length;
+ u16 extra_data_length;
+ u16 comment_length;
+ u16 start_disk;
+ u16 internal_attributes;
+ u32 external_attributes;
+ u32 local_file_header_offset;
+ const u8* name;
+ const u8* extra_data;
+ const u8* comment;
+
+ bool read(ReadonlyBytes buffer)
+ {
+ auto fields_size = sizeof(CentralDirectoryRecord) - (sizeof(u8*) * 3);
+ if (buffer.size() < fields_size)
+ return false;
+ if (memcmp(buffer.data(), central_directory_record_signature, sizeof(central_directory_record_signature)) != 0)
+ return false;
+ memcpy(reinterpret_cast<void*>(&made_by_version), buffer.data() + sizeof(central_directory_record_signature), fields_size);
+ name = buffer.data() + sizeof(central_directory_record_signature) + fields_size;
+ extra_data = name + name_length;
+ comment = extra_data + extra_data_length;
+ return true;
+ }
+
+ void write(OutputStream& stream) const
+ {
+ stream.write_or_error({ central_directory_record_signature, sizeof(central_directory_record_signature) });
+ stream << made_by_version;
+ stream << minimum_version;
+ stream << general_purpose_flags;
+ stream << compression_method;
+ stream << modification_time;
+ stream << modification_date;
+ stream << crc32;
+ stream << compressed_size;
+ stream << uncompressed_size;
+ stream << name_length;
+ stream << extra_data_length;
+ stream << comment_length;
+ stream << start_disk;
+ stream << internal_attributes;
+ stream << external_attributes;
+ stream << local_file_header_offset;
+ if (name_length > 0)
+ stream.write_or_error({ name, name_length });
+ if (extra_data_length > 0)
+ stream.write_or_error({ extra_data, extra_data_length });
+ if (comment_length > 0)
+ stream.write_or_error({ comment, comment_length });
+ }
+
+ [[nodiscard]] size_t size() const
+ {
+ return sizeof(central_directory_record_signature) + (sizeof(CentralDirectoryRecord) - (sizeof(u8*) * 3)) + name_length + extra_data_length + comment_length;
+ }
+};
+static constexpr u32 zip_directory_external_attribute = 1 << 4;
+
+static constexpr u8 local_file_header_signature[] = { 0x50, 0x4b, 0x03, 0x04 }; // 'PK\x03\x04'
+struct [[gnu::packed]] LocalFileHeader {
+ u16 minimum_version;
+ u16 general_purpose_flags;
+ u16 compression_method;
+ u16 modification_time;
+ u16 modification_date;
+ u32 crc32;
+ u32 compressed_size;
+ u32 uncompressed_size;
+ u16 name_length;
+ u16 extra_data_length;
+ const u8* name;
+ const u8* extra_data;
+ const u8* compressed_data;
+
+ bool read(ReadonlyBytes buffer)
+ {
+ auto fields_size = sizeof(LocalFileHeader) - (sizeof(u8*) * 3);
+ if (buffer.size() < fields_size)
+ return false;
+ if (memcmp(buffer.data(), local_file_header_signature, sizeof(local_file_header_signature)) != 0)
+ return false;
+ memcpy(reinterpret_cast<void*>(&minimum_version), buffer.data() + sizeof(local_file_header_signature), fields_size);
+ name = buffer.data() + sizeof(local_file_header_signature) + fields_size;
+ extra_data = name + name_length;
+ compressed_data = extra_data + extra_data_length;
+ return true;
+ }
+
+ void write(OutputStream& stream) const
+ {
+ stream.write_or_error({ local_file_header_signature, sizeof(local_file_header_signature) });
+ stream << minimum_version;
+ stream << general_purpose_flags;
+ stream << compression_method;
+ stream << modification_time;
+ stream << modification_date;
+ stream << crc32;
+ stream << compressed_size;
+ stream << uncompressed_size;
+ stream << name_length;
+ stream << extra_data_length;
+ if (name_length > 0)
+ stream.write_or_error({ name, name_length });
+ if (extra_data_length > 0)
+ stream.write_or_error({ extra_data, extra_data_length });
+ if (compressed_size > 0)
+ stream.write_or_error({ compressed_data, compressed_size });
+ }
+};
+
+enum ZipCompressionMethod : u16 {
+ Store = 0,
+ Shrink = 1,
+ Reduce1 = 2,
+ Reduce2 = 3,
+ Reduce3 = 4,
+ Reduce4 = 5,
+ Implode = 6,
+ Reserved = 7,
+ Deflate = 8
+};
+
+struct ZipMember {
+ String name;
+ ReadonlyBytes compressed_data; // TODO: maybe the decompression/compression should be handled by LibArchive instead of the user?
+ ZipCompressionMethod compression_method;
+ u32 uncompressed_size;
+ u32 crc32;
+ bool is_directory;
+};
+
+class Zip {
+public:
+ static Optional<Zip> try_create(const ReadonlyBytes& buffer);
+ bool for_each_member(Function<IterationDecision(const ZipMember&)>);
+
+private:
+ static bool find_end_of_central_directory_offset(const ReadonlyBytes&, size_t& offset);
+
+ u16 member_count { 0 };
+ size_t members_start_offset { 0 };
+ ReadonlyBytes m_input_data;
+};
+
+}