diff options
author | Idan Horowitz <idan.horowitz@gmail.com> | 2021-03-18 22:35:14 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-03-23 16:09:36 +0100 |
commit | caf4bde3a9396193b28d134fc7ad5a0c92faace6 (patch) | |
tree | c4ca2771009b7507a4288b814cc26e909113e03e /Userland/Libraries | |
parent | a809db90ddff280fd523e924789ed907b2b950b9 (diff) | |
download | serenity-caf4bde3a9396193b28d134fc7ad5a0c92faace6.zip |
LibArchive: Add Zip file parser
This is based on PKWARE's zip specification
(https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) and will
be used in the unzip utility and eventually in the zip utility.
Diffstat (limited to 'Userland/Libraries')
-rw-r--r-- | Userland/Libraries/LibArchive/CMakeLists.txt | 5 | ||||
-rw-r--r-- | Userland/Libraries/LibArchive/Zip.cpp | 120 | ||||
-rw-r--r-- | Userland/Libraries/LibArchive/Zip.h | 234 |
3 files changed, 357 insertions, 2 deletions
diff --git a/Userland/Libraries/LibArchive/CMakeLists.txt b/Userland/Libraries/LibArchive/CMakeLists.txt index 29dce137b0..b6fef50a50 100644 --- a/Userland/Libraries/LibArchive/CMakeLists.txt +++ b/Userland/Libraries/LibArchive/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES - TarStream.cpp -) + TarStream.cpp + Zip.cpp + ) serenity_lib(LibArchive archive) target_link_libraries(LibArchive LibCore) diff --git a/Userland/Libraries/LibArchive/Zip.cpp b/Userland/Libraries/LibArchive/Zip.cpp new file mode 100644 index 0000000000..4c54622085 --- /dev/null +++ b/Userland/Libraries/LibArchive/Zip.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <LibArchive/Zip.h>
+
+namespace Archive {
+
+// On-disk size of the fixed-length part of each record, *including* its 4-byte signature.
+// The structs in Zip.h hold the fields that follow the signature plus trailing pointers to
+// the variable-length data, so the pointers are subtracted and the signature is added back.
+static constexpr size_t end_of_central_directory_fixed_size = sizeof(end_of_central_directory_signature) + sizeof(EndOfCentralDirectory) - sizeof(u8*);
+static constexpr size_t central_directory_record_fixed_size = sizeof(central_directory_record_signature) + sizeof(CentralDirectoryRecord) - (sizeof(u8*) * 3);
+static constexpr size_t local_file_header_fixed_size = sizeof(local_file_header_signature) + sizeof(LocalFileHeader) - (sizeof(u8*) * 3);
+
+// General purpose bit flags this parser cares about (APPNOTE.TXT section 4.4.4).
+static constexpr u16 general_purpose_flag_encrypted = 1 << 0;
+static constexpr u16 general_purpose_flag_data_descriptor = 1 << 3;
+
+// Locates the end of central directory record by scanning backwards from the end of `buffer`.
+// On success stores the offset of the record's signature in `offset` and returns true.
+// Returns false if no signature is found within the last 64 KiB (+ record size) of the buffer.
+bool Zip::find_end_of_central_directory_offset(const ReadonlyBytes& buffer, size_t& offset)
+{
+    // The record may be followed by a trailing comment of an arbitrary 16 bit length, so start
+    // at the last position where a complete record (signature included) still fits and walk back.
+    for (size_t backwards_offset = 0; backwards_offset <= UINT16_MAX; backwards_offset++) {
+        if (buffer.size() < end_of_central_directory_fixed_size + backwards_offset)
+            return false;
+
+        auto signature_offset = buffer.size() - end_of_central_directory_fixed_size - backwards_offset;
+        if (memcmp(buffer.data() + signature_offset, end_of_central_directory_signature, sizeof(end_of_central_directory_signature)) == 0) {
+            offset = signature_offset;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Validates `buffer` as a single-volume zip archive and returns a Zip viewing it.
+// Every central directory record and its local file header are checked to lie inside the
+// buffer; an empty Optional is returned for malformed or unsupported archives.
+// The returned Zip keeps referring to `buffer`'s memory and does not copy it.
+Optional<Zip> Zip::try_create(const ReadonlyBytes& buffer)
+{
+    size_t end_of_central_directory_offset;
+    if (!find_end_of_central_directory_offset(buffer, end_of_central_directory_offset))
+        return {};
+
+    EndOfCentralDirectory end_of_central_directory {};
+    if (!end_of_central_directory.read(buffer.slice(end_of_central_directory_offset)))
+        return {};
+
+    if (end_of_central_directory.disk_number != 0 || end_of_central_directory.central_directory_start_disk != 0 || end_of_central_directory.disk_records_count != end_of_central_directory.total_records_count)
+        return {}; // TODO: support multi-volume zip archives
+
+    size_t member_offset = end_of_central_directory.central_directory_offset;
+    for (size_t i = 0; i < end_of_central_directory.total_records_count; i++) {
+        // All offsets and lengths come from the file itself, so check them before slicing or dereferencing.
+        if (member_offset > buffer.size() || buffer.size() - member_offset < central_directory_record_fixed_size)
+            return {};
+        CentralDirectoryRecord central_directory_record {};
+        if (!central_directory_record.read(buffer.slice(member_offset)))
+            return {};
+        // The name, extra data and comment that follow the fixed fields must fit as well.
+        if (buffer.size() - member_offset < central_directory_record.size())
+            return {};
+        if (central_directory_record.general_purpose_flags & general_purpose_flag_encrypted)
+            return {}; // TODO: support encrypted zip members
+        if (central_directory_record.general_purpose_flags & general_purpose_flag_data_descriptor)
+            return {}; // TODO: support zip data descriptors
+        if (central_directory_record.compression_method != ZipCompressionMethod::Store && central_directory_record.compression_method != ZipCompressionMethod::Deflate)
+            return {}; // TODO: support obsolete zip compression methods
+        if (central_directory_record.compression_method == ZipCompressionMethod::Store && central_directory_record.uncompressed_size != central_directory_record.compressed_size)
+            return {};
+        if (central_directory_record.start_disk != 0)
+            return {}; // TODO: support multi-volume zip archives
+        // for_each_member() turns the name into a String, so embedded NUL bytes are rejected here.
+        if (memchr(central_directory_record.name, 0, central_directory_record.name_length) != nullptr)
+            return {};
+
+        size_t local_file_header_offset = central_directory_record.local_file_header_offset;
+        if (local_file_header_offset > buffer.size() || buffer.size() - local_file_header_offset < local_file_header_fixed_size)
+            return {};
+        LocalFileHeader local_file_header {};
+        if (!local_file_header.read(buffer.slice(local_file_header_offset)))
+            return {};
+        // Work with offsets instead of pointer differences so that a header claiming more
+        // name/extra bytes than the buffer holds cannot make the subtraction wrap around.
+        size_t compressed_data_offset = local_file_header_offset + local_file_header_fixed_size + local_file_header.name_length + local_file_header.extra_data_length;
+        if (compressed_data_offset > buffer.size() || buffer.size() - compressed_data_offset < central_directory_record.compressed_size)
+            return {};
+        member_offset += central_directory_record.size();
+    }
+
+    Zip zip;
+    zip.m_input_data = buffer;
+    zip.member_count = end_of_central_directory.total_records_count;
+    zip.members_start_offset = end_of_central_directory.central_directory_offset;
+    return zip;
+}
+
+// Invokes `callback` once per archive member, in central directory order.
+// Returns false if the callback stopped the iteration with IterationDecision::Break,
+// true if every member was visited.
+bool Zip::for_each_member(Function<IterationDecision(const ZipMember&)> callback)
+{
+    size_t member_offset = members_start_offset;
+    for (size_t i = 0; i < member_count; i++) {
+        // try_create() already parsed these exact records, so a failing read is a logic error.
+        CentralDirectoryRecord central_directory_record {};
+        VERIFY(central_directory_record.read(m_input_data.slice(member_offset)));
+        LocalFileHeader local_file_header {};
+        VERIFY(local_file_header.read(m_input_data.slice(central_directory_record.local_file_header_offset)));
+
+        ZipMember member;
+        // The name is not NUL terminated in the archive; construct the String from pointer and length
+        // rather than copying into a variable-length stack array (try_create() rejected embedded NULs).
+        member.name = String { reinterpret_cast<const char*>(central_directory_record.name), central_directory_record.name_length };
+        member.compressed_data = { local_file_header.compressed_data, central_directory_record.compressed_size };
+        member.compression_method = static_cast<ZipCompressionMethod>(central_directory_record.compression_method);
+        member.uncompressed_size = central_directory_record.uncompressed_size;
+        member.crc32 = central_directory_record.crc32;
+        member.is_directory = (central_directory_record.external_attributes & zip_directory_external_attribute) || member.name.ends_with('/'); // FIXME: better directory detection
+
+        if (callback(member) == IterationDecision::Break)
+            return false;
+
+        member_offset += central_directory_record.size();
+    }
+    return true;
+}
+
+}
diff --git a/Userland/Libraries/LibArchive/Zip.h b/Userland/Libraries/LibArchive/Zip.h
new file mode 100644
index 0000000000..dd8c632ec2
--- /dev/null
+++ b/Userland/Libraries/LibArchive/Zip.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <AK/Function.h> +#include <AK/IterationDecision.h> +#include <AK/Span.h> +#include <AK/Stream.h> +#include <AK/String.h> +#include <string.h> + +namespace Archive { + +// NOTE: Due to the format of zip files compression is streamed and decompression is random access. 
+
+static constexpr u8 end_of_central_directory_signature[] = { 0x50, 0x4b, 0x05, 0x06 }; // 'PK\x05\x06'
+
+// End of central directory record.
+// The packed integer members mirror the bytes that follow the signature in the archive, in order;
+// `comment` is not stored in the archive but points into the buffer handed to read().
+struct [[gnu::packed]] EndOfCentralDirectory {
+    u16 disk_number;
+    u16 central_directory_start_disk;
+    u16 disk_records_count;
+    u16 total_records_count;
+    u32 central_directory_size;
+    u32 central_directory_offset;
+    u16 comment_length;
+    const u8* comment;
+
+    // Parses a record that starts (with its signature) at the beginning of `buffer`.
+    // Returns false if the buffer is too short for signature + fields or the signature does not match.
+    // NOTE: `comment` is set to the byte after the fields but is not checked against `comment_length`.
+    bool read(ReadonlyBytes buffer)
+    {
+        auto fields_size = sizeof(EndOfCentralDirectory) - sizeof(u8*);
+        // The fields are copied from *behind* the signature, so both have to fit in the buffer.
+        if (buffer.size() < sizeof(end_of_central_directory_signature) + fields_size)
+            return false;
+        if (memcmp(buffer.data(), end_of_central_directory_signature, sizeof(end_of_central_directory_signature)) != 0)
+            return false;
+        memcpy(reinterpret_cast<void*>(&disk_number), buffer.data() + sizeof(end_of_central_directory_signature), fields_size);
+        comment = buffer.data() + sizeof(end_of_central_directory_signature) + fields_size;
+        return true;
+    }
+
+    // Serializes the signature, the fields and (if any) the comment to `stream`.
+    void write(OutputStream& stream) const
+    {
+        stream.write_or_error({ end_of_central_directory_signature, sizeof(end_of_central_directory_signature) });
+        stream << disk_number;
+        stream << central_directory_start_disk;
+        stream << disk_records_count;
+        stream << total_records_count;
+        stream << central_directory_size;
+        stream << central_directory_offset;
+        stream << comment_length;
+        if (comment_length > 0)
+            stream.write_or_error({ comment, comment_length });
+    }
+};
+
+static constexpr u8 central_directory_record_signature[] = { 0x50, 0x4b, 0x01, 0x02 }; // 'PK\x01\x02'
+
+// Central directory record describing one archive member.
+// The packed integer members mirror the bytes that follow the signature; `name`, `extra_data`
+// and `comment` point at the variable-length data inside the buffer handed to read().
+struct [[gnu::packed]] CentralDirectoryRecord {
+    u16 made_by_version;
+    u16 minimum_version;
+    u16 general_purpose_flags;
+    u16 compression_method;
+    u16 modification_time;
+    u16 modification_date;
+    u32 crc32;
+    u32 compressed_size;
+    u32 uncompressed_size;
+    u16 name_length;
+    u16 extra_data_length;
+    u16 comment_length;
+    u16 start_disk;
+    u16 internal_attributes;
+    u32 external_attributes;
+    u32 local_file_header_offset;
+    const u8* name;
+    const u8* extra_data;
+    const u8* comment;
+
+    // Parses a record that starts (with its signature) at the beginning of `buffer`.
+    // Returns false if the signature does not match or if the fixed fields, name, extra data
+    // and comment do not all fit inside the buffer.
+    bool read(ReadonlyBytes buffer)
+    {
+        auto fields_size = sizeof(CentralDirectoryRecord) - (sizeof(u8*) * 3);
+        auto header_size = sizeof(central_directory_record_signature) + fields_size;
+        // The fields are copied from *behind* the signature, so both have to fit in the buffer.
+        if (buffer.size() < header_size)
+            return false;
+        if (memcmp(buffer.data(), central_directory_record_signature, sizeof(central_directory_record_signature)) != 0)
+            return false;
+        memcpy(reinterpret_cast<void*>(&made_by_version), buffer.data() + sizeof(central_directory_record_signature), fields_size);
+        // The lengths come from the file; refuse to hand out pointers that run past the buffer.
+        auto variable_size = static_cast<size_t>(name_length) + extra_data_length + comment_length;
+        if (buffer.size() - header_size < variable_size)
+            return false;
+        name = buffer.data() + header_size;
+        extra_data = name + name_length;
+        comment = extra_data + extra_data_length;
+        return true;
+    }
+
+    // Serializes the signature, the fields and any name / extra data / comment to `stream`.
+    void write(OutputStream& stream) const
+    {
+        stream.write_or_error({ central_directory_record_signature, sizeof(central_directory_record_signature) });
+        stream << made_by_version;
+        stream << minimum_version;
+        stream << general_purpose_flags;
+        stream << compression_method;
+        stream << modification_time;
+        stream << modification_date;
+        stream << crc32;
+        stream << compressed_size;
+        stream << uncompressed_size;
+        stream << name_length;
+        stream << extra_data_length;
+        stream << comment_length;
+        stream << start_disk;
+        stream << internal_attributes;
+        stream << external_attributes;
+        stream << local_file_header_offset;
+        if (name_length > 0)
+            stream.write_or_error({ name, name_length });
+        if (extra_data_length > 0)
+            stream.write_or_error({ extra_data, extra_data_length });
+        if (comment_length > 0)
+            stream.write_or_error({ comment, comment_length });
+    }
+
+    // Total number of bytes this record occupies in the archive:
+    // signature + fixed fields + name + extra data + comment.
+    [[nodiscard]] size_t size() const
+    {
+        return sizeof(central_directory_record_signature) + (sizeof(CentralDirectoryRecord) - (sizeof(u8*) * 3)) + name_length + extra_data_length + comment_length;
+    }
+};
+
+// Bit 4 of CentralDirectoryRecord::external_attributes; Zip::for_each_member() treats a member with
+// this bit set as a directory (presumably the MS-DOS directory attribute - confirm against the spec).
+static constexpr u32 zip_directory_external_attribute = 1 << 4;
+
+static constexpr u8 local_file_header_signature[] = { 0x50, 0x4b, 0x03, 0x04 }; // 'PK\x03\x04'
+
+// Local file header that precedes a member's data.
+// The packed integer members mirror the bytes that follow the signature; `name`, `extra_data`
+// and `compressed_data` point into the buffer handed to read().
+struct [[gnu::packed]] LocalFileHeader {
+    u16 minimum_version;
+    u16 general_purpose_flags;
+    u16 compression_method;
+    u16 modification_time;
+    u16 modification_date;
+    u32 crc32;
+    u32 compressed_size;
+    u32 uncompressed_size;
+    u16 name_length;
+    u16 extra_data_length;
+    const u8* name;
+    const u8* extra_data;
+    const u8* compressed_data;
+
+    // Parses a header that starts (with its signature) at the beginning of `buffer`.
+    // Returns false if the signature does not match or if the fixed fields, name and extra data
+    // do not all fit inside the buffer.
+    // NOTE: the extent of `compressed_data` is not checked here; callers must validate it themselves.
+    bool read(ReadonlyBytes buffer)
+    {
+        auto fields_size = sizeof(LocalFileHeader) - (sizeof(u8*) * 3);
+        auto header_size = sizeof(local_file_header_signature) + fields_size;
+        // The fields are copied from *behind* the signature, so both have to fit in the buffer.
+        if (buffer.size() < header_size)
+            return false;
+        if (memcmp(buffer.data(), local_file_header_signature, sizeof(local_file_header_signature)) != 0)
+            return false;
+        memcpy(reinterpret_cast<void*>(&minimum_version), buffer.data() + sizeof(local_file_header_signature), fields_size);
+        // The lengths come from the file; refuse to hand out pointers that run past the buffer.
+        auto variable_size = static_cast<size_t>(name_length) + extra_data_length;
+        if (buffer.size() - header_size < variable_size)
+            return false;
+        name = buffer.data() + header_size;
+        extra_data = name + name_length;
+        compressed_data = extra_data + extra_data_length;
+        return true;
+    }
+
+    // Serializes the signature, the fields and any name / extra data / compressed data to `stream`.
+    void write(OutputStream& stream) const
+    {
+        stream.write_or_error({ local_file_header_signature, sizeof(local_file_header_signature) });
+        stream << minimum_version;
+        stream << general_purpose_flags;
+        stream << compression_method;
+        stream << modification_time;
+        stream << modification_date;
+        stream << crc32;
+        stream << compressed_size;
+        stream << uncompressed_size;
+        stream << name_length;
+        stream << extra_data_length;
+        if (name_length > 0)
+            stream.write_or_error({ name, name_length });
+        if (extra_data_length > 0)
+            stream.write_or_error({ extra_data, extra_data_length });
+        if (compressed_size > 0)
+            stream.write_or_error({ compressed_data, compressed_size });
+    }
+};
+
+// Values of the `compression_method` field. Kept as an unscoped enum because the
+// raw u16 fields of the records above are compared directly against these values.
+enum ZipCompressionMethod : u16 {
+    Store = 0,
+    Shrink = 1,
+    Reduce1 = 2,
+    Reduce2 = 3,
+    Reduce3 = 4,
+    Reduce4 = 5,
+    Implode = 6,
+    Reserved = 7,
+    Deflate = 8
+};
+
+// One archive member as reported by Zip::for_each_member().
+// `compressed_data` is a view into the archive buffer, not a copy, and is still compressed.
+struct ZipMember {
+    String name;
+    ReadonlyBytes compressed_data; // TODO: maybe the decompression/compression should be handled by LibArchive instead of the user?
+    ZipCompressionMethod compression_method;
+    u32 uncompressed_size;
+    u32 crc32;
+    bool is_directory;
+};
+
+// Read-only view of a zip archive held in memory.
+// A Zip stores only a span over the buffer given to try_create(), so that buffer
+// has to stay alive for as long as the Zip (and any ZipMember obtained from it) is used.
+class Zip {
+public:
+    // Returns a Zip for `buffer`, or an empty Optional if it is not a supported zip archive.
+    static Optional<Zip> try_create(const ReadonlyBytes& buffer);
+    // Calls the callback for each member; returns false if the callback broke out of the iteration.
+    bool for_each_member(Function<IterationDecision(const ZipMember&)>);
+
+private:
+    static bool find_end_of_central_directory_offset(const ReadonlyBytes&, size_t& offset);
+
+    u16 member_count { 0 };
+    size_t members_start_offset { 0 };
+    ReadonlyBytes m_input_data;
+};
+
+}