/*
 * Copyright (c) 2021-2022, Matthew Olsson
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

// NOTE(review): the include targets below and the template argument lists
// throughout this header are not visible in the reviewed text; they must be
// confirmed against the original header.
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace PDF {

// A rectangle described by its lower-left and upper-right corners.
struct Rectangle {
    float lower_left_x;
    float lower_left_y;
    float upper_right_x;
    float upper_right_y;

    // Horizontal extent: upper-right x minus lower-left x.
    float width() const { return upper_right_x - lower_left_x; }
    // Vertical extent: upper-right y minus lower-left y.
    float height() const { return upper_right_y - lower_left_y; }
};

// Plain data describing a single page.
// NOTE(review): the fields presumably mirror the Resources, Contents,
// MediaBox, CropBox, UserUnit and Rotate page entries of the PDF spec;
// confirm against the code that fills this struct in.
struct Page {
    NonnullRefPtr resources;
    NonnullRefPtr contents;
    Rectangle media_box;
    Rectangle crop_box;
    float user_unit;
    int rotate;
};

// A destination: a kind (Type), a page value, and a list of parameters.
struct Destination {
    // The kinds of destination this struct can describe. The AK::Formatter
    // specialization for Destination prints these enumerator names verbatim.
    enum class Type {
        XYZ,
        Fit,
        FitH,
        FitV,
        FitR,
        FitB,
        FitBH,
        FitBV,
    };

    Type type;
    Value page;
    // NOTE(review): presumably the type-specific arguments of the destination
    // (coordinates, zoom, ...); their meaning per Type is not shown here.
    Vector parameters;
};

// A ref-counted node of the outline tree. Each item links to its parent,
// holds its children, and carries a title, a destination and display
// attributes.
struct OutlineItem final : public RefCounted {
    RefPtr parent;
    NonnullRefPtrVector children;
    String title;
    i32 count { 0 };
    Destination dest;
    Gfx::Color color { Color::NamedColor::Black }; // 'C' in the PDF spec
    bool italic { false };                         // bit 0 of 'F' in the PDF spec
    bool bold { false };                           // bit 1 of 'F' in the PDF spec

    OutlineItem() = default;

    // Returns a textual representation of this item for the given indent.
    // Only declared here; the definition lives outside this header.
    String to_string(int indent) const;
};

// The ref-counted root of the outline: the top-level items and a count.
struct OutlineDict final : public RefCounted {
    NonnullRefPtrVector children;
    u32 count { 0 };

    OutlineDict() = default;
};

// A PDF document. Instances are obtained through create(), since the
// constructor is private.
class Document final
    : public RefCounted
    , public Weakable {
public:
    // Static factory taking the raw bytes of the document. Only declared
    // here; the definition lives outside this header.
    static PDFErrorOr> create(ReadonlyBytes bytes);

    // If a security handler is present, it is the caller's responsibility to ensure
    // this document is unencrypted before calling this function. The user does not
    // need to handle the case where the user password is the empty string.
    PDFErrorOr initialize();

    // Accessors returning const references to the corresponding members.
    ALWAYS_INLINE RefPtr const& security_handler() const { return m_security_handler; }

    ALWAYS_INLINE RefPtr const& outline() const { return m_outline; }

    ALWAYS_INLINE RefPtr const& trailer() const { return m_trailer; }

    [[nodiscard]] PDFErrorOr get_or_load_value(u32 index);

    [[nodiscard]] u32 get_first_page_index() const;

    [[nodiscard]] u32 get_page_count() const;

    [[nodiscard]] PDFErrorOr get_page(u32 index);

    // Looks up index in m_values and returns the stored entry, or a
    // default-constructed one when nothing is stored for that index.
    // Unlike get_or_load_value(), this is const and cannot report an error.
    ALWAYS_INLINE Value get_value(u32 index) const
    {
        return m_values.get(index).value_or({});
    }

    // Strips away the layer of indirection by turning indirect value
    // refs into the value they reference, and indirect values into
    // the value being wrapped.
    PDFErrorOr resolve(Value const& value);

    // Like resolve, but unwraps the Value into the given type. Accepts
    // any object type, and the three primitive Value types.
    //
    // Any error from resolve() is propagated to the caller through TRY. The
    // branch is chosen at compile time; if none of the conditions holds for
    // the requested type, VERIFY_NOT_REACHED() is hit at run time.
    template
    PDFErrorOr> resolve_to(Value const& value)
    {
        auto resolved = TRY(resolve(value));

        if constexpr (IsSame)
            return resolved.get();
        else if constexpr (IsSame)
            return resolved.get();
        else if constexpr (IsSame)
            return resolved.get();
        else if constexpr (IsSame)
            return resolved.get>();
        else if constexpr (IsObject)
            // Object types are fetched from the Value and then cast().
            return resolved.get>()->cast();

        VERIFY_NOT_REACHED();
    }

private:
    explicit Document(NonnullRefPtr const& parser);

    // FIXME: Currently, to improve performance, we don't load any pages at Document
    // construction, rather we just load the page structure and populate
    // m_page_object_indices. However, we can be even lazier and defer page tree node
    // parsing, as good PDF writers will layout the page tree in a balanced tree to
    // improve lookup time. This would reduce the initial overhead by not loading
    // every page tree node of, say, a 1000+ page PDF file.
    PDFErrorOr build_page_tree();
    PDFErrorOr add_page_tree_node_to_page_tree(NonnullRefPtr const& page_tree);

    // Outline construction helpers. Only declared here; the definitions live
    // outside this header.
    PDFErrorOr build_outline();
    PDFErrorOr> build_outline_item(NonnullRefPtr const& outline_item_dict);
    PDFErrorOr> build_outline_item_chain(Value const& first_ref, Value const& last_ref);

    PDFErrorOr create_destination_from_parameters(NonnullRefPtr);

    NonnullRefPtr m_parser;
    RefPtr m_catalog;
    RefPtr m_trailer;
    // Populated when the page structure is loaded (see the FIXME above).
    Vector m_page_object_indices;
    HashMap m_pages;
    // Backing store consulted by get_value().
    HashMap m_values;
    RefPtr m_outline;
    RefPtr m_security_handler;
};

}

namespace AK {

// Formats a PDF::Rectangle as "Rectangle { ll=(x, y), ur=(x, y) }".
template<>
struct Formatter : Formatter {
    ErrorOr format(FormatBuilder& builder, PDF::Rectangle const& rectangle)
    {
        return Formatter::format(builder,
            String::formatted("Rectangle {{ ll=({}, {}), ur=({}, {}) }}",
                rectangle.lower_left_x,
                rectangle.lower_left_y,
                rectangle.upper_right_x,
                rectangle.upper_right_y));
    }
};

// Formats a PDF::Page as a multi-line "Page { ... }" listing with one field
// per line. resources and contents are rendered through their to_string(1).
template<>
struct Formatter : Formatter {
    ErrorOr format(FormatBuilder& builder, PDF::Page const& page)
    {
        constexpr auto fmt_string = "Page {{\n resources={}\n contents={}\n media_box={}\n crop_box={}\n user_unit={}\n rotate={}\n}}";
        auto str = String::formatted(fmt_string,
            page.resources->to_string(1),
            page.contents->to_string(1),
            page.media_box,
            page.crop_box,
            page.user_unit,
            page.rotate);
        return Formatter::format(builder, str);
    }
};

// Formats a PDF::Destination as "{ type=<name> page=<page> params=<p0 p1 ... > }".
template<>
struct Formatter : Formatter {
    ErrorOr format(FormatBuilder& builder, PDF::Destination const& destination)
    {
        // Map the enumerator to its name; every Type enumerator has a case.
        String type_str;
        switch (destination.type) {
        case PDF::Destination::Type::XYZ:
            type_str = "XYZ";
            break;
        case PDF::Destination::Type::Fit:
            type_str = "Fit";
            break;
        case PDF::Destination::Type::FitH:
            type_str = "FitH";
            break;
        case PDF::Destination::Type::FitV:
            type_str = "FitV";
            break;
        case PDF::Destination::Type::FitR:
            type_str = "FitR";
            break;
        case PDF::Destination::Type::FitB:
            type_str = "FitB";
            break;
        case PDF::Destination::Type::FitBH:
            type_str = "FitBH";
            break;
        case PDF::Destination::Type::FitBV:
            type_str = "FitBV";
            break;
        }

        // Each parameter is followed by a single space, including the last.
        StringBuilder param_builder;
        for (auto& param : destination.parameters)
            param_builder.appendff("{} ", param);

        auto str = String::formatted("{{ type={} page={} params={} }}", type_str, destination.page, param_builder.to_string());
        return Formatter::format(builder, str);
    }
};

// Formats a PDF::OutlineItem by delegating to its to_string(0).
template<>
struct Formatter : Formatter {
    ErrorOr format(FormatBuilder& builder, PDF::OutlineItem const& item)
    {
        return Formatter::format(builder, item.to_string(0));
    }
};

// Formats a PDF::OutlineDict as a multi-line "OutlineDict { ... }" listing
// with its count and a bracketed list of its children.
template<>
struct Formatter : Formatter {
    ErrorOr format(FormatBuilder& builder, PDF::OutlineDict const& dict)
    {
        // Children are rendered via to_string(2), one per line, between
        // '[' and " ]".
        StringBuilder child_builder;
        child_builder.append('[');
        for (auto& child : dict.children)
            child_builder.appendff("{}\n", child.to_string(2));
        child_builder.append(" ]");

        return Formatter::format(builder,
            String::formatted("OutlineDict {{\n count={}\n children={}\n}}", dict.count, child_builder.to_string()));
    }
};

}