summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibPDF/Document.h
blob: d2d54949ed8478780d6e05dee86941853c0f3e8f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/*
 * Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <AK/Format.h>
#include <AK/HashMap.h>
#include <AK/RefCounted.h>
#include <AK/Weakable.h>
#include <LibGfx/Color.h>
#include <LibPDF/DocumentParser.h>
#include <LibPDF/Encryption.h>
#include <LibPDF/Error.h>
#include <LibPDF/ObjectDerivatives.h>

namespace PDF {

// Axis-aligned rectangle described by two opposite corners: the lower-left
// point (lower_left_x, lower_left_y) and the upper-right point
// (upper_right_x, upper_right_y).
struct Rectangle {
    float lower_left_x;
    float lower_left_y;
    float upper_right_x;
    float upper_right_y;

    // Horizontal extent: upper-right x minus lower-left x. No clamping is
    // applied, so the result is negative if the corners are swapped.
    float width() const
    {
        auto const horizontal_extent = upper_right_x - lower_left_x;
        return horizontal_extent;
    }

    // Vertical extent: upper-right y minus lower-left y. No clamping is
    // applied, so the result is negative if the corners are swapped.
    float height() const
    {
        auto const vertical_extent = upper_right_y - lower_left_y;
        return vertical_extent;
    }
};

// Plain data describing a single page; this is the type that
// Document::get_page() hands back to callers.
// NOTE(review): the field names look like they mirror the /Resources,
// /Contents, /MediaBox, /CropBox, /UserUnit and /Rotate entries of a PDF page
// dictionary -- confirm against the code that fills this struct in.
struct Page {
    NonnullRefPtr<DictObject> resources; // never null (NonnullRefPtr)
    NonnullRefPtr<Object> contents;      // never null (NonnullRefPtr); a generic Object, not a specific subtype
    Rectangle media_box;
    Rectangle crop_box;
    float user_unit;
    int rotate; // presumably degrees -- TODO confirm unit and allowed values
};

// A location inside the document that an outline item can point at
// (see OutlineItem::dest).
struct Destination {
    // Kind of destination. The enumerator names match the strings that
    // Formatter<PDF::Destination> prints for them.
    // NOTE(review): these appear to correspond to the destination types of the
    // PDF specification (/XYZ, /Fit, /FitH, ...) -- confirm against the parser.
    enum class Type {
        XYZ,
        Fit,
        FitH,
        FitV,
        FitR,
        FitB,
        FitBH,
        FitBV,
    };

    Type type;
    // Target page; empty when no page is known for this destination.
    // NOTE(review): whether this is a page index or an object number is not
    // visible from this header -- confirm.
    Optional<u32> page;
    // Numeric arguments that accompany `type`; presumably their count and
    // meaning depend on the destination type -- TODO confirm.
    Vector<float> parameters;
};

// One node of the document outline tree. Nodes are reference counted; each
// node keeps a (nullable) reference to its parent and owns its children.
struct OutlineItem final : public RefCounted<OutlineItem> {
    RefPtr<OutlineItem> parent; // nullable; presumably null for top-level items -- TODO confirm
    NonnullRefPtrVector<OutlineItem> children;
    DeprecatedString title;
    i32 count { 0 }; // signed, unlike OutlineDict::count
    Destination dest;
    Gfx::Color color { Color::NamedColor::Black }; // 'C' in the PDF spec
    bool italic { false };                         // bit 0 of 'F' in the PDF spec
    bool bold { false };                           // bit 1 of 'F' in the PDF spec

    OutlineItem() = default;

    // Renders this item as text. `indent` is presumably the nesting depth used
    // to indent the output (the formatters in this file pass 0 and 2) --
    // the implementation is not in this header.
    DeprecatedString to_deprecated_string(int indent) const;
};

// Root of the document outline: holds the top-level outline items and a
// count. Exposed to callers through Document::outline().
struct OutlineDict final : public RefCounted<OutlineDict> {
    NonnullRefPtrVector<OutlineItem> children;
    u32 count { 0 }; // NOTE(review): exact meaning (e.g. number of visible items) is not visible here -- confirm

    OutlineDict() = default;
};

// In-memory representation of a PDF document. Instances are reference counted
// and weakly referenceable. The constructor is private, so create() is the
// only public way to obtain one.
class Document final
    : public RefCounted<Document>
    , public Weakable<Document> {
public:
    // Builds a Document from the raw bytes of a PDF file, or returns a PDF
    // error. NOTE(review): `bytes` is a non-owning view; presumably it must
    // outlive the returned Document -- confirm against the parser.
    static PDFErrorOr<NonnullRefPtr<Document>> create(ReadonlyBytes bytes);

    // If a security handler is present, it is the caller's responsibility to ensure
    // this document is unencrypted before calling this function. The user does not
    // need to handle the case where the user password is the empty string.
    PDFErrorOr<void> initialize();

    // Accessor for the security handler. The RefPtr is nullable; see the
    // comment on initialize() for how callers are expected to use it.
    ALWAYS_INLINE RefPtr<SecurityHandler> const& security_handler() const { return m_security_handler; }

    // Accessor for the outline root (nullable RefPtr).
    ALWAYS_INLINE RefPtr<OutlineDict> const& outline() const { return m_outline; }

    // Accessor for the trailer dictionary (nullable RefPtr).
    ALWAYS_INLINE RefPtr<DictObject> const& trailer() const { return m_trailer; }

    // Non-const counterpart of get_value() that can fail with a PDF error.
    // Presumably returns the cached value for `index` and otherwise loads it
    // through the parser -- the implementation is not in this header.
    [[nodiscard]] PDFErrorOr<Value> get_or_load_value(u32 index);

    [[nodiscard]] u32 get_first_page_index() const;

    [[nodiscard]] u32 get_page_count() const;

    // Returns the Page for `index`, or a PDF error.
    [[nodiscard]] PDFErrorOr<Page> get_page(u32 index);

    // Looks `index` up in m_values without loading anything; yields a
    // default-constructed Value when there is no entry for it.
    ALWAYS_INLINE Value get_value(u32 index) const
    {
        return m_values.get(index).value_or({});
    }

    // Strips away the layer of indirection by turning indirect value
    // refs into the value they reference, and indirect values into
    // the value being wrapped.
    PDFErrorOr<Value> resolve(Value const& value);

    // Like resolve, but unwraps the Value into the given type. Accepts
    // any object type, and the three primitive Value types.
    //
    // Errors from resolve() are propagated via TRY. The resolved value is
    // then read with get<...>() for the requested alternative; there is no
    // type check here, so the caller must already know that the resolved
    // value holds a T (presumably a mismatch trips an assertion -- confirm).
    template<IsValueType T>
    PDFErrorOr<UnwrappedValueType<T>> resolve_to(Value const& value)
    {
        auto resolved = TRY(resolve(value));

        if constexpr (IsSame<T, bool>)
            return resolved.get<bool>();
        else if constexpr (IsSame<T, int>)
            return resolved.get<int>();
        else if constexpr (IsSame<T, float>)
            return resolved.get<float>();
        else if constexpr (IsSame<T, Object>)
            return resolved.get<NonnullRefPtr<Object>>();
        else if constexpr (IsObject<T>)
            // Specific object subtype: fetch the generic Object and downcast it.
            return resolved.get<NonnullRefPtr<Object>>()->cast<T>();

        // Only reached when T matches none of the branches above.
        VERIFY_NOT_REACHED();
    }

private:
    explicit Document(NonnullRefPtr<DocumentParser> const& parser);

    // FIXME: Currently, to improve performance, we don't load any pages at Document
    // construction, rather we just load the page structure and populate
    // m_page_object_indices. However, we can be even lazier and defer page tree node
    // parsing, as good PDF writers will layout the page tree in a balanced tree to
    // improve lookup time. This would reduce the initial overhead by not loading
    // every page tree node of, say, a 1000+ page PDF file.
    PDFErrorOr<void> build_page_tree();
    PDFErrorOr<void> add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> const& page_tree);

    // Outline construction helpers. NOTE(review): the unnamed
    // HashMap<u32, u32> parameter is threaded through all of them down to
    // create_destination_from_parameters(); it is presumably a lookup table
    // used to fill Destination::page -- confirm key/value meaning in the .cpp.
    PDFErrorOr<void> build_outline();
    PDFErrorOr<NonnullRefPtr<OutlineItem>> build_outline_item(NonnullRefPtr<DictObject> const& outline_item_dict, HashMap<u32, u32> const&);
    PDFErrorOr<NonnullRefPtrVector<OutlineItem>> build_outline_item_chain(Value const& first_ref, HashMap<u32, u32> const&);

    PDFErrorOr<Destination> create_destination_from_parameters(NonnullRefPtr<ArrayObject>, HashMap<u32, u32> const&);

    // Looks up `name` starting from the given dictionary. Presumably walks up
    // to ancestor dictionaries when the entry is missing ("inheritable") --
    // the implementation is not in this header.
    PDFErrorOr<NonnullRefPtr<Object>> get_inheritable_object(FlyString const& name, NonnullRefPtr<DictObject>);

    NonnullRefPtr<DocumentParser> m_parser;
    RefPtr<DictObject> m_catalog;
    RefPtr<DictObject> m_trailer;
    Vector<u32> m_page_object_indices; // populated by build_page_tree() (see FIXME above)
    HashMap<u32, Page> m_pages;
    HashMap<u32, Value> m_values; // backing store read by get_value()
    RefPtr<OutlineDict> m_outline;
    RefPtr<SecurityHandler> m_security_handler;
};

}

namespace AK {

// Formatter for PDF::Rectangle: prints the lower-left ("ll") and upper-right
// ("ur") corners as coordinate pairs.
template<>
struct Formatter<PDF::Rectangle> : Formatter<FormatString> {
    ErrorOr<void> format(FormatBuilder& builder, PDF::Rectangle const& rectangle)
    {
        // Doubled braces in the pattern are escapes for literal braces.
        StringView const pattern = "Rectangle {{ ll=({}, {}), ur=({}, {}) }}"sv;

        auto const& left = rectangle.lower_left_x;
        auto const& bottom = rectangle.lower_left_y;
        auto const& right = rectangle.upper_right_x;
        auto const& top = rectangle.upper_right_y;

        return Formatter<FormatString>::format(builder, pattern, left, bottom, right, top);
    }
};

// Formatter for PDF::Page: prints every field of the page on its own line.
// The resources and contents objects are rendered through their own
// to_deprecated_string(1) and embedded as text.
template<>
struct Formatter<PDF::Page> : Formatter<FormatString> {
    ErrorOr<void> format(FormatBuilder& builder, PDF::Page const& page)
    {
        auto const resources_text = page.resources->to_deprecated_string(1);
        auto const contents_text = page.contents->to_deprecated_string(1);

        StringView const pattern = "Page {{\n  resources={}\n  contents={}\n  media_box={}\n  crop_box={}\n  user_unit={}\n  rotate={}\n}}"sv;

        return Formatter<FormatString>::format(builder,
            pattern,
            resources_text,
            contents_text,
            page.media_box,
            page.crop_box,
            page.user_unit,
            page.rotate);
    }
};

// Formatter for PDF::Destination. Output shape:
//   { type=<Type> page=<page or placeholder> parameters=<p0> <p1> ... }
// where <Type> is the enumerator name and each parameter is printed as a
// floating point number followed by a space.
template<>
struct Formatter<PDF::Destination> : Formatter<FormatString> {
    ErrorOr<void> format(FormatBuilder& builder, PDF::Destination const& destination)
    {
        // Map the destination type to its textual name.
        StringView type_str;
        switch (destination.type) {
        case PDF::Destination::Type::XYZ:
            type_str = "XYZ"sv;
            break;
        case PDF::Destination::Type::Fit:
            type_str = "Fit"sv;
            break;
        case PDF::Destination::Type::FitH:
            type_str = "FitH"sv;
            break;
        case PDF::Destination::Type::FitV:
            type_str = "FitV"sv;
            break;
        case PDF::Destination::Type::FitR:
            type_str = "FitR"sv;
            break;
        case PDF::Destination::Type::FitB:
            type_str = "FitB"sv;
            break;
        case PDF::Destination::Type::FitBH:
            type_str = "FitBH"sv;
            break;
        case PDF::Destination::Type::FitBV:
            type_str = "FitBV"sv;
            break;
        }

        TRY(Formatter<FormatString>::format(builder, "{{ type={} page="sv, type_str));

        // Only read the page number when one is actually present. The previous
        // code had this test inverted: it printed the placeholder for
        // destinations that do have a page and called value() on an empty
        // Optional for those that do not.
        if (destination.page.has_value())
            TRY(builder.put_u64(destination.page.value()));
        else
            // NOTE(review): put_literal() appears to treat doubled braces as
            // escapes (see the closing "}}" below); confirm that "{}" renders
            // the intended placeholder or whether "{{}}" is needed.
            TRY(builder.put_literal("{}"sv));

        // Separate the page number from the parameter list; without this the
        // first parameter's digits run straight into the page number.
        TRY(builder.put_literal(" parameters="sv));
        for (auto const& param : destination.parameters) {
            TRY(builder.put_f64(double(param)));
            TRY(builder.put_literal(" "sv));
        }
        return builder.put_literal("}}"sv);
    }
};

// Formatter for PDF::OutlineItem: delegates to the item's own
// to_deprecated_string() with an indent of 0 and emits the result verbatim.
template<>
struct Formatter<PDF::OutlineItem> : Formatter<FormatString> {
    ErrorOr<void> format(FormatBuilder& builder, PDF::OutlineItem const& item)
    {
        auto const rendered_item = item.to_deprecated_string(0);
        return builder.put_string(rendered_item);
    }
};

// Formatter for PDF::OutlineDict: prints the count followed by a bracketed
// list of the top-level outline items, one per line, each rendered with an
// indent of 2.
template<>
struct Formatter<PDF::OutlineDict> : Formatter<FormatString> {
    ErrorOr<void> format(FormatBuilder& builder, PDF::OutlineDict const& dict)
    {
        // Assemble the "[...]" children listing first so it can be passed to
        // the outer pattern as a single argument.
        StringBuilder children_listing;
        children_listing.append('[');
        for (auto const& outline_item : dict.children) {
            auto const item_text = outline_item.to_deprecated_string(2);
            children_listing.appendff("{}\n", item_text);
        }
        children_listing.append("  ]"sv);

        auto const children_text = children_listing.to_deprecated_string();
        return Formatter<FormatString>::format(builder,
            "OutlineDict {{\n  count={}\n  children={}\n}}"sv,
            dict.count,
            children_text);
    }
};

}