diff options
author | Matthew Olsson <matthewcolsson@gmail.com> | 2021-05-23 16:37:33 -0700 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2021-05-25 00:24:09 +0430 |
commit | a08922d2f6fa3d86aa8af0bbbfba95d657bea634 (patch) | |
tree | 4fc0d09dbf37b8ba52d16f0cd53b1ffd24b8837b /Userland/Libraries/LibPDF | |
parent | 777c232e162b1ea1d563821fb8ded3c0450db1a3 (diff) | |
download | serenity-a08922d2f6fa3d86aa8af0bbbfba95d657bea634.zip |
LibPDF: Parse outline structures
Diffstat (limited to 'Userland/Libraries/LibPDF')
-rw-r--r-- | Userland/Libraries/LibPDF/Document.cpp | 145 | ||||
-rw-r--r-- | Userland/Libraries/LibPDF/Document.h | 111 | ||||
-rw-r--r-- | Userland/Libraries/LibPDF/Object.h | 3 |
3 files changed, 257 insertions, 2 deletions
diff --git a/Userland/Libraries/LibPDF/Document.cpp b/Userland/Libraries/LibPDF/Document.cpp index e7a45a8519..ddafa87b39 100644 --- a/Userland/Libraries/LibPDF/Document.cpp +++ b/Userland/Libraries/LibPDF/Document.cpp @@ -9,6 +9,30 @@ namespace PDF { +String OutlineItem::to_string(int indent) const +{ + auto indent_str = String::repeated(" ", indent + 1); + + StringBuilder child_builder; + child_builder.append('['); + for (auto& child : children) + child_builder.appendff("{}\n", child.to_string(indent + 1)); + child_builder.appendff("{}]", indent_str); + + StringBuilder builder; + builder.append("OutlineItem {{\n"); + builder.appendff("{}title={}\n", indent_str, title); + builder.appendff("{}count={}\n", indent_str, count); + builder.appendff("{}dest={}\n", indent_str, dest); + builder.appendff("{}color={}\n", indent_str, color); + builder.appendff("{}italic={}\n", indent_str, italic); + builder.appendff("{}bold={}\n", indent_str, bold); + builder.appendff("{}children={}\n", indent_str, child_builder.to_string()); + builder.appendff("{}}}", String::repeated(" ", indent)); + + return builder.to_string(); +} + Document::Document(const ReadonlyBytes& bytes) : m_parser(Parser({}, bytes)) { @@ -22,6 +46,7 @@ Document::Document(const ReadonlyBytes& bytes) m_catalog = m_trailer->get_dict(this, "Root"); build_page_tree(); + build_outline(); } Value Document::get_or_load_value(u32 index) @@ -153,4 +178,124 @@ void Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> page_tr m_page_object_indices.append(value.as_ref_index()); } +void Document::build_outline() +{ + if (!m_catalog->contains("Outlines")) + return; + + auto outline_dict = m_catalog->get_dict(this, "Outlines"); + if (!outline_dict->contains("First")) + return; + + VERIFY(outline_dict->contains("Last")); + + auto first_ref = outline_dict->get_value("First"); + auto last_ref = outline_dict->get_value("Last"); + + auto children = build_outline_item_chain(first_ref, last_ref); + + m_outline = 
adopt_ref(*new OutlineDict()); + m_outline->children = move(children); + + if (outline_dict->contains("Count")) + m_outline->count = outline_dict->get_value("Count").as_int(); +} + +NonnullRefPtr<OutlineItem> Document::build_outline_item(NonnullRefPtr<DictObject> outline_item_dict) +{ + auto outline_item = adopt_ref(*new OutlineItem {}); + + if (outline_item_dict->contains("First")) { + VERIFY(outline_item_dict->contains("Last")); + auto first_ref = outline_item_dict->get_value("First"); + auto last_ref = outline_item_dict->get_value("Last"); + + auto children = build_outline_item_chain(first_ref, last_ref); + outline_item->children = move(children); + } + + outline_item->title = outline_item_dict->get_string(this, "Title")->string(); + + if (outline_item_dict->contains("Count")) + outline_item->count = outline_item_dict->get_value("Count").as_int(); + + if (outline_item_dict->contains("Dest")) { + auto dest_arr = outline_item_dict->get_array(this, "Dest"); + auto page_ref = dest_arr->at(0); + auto type_name = dest_arr->get_name_at(this, 1)->name(); + + Vector<float> parameters; + for (size_t i = 2; i < dest_arr->size(); i++) + parameters.append(dest_arr->at(i).to_float()); + + Destination::Type type; + if (type_name == "XYZ") { + type = Destination::Type::XYZ; + } else if (type_name == "Fit") { + type = Destination::Type::Fit; + } else if (type_name == "FitH") { + type = Destination::Type::FitH; + } else if (type_name == "FitV") { + type = Destination::Type::FitV; + } else if (type_name == "FitR") { + type = Destination::Type::FitR; + } else if (type_name == "FitB") { + type = Destination::Type::FitB; + } else if (type_name == "FitBH") { + type = Destination::Type::FitBH; + } else if (type_name == "FitBV") { + type = Destination::Type::FitBV; + } else { + VERIFY_NOT_REACHED(); + } + + outline_item->dest = Destination { type, page_ref, parameters }; + } + + if (outline_item_dict->contains("C")) { + auto color_array = outline_item_dict->get_array(this, "C"); + auto 
r = static_cast<int>(255.0f * color_array->at(0).as_float()); + auto g = static_cast<int>(255.0f * color_array->at(1).as_float()); + auto b = static_cast<int>(255.0f * color_array->at(2).as_float()); + outline_item->color = Color(r, g, b); + } + + if (outline_item_dict->contains("F")) { + auto bitfield = outline_item_dict->get_value("F").as_int(); + outline_item->italic = bitfield & 0x1; + outline_item->bold = bitfield & 0x2; + } + + return outline_item; +} + +NonnullRefPtrVector<OutlineItem> Document::build_outline_item_chain(const Value& first_ref, const Value& last_ref) +{ + VERIFY(first_ref.is_ref()); + VERIFY(last_ref.is_ref()); + + NonnullRefPtrVector<OutlineItem> children; + + auto first_dict = object_cast<DictObject>(get_or_load_value(first_ref.as_ref_index()).as_object()); + auto first = build_outline_item(first_dict); + children.append(first); + + auto current_child_dict = first_dict; + u32 current_child_index = first_ref.as_ref_index(); + + while (current_child_dict->contains("Next")) { + auto next_child_dict_ref = current_child_dict->get_value("Next"); + current_child_index = next_child_dict_ref.as_ref_index(); + auto next_child_dict = object_cast<DictObject>(get_or_load_value(current_child_index).as_object()); + auto next_child = build_outline_item(next_child_dict); + children.append(next_child); + + current_child_dict = next_child_dict; + } + + VERIFY(last_ref.as_ref_index() == current_child_index); + + return children; +} + } diff --git a/Userland/Libraries/LibPDF/Document.h b/Userland/Libraries/LibPDF/Document.h index 343f76f405..a52c1eda47 100644 --- a/Userland/Libraries/LibPDF/Document.h +++ b/Userland/Libraries/LibPDF/Document.h @@ -9,6 +9,7 @@ #include <AK/Format.h> #include <AK/HashMap.h> #include <AK/RefCounted.h> +#include <LibGfx/Color.h> #include <LibPDF/Object.h> #include <LibPDF/Parser.h> #include <LibPDF/XRefTable.h> @@ -31,13 +32,52 @@ struct Page { int rotate; }; +struct Destination { + enum class Type { + XYZ, + Fit, + FitH, + FitV, + 
FitR, + FitB, + FitBH, + FitBV, + }; + + Type type; + Value page; + Vector<float> parameters; +}; + +struct OutlineItem final : public RefCounted<OutlineItem> { + RefPtr<OutlineItem> parent; + NonnullRefPtrVector<OutlineItem> children; + String title; + i32 count { 0 }; + Destination dest; + Gfx::Color color { Color::NamedColor::Black }; // 'C' in the PDF spec + bool italic { false }; // bit 0 of 'F' in the PDF spec + bool bold { false }; // bit 1 of 'F' in the PDF spec + + OutlineItem() = default; + + String to_string(int indent) const; +}; + +struct OutlineDict final : public RefCounted<OutlineDict> { + NonnullRefPtrVector<OutlineItem> children; + u32 count { 0 }; + + OutlineDict() = default; +}; + class Document final : public RefCounted<Document> { public: explicit Document(const ReadonlyBytes& bytes); ALWAYS_INLINE const XRefTable& xref_table() const { return m_xref_table; } - ALWAYS_INLINE const DictObject& trailer() const { return *m_trailer; } + ALWAYS_INLINE const RefPtr<OutlineDict>& outline() const { return m_outline; } [[nodiscard]] Value get_or_load_value(u32 index); @@ -92,6 +132,10 @@ private: void build_page_tree(); void add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> page_tree); + void build_outline(); + NonnullRefPtr<OutlineItem> build_outline_item(NonnullRefPtr<DictObject> outline_item_dict); + NonnullRefPtrVector<OutlineItem> build_outline_item_chain(const Value& first_ref, const Value& last_ref); + Parser m_parser; XRefTable m_xref_table; RefPtr<DictObject> m_trailer; @@ -99,6 +143,7 @@ private: Vector<u32> m_page_object_indices; HashMap<u32, Page> m_pages; HashMap<u32, Value> m_values; + RefPtr<OutlineDict> m_outline; }; } @@ -134,4 +179,68 @@ struct Formatter<PDF::Page> : Formatter<StringView> { } }; +template<> +struct Formatter<PDF::Destination> : Formatter<StringView> { + void format(FormatBuilder& builder, const PDF::Destination& destination) + { + String type_str; + switch (destination.type) { + case 
PDF::Destination::Type::XYZ: + type_str = "XYZ"; + break; + case PDF::Destination::Type::Fit: + type_str = "Fit"; + break; + case PDF::Destination::Type::FitH: + type_str = "FitH"; + break; + case PDF::Destination::Type::FitV: + type_str = "FitV"; + break; + case PDF::Destination::Type::FitR: + type_str = "FitR"; + break; + case PDF::Destination::Type::FitB: + type_str = "FitB"; + break; + case PDF::Destination::Type::FitBH: + type_str = "FitBH"; + break; + case PDF::Destination::Type::FitBV: + type_str = "FitBV"; + break; + } + + StringBuilder param_builder; + for (auto& param : destination.parameters) + param_builder.appendff("{} ", param); + + auto str = String::formatted("{{ type={} page={} params={} }}", type_str, destination.page, param_builder.to_string()); + Formatter<StringView>::format(builder, str); + } +}; + +template<> +struct Formatter<PDF::OutlineItem> : Formatter<StringView> { + void format(FormatBuilder& builder, const PDF::OutlineItem& item) + { + Formatter<StringView>::format(builder, item.to_string(0)); + } +}; + +template<> +struct Formatter<PDF::OutlineDict> : Formatter<StringView> { + void format(FormatBuilder& builder, const PDF::OutlineDict& dict) + { + StringBuilder child_builder; + child_builder.append('['); + for (auto& child : dict.children) + child_builder.appendff("{}\n", child.to_string(2)); + child_builder.append(" ]"); + + Formatter<StringView>::format(builder, + String::formatted("OutlineDict {{\n count={}\n children={}\n}}", dict.count, child_builder.to_string())); + } +}; + } diff --git a/Userland/Libraries/LibPDF/Object.h b/Userland/Libraries/LibPDF/Object.h index e75d38c0a3..048ef09f8d 100644 --- a/Userland/Libraries/LibPDF/Object.h +++ b/Userland/Libraries/LibPDF/Object.h @@ -67,7 +67,7 @@ public: ~NameObject() override = default; - [[nodiscard]] ALWAYS_INLINE FlyString name() const { return m_name; } + [[nodiscard]] ALWAYS_INLINE const FlyString& name() const { return m_name; } ALWAYS_INLINE bool is_name() const override { 
return true; } ALWAYS_INLINE const char* type_name() const override { return "name"; } @@ -86,6 +86,7 @@ public: ~ArrayObject() override = default; + [[nodiscard]] ALWAYS_INLINE size_t size() const { return m_elements.size(); } [[nodiscard]] ALWAYS_INLINE Vector<Value> elements() const { return m_elements; } ALWAYS_INLINE auto begin() const { return m_elements.begin(); } |