diff options
author | Julian Offenhäuser <metalvoidzz@gmail.com> | 2022-08-20 15:17:15 +0200 |
---|---|---|
committer | Sam Atkins <atkinssj@gmail.com> | 2022-09-17 10:07:14 +0100 |
commit | 77f5f7a6f452d4941fef123f216ecf41fbb33a04 (patch) | |
tree | 3c1551348cf224e3d41b0283f910277249fa0412 /Userland | |
parent | 6225c0325648e50b12982aab8c9df8291f9b7209 (diff) | |
download | serenity-77f5f7a6f452d4941fef123f216ecf41fbb33a04.zip |
LibPDF: Support parsing page tree nodes that are in object streams
conditionally_parse_page_tree_node used to assume that the xref table
entry for an object always contained a byte offset, which is not the
case for compressed objects (those stored in object streams). It now
uses the common facilities for parsing objects
(parse_object_with_index), at the expense of some performance.
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibPDF/DocumentParser.cpp | 61 | ||||
-rw-r--r-- | Userland/Libraries/LibPDF/ObjectDerivatives.h | 3 |
2 files changed, 21 insertions, 43 deletions
diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp index 95e8c3b3a9..71a645ddec 100644 --- a/Userland/Libraries/LibPDF/DocumentParser.cpp +++ b/Userland/Libraries/LibPDF/DocumentParser.cpp @@ -644,51 +644,26 @@ bool DocumentParser::navigate_to_after_startxref() PDFErrorOr<RefPtr<DictObject>> DocumentParser::conditionally_parse_page_tree_node(u32 object_index) { - VERIFY(m_xref_table->has_object(object_index)); - auto byte_offset = m_xref_table->byte_offset_for_object(object_index); - - m_reader.move_to(byte_offset); - TRY(parse_number()); - TRY(parse_number()); - if (!m_reader.matches("obj")) - return error(String::formatted("Invalid page tree offset {}", object_index)); - - m_reader.move_by(3); - m_reader.consume_whitespace(); - - VERIFY(m_reader.consume('<') && m_reader.consume('<')); - - m_reader.consume_whitespace(); - HashMap<FlyString, Value> map; - - while (true) { - if (m_reader.matches(">>")) - break; - auto name = TRY(parse_name()); - auto name_string = name->name(); - if (!name_string.is_one_of(CommonNames::Type, CommonNames::Parent, CommonNames::Kids, CommonNames::Count)) { - // This is a page, not a page tree node - return RefPtr<DictObject> {}; - } - - auto value = TRY(parse_value()); - if (name_string == CommonNames::Type) { - if (!value.has<NonnullRefPtr<Object>>()) - return RefPtr<DictObject> {}; - auto type_object = value.get<NonnullRefPtr<Object>>(); - if (!type_object->is<NameObject>()) - return RefPtr<DictObject> {}; - auto type_name = type_object->cast<NameObject>(); - if (type_name->name() != CommonNames::Pages) - return RefPtr<DictObject> {}; - } - map.set(name->name(), value); - } + auto dict_value = TRY(parse_object_with_index(object_index)); + auto dict_object = dict_value.get<NonnullRefPtr<Object>>(); + if (!dict_object->is<DictObject>()) + return error(String::formatted("Invalid page tree with xref index {}", object_index)); - VERIFY(m_reader.consume('>') && m_reader.consume('>')); - 
m_reader.consume_whitespace(); + auto dict = dict_object->cast<DictObject>(); + if (!dict->contains_any_of(CommonNames::Type, CommonNames::Parent, CommonNames::Kids, CommonNames::Count)) + // This is a page, not a page tree node + return RefPtr<DictObject> {}; + + if (!dict->contains(CommonNames::Type)) + return RefPtr<DictObject> {}; + auto type_object = TRY(dict->get_object(m_document, CommonNames::Type)); + if (!type_object->is<NameObject>()) + return RefPtr<DictObject> {}; + auto type_name = type_object->cast<NameObject>(); + if (type_name->name() != CommonNames::Pages) + return RefPtr<DictObject> {}; - return make_object<DictObject>(map); + return dict; } } diff --git a/Userland/Libraries/LibPDF/ObjectDerivatives.h b/Userland/Libraries/LibPDF/ObjectDerivatives.h index c719ef2f27..cdb8dd7ef7 100644 --- a/Userland/Libraries/LibPDF/ObjectDerivatives.h +++ b/Userland/Libraries/LibPDF/ObjectDerivatives.h @@ -113,6 +113,9 @@ public: template<typename... Args> bool contains(Args&&... keys) const { return (m_map.contains(keys) && ...); } + template<typename... Args> + bool contains_any_of(Args&&... keys) const { return (m_map.contains(keys) || ...); } + ALWAYS_INLINE Optional<Value> get(FlyString const& key) const { return m_map.get(key); } Value get_value(FlyString const& key) const |