From cb1a7cc7211b21a160a4a2faa1cf3bb0ac30a698 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Fri, 16 Dec 2022 00:14:40 +0800 Subject: LibPDF: Simplify outline construction While the Outline Items making up the document's Outline have all sorts of cross-references (parent, first/last chlid, next/previous sibling, etc), not all documents out there have fully-consistent references. Our implementation already discarded some of that information too (e.g., /Parent and /Prev were never read), and trusted that /First and /Next were good enough to traverse the whole hierarchy. Where the current implementation failed was in assuming that /Last was also a good source of information. There are documents out there were /Last also points to dead ends, and were therefore causing a crash when we verified that the last child found on a chain was the /Last child declared by the parent. To fix this I'm simply removing the check, and simplifying the function call to remove any references to /Last. This way we affirm our commitment to /First and /Next as the main sources of information. --- Userland/Libraries/LibPDF/Document.cpp | 17 ++++++++--------- Userland/Libraries/LibPDF/Document.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'Userland/Libraries/LibPDF') diff --git a/Userland/Libraries/LibPDF/Document.cpp b/Userland/Libraries/LibPDF/Document.cpp index e25dc0e6a0..5dac546ac5 100644 --- a/Userland/Libraries/LibPDF/Document.cpp +++ b/Userland/Libraries/LibPDF/Document.cpp @@ -211,9 +211,8 @@ PDFErrorOr Document::build_outline() return {}; auto first_ref = outline_dict->get_value(CommonNames::First); - auto last_ref = outline_dict->get_value(CommonNames::Last); - auto children = TRY(build_outline_item_chain(first_ref, last_ref)); + auto children = TRY(build_outline_item_chain(first_ref)); m_outline = adopt_ref(*new OutlineDict()); m_outline->children = move(children); @@ -273,9 +272,7 @@ PDFErrorOr> Document::build_outline_item(NonnullRefPt if (outline_item_dict->contains(CommonNames::First)) { VERIFY(outline_item_dict->contains(CommonNames::Last)); auto first_ref = outline_item_dict->get_value(CommonNames::First); - auto last_ref = outline_item_dict->get_value(CommonNames::Last); - - auto children = TRY(build_outline_item_chain(first_ref, last_ref)); + auto children = TRY(build_outline_item_chain(first_ref)); outline_item->children = move(children); } @@ -326,10 +323,14 @@ PDFErrorOr> Document::build_outline_item(NonnullRefPt return outline_item; } -PDFErrorOr> Document::build_outline_item_chain(Value const& first_ref, Value const& last_ref) +PDFErrorOr> Document::build_outline_item_chain(Value const& first_ref) { + // We used to receive a last_ref parameter, which was what the parent of this chain + // thought was this chain's last child. There are documents out there in the wild + // where this cross-references don't match though, and it seems like simply following + // the /First and /Next links is the way to go to construct the whole Outline + // (we already ignore the /Parent attribute too, which can also be out of sync). VERIFY(first_ref.has()); - VERIFY(last_ref.has()); NonnullRefPtrVector children; @@ -352,8 +353,6 @@ PDFErrorOr> Document::build_outline_item_chain( current_child_dict = move(next_child_dict); } - VERIFY(last_ref.as_ref_index() == current_child_index); - return children; } diff --git a/Userland/Libraries/LibPDF/Document.h b/Userland/Libraries/LibPDF/Document.h index 99a41aa641..e2968b36bd 100644 --- a/Userland/Libraries/LibPDF/Document.h +++ b/Userland/Libraries/LibPDF/Document.h @@ -146,7 +146,7 @@ private: PDFErrorOr build_outline(); PDFErrorOr> build_outline_item(NonnullRefPtr const& outline_item_dict); - PDFErrorOr> build_outline_item_chain(Value const& first_ref, Value const& last_ref); + PDFErrorOr> build_outline_item_chain(Value const& first_ref); PDFErrorOr create_destination_from_parameters(NonnullRefPtr); -- cgit v1.2.3