diff options
author | Rodrigo Tobar <rtobarc@gmail.com> | 2023-01-06 00:33:24 +0800 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2023-01-06 18:06:41 +0100 |
commit | 5420261347163126430d9818452f5e774d7b1fca (patch) | |
tree | 8c51556d6e582c3582d72431139076ea1ae10489 /Userland | |
parent | 8c79f0e0cfdc2be6a4f81ae02840f0d5f8d78f53 (diff) | |
download | serenity-5420261347163126430d9818452f5e774d7b1fca.zip |
LibPDF: Implement name tree lookups
Name Trees are hierarchical, string-keyed, sorted-by-key dictionary
structures in PDF where each node (except the root) specifies the bounds
of the keys it holds, and contains either its kids (more nodes) or the
key/value pairs themselves.
This commit implements a series of lookup calls for finding a key in
such name trees. The implementation walks the tree as needed on each
lookup; if that becomes inefficient in the long run we can switch to
building a HashMap with all the contents up front, at the cost of
higher memory use.
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibPDF/CommonNames.h | 2 | ||||
-rw-r--r-- | Userland/Libraries/LibPDF/Document.cpp | 55 | ||||
-rw-r--r-- | Userland/Libraries/LibPDF/Document.h | 5 |
3 files changed, 62 insertions, 0 deletions
diff --git a/Userland/Libraries/LibPDF/CommonNames.h b/Userland/Libraries/LibPDF/CommonNames.h
index 85f878c4be..a3832ba2b3 100644
--- a/Userland/Libraries/LibPDF/CommonNames.h
+++ b/Userland/Libraries/LibPDF/CommonNames.h
@@ -93,12 +93,14 @@
     A(Length1) \
     A(Length2) \
     A(Length3) \
+    A(Limits) \
     A(Linearized) \
     A(ML) \
     A(Matrix) \
     A(MediaBox) \
     A(MissingWidth) \
     A(N) \
+    A(Names) \
     A(Next) \
     A(O) \
     A(OP) \
diff --git a/Userland/Libraries/LibPDF/Document.cpp b/Userland/Libraries/LibPDF/Document.cpp
index a8f85e8a23..acc43ef192 100644
--- a/Userland/Libraries/LibPDF/Document.cpp
+++ b/Userland/Libraries/LibPDF/Document.cpp
@@ -199,6 +199,61 @@ PDFErrorOr<void> Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObj
     return {};
 }
 
+// Looks up `name` in the name tree node `tree`. An intermediate node lists its
+// children under Kids; a leaf stores alternating keys and values under Names.
+// Returns the object stored under `name`, or a MalformedPDF error when the node
+// has neither entry or the name is not found further down.
+PDFErrorOr<NonnullRefPtr<Object>> Document::find_in_name_tree(NonnullRefPtr<DictObject> tree, FlyString name)
+{
+    if (tree->contains(CommonNames::Kids)) {
+        // Fetch Kids the same way as Names below: through the document and under
+        // TRY, so that a failed lookup is returned to the caller as an error.
+        auto kids = TRY(tree->get_array(this, CommonNames::Kids));
+        return find_in_name_tree_nodes(kids, name);
+    }
+    if (!tree->contains(CommonNames::Names))
+        return Error { Error::Type::MalformedPDF, "name tree has neither Kids nor Names" };
+    auto key_value_names_array = TRY(tree->get_array(this, CommonNames::Names));
+    return find_in_key_value_array(key_value_names_array, name);
+}
+
+// Walks `siblings` (the Kids array of a name tree node) and descends into the
+// first child whose Limits range [least key, greatest key] contains `name`.
+// Returns a MalformedPDF error if a child has no valid Limits or if no child
+// covers `name`.
+PDFErrorOr<NonnullRefPtr<Object>> Document::find_in_name_tree_nodes(NonnullRefPtr<ArrayObject> siblings, FlyString name)
+{
+    for (size_t i = 0; i < siblings->size(); i++) {
+        auto sibling = TRY(resolve_to<DictObject>(siblings->at(i)));
+        // Only the root may omit Limits, and a node listed under Kids is never the root.
+        if (!sibling->contains(CommonNames::Limits))
+            return Error { Error::Type::MalformedPDF, "name tree node has no Limits" };
+        auto limits = TRY(sibling->get_array(this, CommonNames::Limits));
+        if (limits->size() != 2)
+            return Error { Error::Type::MalformedPDF, "Expected 2-element Limits array" };
+        auto start = limits->get_string_at(0);
+        auto end = limits->get_string_at(1);
+        if (start->string() <= name && end->string() >= name)
+            return find_in_name_tree(sibling, name);
+    }
+    return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Didn't find node in name tree containing name {}", name) };
+}
+
+// Searches a flat [key1, value1, key2, value2, ...] array for the key equal to
+// `name` and returns the value that follows it. Returns a MalformedPDF error if
+// the array has an odd number of elements or does not contain `name`.
+PDFErrorOr<NonnullRefPtr<Object>> Document::find_in_key_value_array(NonnullRefPtr<ArrayObject> key_value_array, FlyString name)
+{
+    if (key_value_array->size() % 2 == 1)
+        return Error { Error::Type::MalformedPDF, "key/value array has dangling key" };
+    for (size_t i = 0; i < key_value_array->size() / 2; i++) {
+        auto key = key_value_array->get_string_at(2 * i);
+        if (key->string() == name)
+            return key_value_array->get_object_at(this, 2 * i + 1);
+    }
+    return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Didn't find expected name {} in key/value array", name) };
+}
+
 PDFErrorOr<void> Document::build_outline()
 {
     if (!m_catalog->contains(CommonNames::Outlines))
diff --git a/Userland/Libraries/LibPDF/Document.h b/Userland/Libraries/LibPDF/Document.h
index 7557f677ea..3b183ceab9 100644
--- a/Userland/Libraries/LibPDF/Document.h
+++ b/Userland/Libraries/LibPDF/Document.h
@@ -139,6 +139,11 @@ private:
 
     PDFErrorOr<NonnullRefPtr<Object>> get_inheritable_object(FlyString const& name, NonnullRefPtr<DictObject>);
 
+    // Name tree lookup: each helper returns the object stored under `name` or a MalformedPDF error.
+    PDFErrorOr<NonnullRefPtr<Object>> find_in_name_tree(NonnullRefPtr<DictObject> root, FlyString name);
+    PDFErrorOr<NonnullRefPtr<Object>> find_in_name_tree_nodes(NonnullRefPtr<ArrayObject> siblings, FlyString name);
+    PDFErrorOr<NonnullRefPtr<Object>> find_in_key_value_array(NonnullRefPtr<ArrayObject> key_value_array, FlyString name);
+
     NonnullRefPtr<DocumentParser> m_parser;
     RefPtr<DictObject> m_catalog;
     RefPtr<DictObject> m_trailer;