summary | refs | log | tree | commit | diff
path: root/Userland/Libraries/LibPDF/Parser.cpp
diff options
context:
space:
mode:
author: Matthew Olsson <matthewcolsson@gmail.com> 2021-05-02 18:53:07 -0700
committer: Andreas Kling <kling@serenityos.org> 2021-05-10 10:32:39 +0200
commit: 3aeaceb72729097c8ba8c59f8beab2cff87e02b1 (patch)
tree: 1223446a0fa9e794cb67baff24f3429de32748ad /Userland/Libraries/LibPDF/Parser.cpp
parent: 8c745ad0d9198db1cd078dff573353457294a840 (diff)
download: serenity-3aeaceb72729097c8ba8c59f8beab2cff87e02b1.zip
LibPDF: Parse nested Page Tree structures
We now follow nested page tree nodes to find all of the actual page dicts, whereas previously we just assumed the root level page tree node contained all of the page children directly.
Diffstat (limited to 'Userland/Libraries/LibPDF/Parser.cpp')
-rw-r--r-- Userland/Libraries/LibPDF/Parser.cpp 48
1 files changed, 46 insertions, 2 deletions
diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp
index eb37bf47d7..2e7d5ab57e 100644
--- a/Userland/Libraries/LibPDF/Parser.cpp
+++ b/Userland/Libraries/LibPDF/Parser.cpp
@@ -363,14 +363,14 @@ Value Parser::parse_number()
}
}
+ consume_whitespace();
+
auto string = String(m_reader.bytes().slice(start_offset, m_reader.offset() - start_offset));
float f = strtof(string.characters(), nullptr);
if (is_float)
return Value(f);
VERIFY(floorf(f) == f);
- consume_whitespace();
-
return Value(static_cast<int>(f));
}
@@ -567,6 +567,50 @@ NonnullRefPtr<DictObject> Parser::parse_dict()
return make_object<DictObject>(map);
}
+// Tries to parse the indirect object at `offset` as a page tree node ("/Type /Pages").
+// Returns an empty RefPtr as soon as a key other than Type, Parent, Kids or Count is seen,
+// or when the Type value is not the name "Pages". On such an early return the reader is
+// left wherever parsing stopped; it is not moved back.
+RefPtr<DictObject> Parser::conditionally_parse_page_tree_node_at_offset(size_t offset)
+{
+    m_reader.move_to(offset);
+    // Skip the two numbers that precede the "obj" keyword.
+    parse_number();
+    parse_number();
+    VERIFY(m_reader.matches("obj"));
+    m_reader.move_by(3);
+    consume_whitespace();
+
+    // Opening "<<" of the dictionary.
+    consume('<');
+    consume('<');
+    consume_whitespace();
+
+    HashMap<FlyString, Value> entries;
+    while (!m_reader.matches(">>")) {
+        auto key = parse_name();
+        auto key_name = key->name();
+        // Any other key means this is a page, not a page tree node.
+        if (!key_name.is_one_of("Type", "Parent", "Kids", "Count"))
+            return {};
+        auto parsed = parse_value();
+        if (key_name == "Type") {
+            if (!parsed.is_object())
+                return {};
+            auto candidate = parsed.as_object();
+            if (!candidate->is_name() || object_cast<NameObject>(candidate)->name() != "Pages")
+                return {};
+        }
+        entries.set(key_name, parsed);
+    }
+
+    // Closing ">>" of the dictionary.
+    consume('>');
+    consume('>');
+    consume_whitespace();
+    return make_object<DictObject>(entries);
+}
+
NonnullRefPtr<StreamObject> Parser::parse_stream(NonnullRefPtr<DictObject> dict)
{
VERIFY(m_reader.matches("stream"));