summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibPDF
diff options
context:
space:
mode:
authorWesley Moret <wesmoret98@gmail.com>2021-07-14 21:48:09 -0400
committerAndreas Kling <kling@serenityos.org>2021-07-16 20:44:10 +0200
commit1b8f73b6b3afd8d78172fcd30b95db67f259f58a (patch)
treeea54523cdbfd16106fcdc71babf5105880a51f87 /Userland/Libraries/LibPDF
parent5d4d70355e15bd7d3e608d72fb36f4538bcdaac0 (diff)
downloadserenity-1b8f73b6b3afd8d78172fcd30b95db67f259f58a.zip
LibPDF: Fix treating not finding the linearized dict as a fatal error
We now try to parse the first indirect value and see if it's the `Linearization Parameter Dictionary`. if it's not, we fallback to reading the xref table from the end of the document
Diffstat (limited to 'Userland/Libraries/LibPDF')
-rw-r--r--Userland/Libraries/LibPDF/Parser.cpp31
-rw-r--r--Userland/Libraries/LibPDF/Parser.h8
2 files changed, 26 insertions, 13 deletions
diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp
index 51f59c99b6..263ea0a270 100644
--- a/Userland/Libraries/LibPDF/Parser.cpp
+++ b/Userland/Libraries/LibPDF/Parser.cpp
@@ -45,14 +45,18 @@ bool Parser::initialize()
if (!parse_header())
return {};
- if (!initialize_linearization_dict())
+ const auto result = initialize_linearization_dict();
+ if (result == LinearizationResult::Error)
return {};
+ if (result == LinearizationResult::NotLinearized)
+ return initialize_non_linearized_xref_table();
+
bool is_linearized = m_linearization_dictionary.has_value();
if (is_linearized) {
// The file may have been linearized at one point, but could have been updated afterwards,
// which means it is no longer a linearized PDF file.
- is_linearized = is_linearized && m_linearization_dictionary.value().length_of_file == m_reader.bytes().size();
+ is_linearized = m_linearization_dictionary.value().length_of_file == m_reader.bytes().size();
if (!is_linearized) {
// FIXME: The file shouldn't be treated as linearized, yet the xref tables are still
@@ -113,21 +117,24 @@ bool Parser::parse_header()
return true;
}
-bool Parser::initialize_linearization_dict()
+Parser::LinearizationResult Parser::initialize_linearization_dict()
{
// parse_header() is called immediately before this, so we are at the right location
auto dict_value = m_document->resolve(parse_indirect_value());
if (!dict_value || !dict_value.is_object())
- return false;
+ return LinearizationResult::Error;
auto dict_object = dict_value.as_object();
if (!dict_object->is_dict())
- return false;
+ return LinearizationResult::NotLinearized;
auto dict = object_cast<DictObject>(dict_object);
+ if (!dict->contains(CommonNames::Linearized))
+ return LinearizationResult::NotLinearized;
+
if (!dict->contains(CommonNames::L, CommonNames::H, CommonNames::O, CommonNames::E, CommonNames::N, CommonNames::T))
- return true;
+ return LinearizationResult::Error;
auto length_of_file = dict->get_value(CommonNames::L);
auto hint_table = dict->get_value(CommonNames::H);
@@ -144,17 +151,17 @@ bool Parser::initialize_linearization_dict()
|| !number_of_pages.is_int_type<u16>()
|| !offset_of_main_xref_table.is_int_type<u32>()
|| (first_page && !first_page.is_int_type<u32>())) {
- return true;
+ return LinearizationResult::Error;
}
auto hint_table_object = hint_table.as_object();
if (!hint_table_object->is_array())
- return true;
+ return LinearizationResult::Error;
auto hint_table_array = object_cast<ArrayObject>(hint_table_object);
auto hint_table_size = hint_table_array->size();
if (hint_table_size != 2 && hint_table_size != 4)
- return true;
+ return LinearizationResult::Error;
auto primary_hint_stream_offset = hint_table_array->at(0);
auto primary_hint_stream_length = hint_table_array->at(1);
@@ -170,7 +177,7 @@ bool Parser::initialize_linearization_dict()
|| !primary_hint_stream_length.is_int_type<u32>()
|| (overflow_hint_stream_offset && !overflow_hint_stream_offset.is_int_type<u32>())
|| (overflow_hint_stream_length && !overflow_hint_stream_length.is_int_type<u32>())) {
- return true;
+ return LinearizationResult::Error;
}
m_linearization_dictionary = LinearizationDictionary {
@@ -186,7 +193,7 @@ bool Parser::initialize_linearization_dict()
first_page ? first_page.as_int_type<u32>() : NumericLimits<u32>::max(),
};
- return true;
+ return LinearizationResult::Linearized;
}
bool Parser::initialize_linearized_xref_table()
@@ -1023,7 +1030,7 @@ RefPtr<StreamObject> Parser::parse_stream(NonnullRefPtr<DictObject> dict)
ReadonlyBytes bytes;
auto maybe_length = dict->get(CommonNames::Length);
- if (maybe_length.has_value()) {
+ if (maybe_length.has_value() && (!maybe_length->is_ref() || m_xref_table)) {
// The PDF writer has kindly provided us with the direct length of the stream
m_reader.save();
auto length = m_document->resolve_to<int>(maybe_length.value());
diff --git a/Userland/Libraries/LibPDF/Parser.h b/Userland/Libraries/LibPDF/Parser.h
index 6dbe3b2f5f..2dba1a0515 100644
--- a/Userland/Libraries/LibPDF/Parser.h
+++ b/Userland/Libraries/LibPDF/Parser.h
@@ -18,6 +18,12 @@ class Document;
class Parser final : public RefCounted<Parser> {
public:
+ enum class LinearizationResult {
+ Error,
+ NotLinearized,
+ Linearized,
+ };
+
static Vector<Command> parse_graphics_commands(ReadonlyBytes const&);
Parser(Badge<Document>, ReadonlyBytes const&);
@@ -83,7 +89,7 @@ private:
explicit Parser(ReadonlyBytes const&);
bool parse_header();
- bool initialize_linearization_dict();
+ LinearizationResult initialize_linearization_dict();
bool initialize_linearized_xref_table();
bool initialize_non_linearized_xref_table();
bool initialize_hint_tables();