/*
 * Copyright (c) 2021-2022, Matthew Olsson
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

// NOTE(review): every #include below has no target, and several template
// argument lists in this file are missing or unbalanced (e.g. "PDFErrorOr>").
// It looks like angle-bracketed text was stripped from this copy of the file;
// restore it from the upstream source before building.
#include
#include
#include
#include
#include
#include
#include

namespace PDF {

// Forward declaration; Parser only names Document in this header.
class Document;

// Declares the PDF parser. The class is final and ref-counted, and it holds a
// Reader, a weak reference to a document, an xref table, a trailer and an
// optional linearization dictionary (see the data members at the bottom).
class Parser final : public RefCounted {
public:
    // Two-state result type describing whether a file is linearized.
    // No declaration in this header uses it by name as written here.
    enum class LinearizationResult {
        NotLinearized,
        Linearized,
    };

    // Static entry point that takes a byte range and needs no Parser instance.
    // NOTE(review): presumably constructs a temporary Parser and forwards to
    // the private parse_graphics_commands() member — confirm in the .cpp.
    static PDFErrorOr> parse_graphics_commands(ReadonlyBytes);

    // Public constructor taking a Badge and the bytes to parse.
    // NOTE(review): the Badge parameter presumably restricts construction to
    // one specific caller type — its template argument is missing here.
    Parser(Badge, ReadonlyBytes);

    // Returns a const reference to the stored trailer (m_trailer). The result
    // is marked [[nodiscard]].
    [[nodiscard]] ALWAYS_INLINE RefPtr const& trailer() const { return m_trailer; }

    // Takes a weak pointer by const reference.
    // NOTE(review): presumably stores it in m_document — confirm in the .cpp.
    void set_document(WeakPtr const&);

    // Parses the header and initializes the xref table and trailer
    PDFErrorOr initialize();

    // Parses the object identified by the given index; failures are reported
    // through the PDFErrorOr return value.
    PDFErrorOr parse_object_with_index(u32 index);

    // Specialized version of parse_dict which aborts early if the dict being parsed
    // is not a page object
    PDFErrorOr> conditionally_parse_page_tree_node(u32 object_index);

private:
    // Plain aggregate of linearization parameters; every field defaults to 0.
    // NOTE(review): field names appear to follow the linearization parameter
    // dictionary of the PDF specification — confirm against the spec.
    struct LinearizationDictionary {
        u32 length_of_file { 0 };
        u32 primary_hint_stream_offset { 0 };
        u32 primary_hint_stream_length { 0 };
        u32 overflow_hint_stream_offset { 0 };
        u32 overflow_hint_stream_length { 0 };
        u32 first_page_object_number { 0 };
        u32 offset_of_first_page_end { 0 };
        u16 number_of_pages { 0 };
        u32 offset_of_main_xref_table { 0 };
        u32 first_page { 0 }; // The page to initially open (I think, the spec isn't all that clear here)
    };

    // Header values of the page offset hint table; every field defaults to 0.
    // The "least_*" / "location_*" fields are 32-bit, while the
    // "bits_required_for_*" fields and the denominator are 16-bit.
    // NOTE(review): presumably the bit widths tell the entry parser how many
    // bits each per-page item occupies — confirm against the implementation.
    struct PageOffsetHintTable {
        u32 least_number_of_objects_in_a_page { 0 };
        u32 location_of_first_page_object { 0 };
        u16 bits_required_for_object_number { 0 };
        u32 least_length_of_a_page { 0 };
        u16 bits_required_for_page_length { 0 };
        u32 least_offset_of_any_content_stream { 0 };
        u16 bits_required_for_content_stream_offsets { 0 };
        u32 least_content_stream_length { 0 };
        u16 bits_required_for_content_stream_length { 0 };
        u16 bits_required_for_number_of_shared_obj_refs { 0 };
        u16 bits_required_for_greatest_shared_obj_identifier { 0 };
        u16 bits_required_for_fraction_numerator { 0 };
        u16 shared_object_reference_fraction_denominator { 0 };
    };

    // One entry of the page offset hint table. Numeric fields default to 0 and
    // the two vectors default to empty.
    // NOTE(review): presumably one entry per page — confirm in the .cpp.
    struct PageOffsetHintTableEntry {
        u32 objects_in_page_number { 0 };
        u32 page_length_number { 0 };
        u32 number_of_shared_objects { 0 };
        Vector shared_object_identifiers {};
        Vector shared_object_location_numerators {};
        u32 page_content_stream_offset_number { 0 };
        u32 page_content_stream_length_number { 0 };
    };

    // Formatter specializations are befriended so they can see the private
    // nested types.
    // NOTE(review): the three template arguments are missing; presumably one
    // per struct declared above.
    friend struct AK::Formatter;
    friend struct AK::Formatter;
    friend struct AK::Formatter;

    // Private constructor taking only the bytes to parse; explicit, so it
    // cannot be used for implicit conversion from ReadonlyBytes.
    explicit Parser(ReadonlyBytes);

    // Initialization steps. Each reports failure through PDFErrorOr.
    // NOTE(review): presumably driven from initialize() — confirm in the .cpp.
    PDFErrorOr parse_header();
    PDFErrorOr initialize_linearization_dict();
    PDFErrorOr initialize_linearized_xref_table();
    PDFErrorOr initialize_non_linearized_xref_table();
    PDFErrorOr initialize_hint_tables();

    // Hint table parsing over the bytes of a hint stream. The second function
    // takes the already-parsed table header and returns a Vector directly
    // rather than a PDFErrorOr, so it has no error channel in its signature.
    PDFErrorOr parse_page_offset_hint_table(ReadonlyBytes hint_stream_bytes);
    Vector parse_all_page_offset_hint_table_entries(PageOffsetHintTable const&, ReadonlyBytes hint_stream_bytes);

    // Cross-reference table and file trailer parsing.
    PDFErrorOr> parse_xref_table();
    PDFErrorOr> parse_file_trailer();

    // Navigation helpers; both return bool.
    // NOTE(review): presumably true means the marker was found and the reader
    // was repositioned — confirm in the .cpp.
    bool navigate_to_before_eof_marker();
    bool navigate_to_after_startxref();

    // Returns the parsed comment as a String.
    String parse_comment();

    // Value-level parsers. Most report failure through PDFErrorOr; the three
    // string parsers (parse_string, parse_literal_string, parse_hex_string)
    // return their result directly.
    PDFErrorOr parse_value();
    PDFErrorOr parse_possible_indirect_value_or_ref();
    // Two overloads: one takes an explicit index and generation, the other
    // takes no arguments.
    PDFErrorOr> parse_indirect_value(int index, int generation);
    PDFErrorOr> parse_indirect_value();
    PDFErrorOr parse_number();
    PDFErrorOr> parse_name();
    NonnullRefPtr parse_string();
    String parse_literal_string();
    String parse_hex_string();
    PDFErrorOr> parse_array();
    PDFErrorOr> parse_dict();
    // Takes the dictionary associated with the stream as a non-null ref pointer.
    PDFErrorOr> parse_stream(NonnullRefPtr dict);

    // Instance counterpart of the public static parse_graphics_commands.
    PDFErrorOr> parse_graphics_commands();

    // Lookahead predicates; all are const and return bool.
    bool matches_eol() const;
    bool matches_whitespace() const;
    bool matches_number() const;
    bool matches_delimiter() const;
    bool matches_regular_character() const;

    // Consuming helpers; non-const.
    // NOTE(review): the bool results presumably report whether anything was
    // consumed — confirm in the .cpp.
    bool consume_eol();
    bool consume_whitespace();
    // Three consume overloads: no argument returns a char, an int amount
    // returns nothing, and a char argument returns bool.
    char consume();
    void consume(int amount);
    bool consume(char);

    // Builds an Error from a message. The SourceLocation parameter exists only
    // when PDF_DEBUG is defined, and then defaults to the call site via
    // SourceLocation::current().
    Error error(
        String const& message
#ifdef PDF_DEBUG
        ,
        SourceLocation loc = SourceLocation::current()
#endif
    ) const;

    // Parser state.
    Reader m_reader;
    // Weak reference to a document; set_document() takes the same pointer type.
    WeakPtr m_document;
    RefPtr m_xref_table;
    // Exposed read-only through trailer().
    RefPtr m_trailer;
    // Optional, so it may be empty.
    // NOTE(review): presumably populated only for linearized files — confirm.
    Optional m_linearization_dictionary;
};

};