From 101639e52629794d52f1e9cc50a3d11a4381838b Mon Sep 17 00:00:00 2001
From: Matthew Olsson
Date: Mon, 10 May 2021 10:39:19 -0700
Subject: LibPDF: Parse graphics commands

---
Review notes (not part of the commit):
- Line breaks and indentation were reconstructed. Text that stood between
  angle brackets (#include targets, template arguments, the e-mail address)
  is missing from this copy and has been left as found.
- is_command_char() now converts to unsigned char before calling isalpha().
- Comments were added to the new code. Hunk sizes and the diffstat were
  updated to match; the index hashes were not.

 Userland/Libraries/LibPDF/Command.h  | 148 +++++++++++++++++++++++++++++++++++
 Userland/Libraries/LibPDF/Parser.cpp |  56 +++++++++++++
 Userland/Libraries/LibPDF/Parser.h   |   8 ++
 3 files changed, 212 insertions(+)
 create mode 100644 Userland/Libraries/LibPDF/Command.h

(limited to 'Userland/Libraries/LibPDF')

diff --git a/Userland/Libraries/LibPDF/Command.h b/Userland/Libraries/LibPDF/Command.h
new file mode 100644
index 0000000000..95ab114438
--- /dev/null
+++ b/Userland/Libraries/LibPDF/Command.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2021, Matthew Olsson
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+// X-macro listing graphics commands as V(EnumeratorName, snake_case_name, symbol).
+// Where a string is needed, the symbol is produced with #symbol. The ' command
+// is not listed here; every expansion site below handles it by hand.
+#define ENUMERATE_COMMANDS(V) \
+    V(SaveState, save_state, q) \
+    V(RestoreState, restore_state, Q) \
+    V(ConcatenateMatrix, concatenate_matrix, cm) \
+    V(SetLineWidth, set_line_width, w) \
+    V(SetLineCap, set_line_cap, J) \
+    V(SetLineJoin, set_line_join, j) \
+    V(SetMiterLimit, set_miter_limit, M) \
+    V(SetDashPattern, set_dash_pattern, d) \
+    V(PathBegin, path_begin, m) \
+    V(PathEnd, path_end, n) \
+    V(PathLine, path_line, l) \
+    V(PathClose, path_close, h) \
+    V(PathAppendRect, path_append_rect, re) \
+    V(PathStroke, path_stroke, S) \
+    V(PathCloseAndStroke, path_close_and_stroke, s) \
+    V(PathFillNonZero, path_fill_nonzero, f) \
+    V(PathFillNonZeroDeprecated, path_fill_nonzero_deprecated, F) \
+    V(PathFillEvenOdd, path_fill_evenodd, f*) \
+    V(PathFillStrokeNonZero, path_fill_stroke_nonzero, B) \
+    V(PathFillStrokeEvenOdd, path_fill_stroke_evenodd, B*) \
+    V(PathCloseFillStrokeNonZero, path_close_fill_stroke_nonzero, b) \
+    V(PathCloseFillStrokeEvenOdd, path_close_fill_stroke_evenodd, b*) \
+    V(TextSetCharSpace, text_set_char_space, Tc) \
+    V(TextSetWordSpace, text_set_word_space, Tw) \
+    V(TextSetHorizontalScale, text_set_horizontal_scale, Tz) \
+    V(TextSetLeading, text_set_leading, TL) \
+    V(TextSetFont, text_set_font, Tf) \
+    V(TextSetRenderingMode, text_set_rendering_mode, Tr) \
+    V(TextSetRise, text_set_rise, Ts) \
+    V(TextBegin, text_begin, BT) \
+    V(TextEnd, text_end, ET) \
+    V(TextNextLineOffset, text_next_line_offset, Td) \
+    V(TextNextLineAndSetLeading, text_next_line_and_set_leading, TD) \
+    V(TextSetMatrixAndLineMatrix, text_set_matrix_and_line_matrix, Tm) \
+    V(TextNextLine, text_next_line, T*) \
+    V(TextShowString, text_show_string, Tj)
+
+namespace PDF {
+
+// One enumerator per ENUMERATE_COMMANDS entry, plus the hand-written ' command.
+enum class CommandType {
+#define V(name, snake_name, symbol) name,
+    ENUMERATE_COMMANDS(V)
+#undef V
+    TextNextLineShowString,
+};
+
+// A graphics command: its type together with the arguments collected for it.
+class Command {
+public:
+    // Maps a symbol string (e.g. "cm", "f*", "'") to its CommandType.
+    // An unrecognized symbol is logged with dbgln() and then hits VERIFY_NOT_REACHED().
+    static CommandType command_type_from_symbol(const StringView& symbol_string)
+    {
+#define V(name, snake_name, symbol) \
+    if (symbol_string == #symbol) \
+        return CommandType::name;
+        ENUMERATE_COMMANDS(V)
+#undef V
+
+        if (symbol_string == "'")
+            return CommandType::TextNextLineShowString;
+
+        dbgln("unsupported graphics symbol {}", symbol_string);
+        VERIFY_NOT_REACHED();
+    }
+
+    // Returns the enumerator's name as a string literal (e.g. "SaveState").
+    static const char* command_name(CommandType command_name)
+    {
+#define V(name, snake_name, symbol) \
+    if (command_name == CommandType::name) \
+        return #name;
+        ENUMERATE_COMMANDS(V)
+#undef V
+
+        if (command_name == CommandType::TextNextLineShowString)
+            return "TextNextLineShowString";
+
+        VERIFY_NOT_REACHED();
+    }
+
+    // Returns the symbol for a command type; the inverse of command_type_from_symbol().
+    static const char* command_symbol(CommandType command_name)
+    {
+#define V(name, snake_name, symbol) \
+    if (command_name == CommandType::name) \
+        return #symbol;
+        ENUMERATE_COMMANDS(V)
+#undef V
+
+        if (command_name == CommandType::TextNextLineShowString)
+            return "'";
+
+        VERIFY_NOT_REACHED();
+    }
+
+    // Takes `arguments` by value and moves it into m_arguments.
+    Command(CommandType command_type, Vector arguments)
+        : m_command_type(command_type)
+        , m_arguments(move(arguments))
+    {
+    }
+
+    [[nodiscard]] ALWAYS_INLINE CommandType command_type() const { return m_command_type; }
+    [[nodiscard]] ALWAYS_INLINE const Vector& arguments() const { return m_arguments; }
+
+private:
+    CommandType m_command_type;
+    Vector m_arguments;
+};
+
+}
+
+namespace AK {
+
+// Formats a PDF::Command as its name followed by its bracketed argument list.
+template<>
+struct Formatter : Formatter {
+    void format(FormatBuilder& format_builder, const PDF::Command& command)
+    {
+        StringBuilder builder;
+        builder.appendff("{} [ ", PDF::Command::command_name(command.command_type()));
+        for (auto& argument : command.arguments())
+            builder.appendff(" {}", argument);
+        builder.append(" ]");
+        Formatter::format(format_builder, builder.to_string());
+    }
+};
+
+}
diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp
index 2c3d5b3b8b..a14e1e1eab 100644
--- a/Userland/Libraries/LibPDF/Parser.cpp
+++ b/Userland/Libraries/LibPDF/Parser.cpp
@@ -19,11 +19,25 @@ static NonnullRefPtr make_object(Args... args) requires(IsBaseOf)
     return adopt_ref(*new T(forward(args)...));
 }
 
+// Parses `bytes` as a sequence of graphics commands using a temporary Parser
+// built from the bytes alone.
+Vector Parser::parse_graphics_commands(const ReadonlyBytes& bytes)
+{
+    Parser parser(bytes);
+    return parser.parse_graphics_commands();
+}
+
 Parser::Parser(Badge, const ReadonlyBytes& bytes)
     : m_reader(bytes)
 {
 }
 
+// Private constructor used by the static parse_graphics_commands() above.
+Parser::Parser(const ReadonlyBytes& bytes)
+    : m_reader(bytes)
+{
+}
+
 bool Parser::perform_validation()
 {
     return !sloppy_is_linearized() && parse_header();
@@ -650,6 +664,48 @@ NonnullRefPtr Parser::parse_stream(NonnullRefPtr dict)
     return make_object(dict, bytes);
 }
 
+// Parses the reader's remaining bytes as a sequence of graphics commands.
+// Values are accumulated as arguments until a command symbol is read; the
+// symbol and the accumulated arguments then form one Command. Arguments left
+// over after the last symbol are not returned.
+Vector Parser::parse_graphics_commands()
+{
+    Vector commands;
+    Vector command_args;
+
+    constexpr static auto is_command_char = [](char ch) {
+        // The <ctype> functions are only defined for unsigned char values and
+        // EOF, so convert before classifying bytes that may be >= 0x80.
+        return isalpha(static_cast<unsigned char>(ch)) || ch == '*' || ch == '\'';
+    };
+
+    while (!m_reader.done()) {
+        auto ch = m_reader.peek();
+        if (is_command_char(ch)) {
+            auto command_start = m_reader.offset();
+            while (is_command_char(ch)) {
+                consume();
+                if (m_reader.done())
+                    break;
+                ch = m_reader.peek();
+            }
+
+            auto command_string = StringView(m_reader.bytes().slice(command_start, m_reader.offset() - command_start));
+            auto command_type = Command::command_type_from_symbol(command_string);
+            commands.append(Command(command_type, move(command_args)));
+            // Start the next command with a fresh argument list.
+            command_args = Vector();
+            consume_whitespace();
+
+            continue;
+        }
+
+        command_args.append(parse_value());
+    }
+
+    return commands;
+}
+
 bool Parser::matches_eol() const
 {
     return m_reader.matches_any(0xa, 0xd);
diff --git a/Userland/Libraries/LibPDF/Parser.h b/Userland/Libraries/LibPDF/Parser.h
index e4f2fc64cb..216c9e6da8 100644
--- a/Userland/Libraries/LibPDF/Parser.h
+++ b/Userland/Libraries/LibPDF/Parser.h
@@ -7,6 +7,7 @@
 #pragma once
 
 #include
+#include
 #include
 #include
 #include
@@ -17,6 +18,9 @@ class Document;
 
 class Parser {
 public:
+    // Parses `bytes` as a sequence of graphics commands.
+    static Vector parse_graphics_commands(const ReadonlyBytes&);
+
     Parser(Badge, const ReadonlyBytes&);
 
     void set_document(RefPtr document) { m_document = document; }
@@ -34,6 +38,8 @@ public:
     RefPtr conditionally_parse_page_tree_node_at_offset(size_t offset);
 
 private:
+    explicit Parser(const ReadonlyBytes&);
+
     bool parse_header();
     XRefTable parse_xref_table();
     NonnullRefPtr parse_file_trailer();
@@ -64,6 +70,8 @@ private:
     NonnullRefPtr parse_dict();
     NonnullRefPtr parse_stream(NonnullRefPtr dict);
 
+    Vector parse_graphics_commands();
+
     bool matches_eol() const;
     bool matches_whitespace() const;
     bool matches_number() const;
-- 
cgit v1.2.3