From 320f5f91ab297dd9e494950a3429f6450d6d31e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20Offenh=C3=A4user?= Date: Fri, 24 Mar 2023 21:58:51 +0100 Subject: LibPDF: Ignore whitespace in the ASCII hex filter The spec tells us that any amount of whitespace may appear between the hex digits and that it should just be ignored. --- Userland/Libraries/LibPDF/Filter.cpp | 44 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'Userland') diff --git a/Userland/Libraries/LibPDF/Filter.cpp b/Userland/Libraries/LibPDF/Filter.cpp index f5aea25242..ba4ff22649 100644 --- a/Userland/Libraries/LibPDF/Filter.cpp +++ b/Userland/Libraries/LibPDF/Filter.cpp @@ -56,29 +56,33 @@ PDFErrorOr<ByteBuffer> Filter::decode(ReadonlyBytes bytes, DeprecatedFlyString c PDFErrorOr<ByteBuffer> Filter::decode_ascii_hex(ReadonlyBytes bytes) { - if (bytes.size() % 2 == 0) - return TRY(decode_hex(bytes)); - - // FIXME: Integrate this padding into AK/Hex? - - auto output = TRY(ByteBuffer::create_zeroed(bytes.size() / 2 + 1)); - - for (size_t i = 0; i < bytes.size() / 2; ++i) { - auto const c1 = decode_hex_digit(static_cast<char>(bytes[i * 2])); - if (c1 >= 16) - return AK::Error::from_string_literal("Hex string contains invalid digit"); - - auto const c2 = decode_hex_digit(static_cast<char>(bytes[i * 2 + 1])); - if (c2 >= 16) - return AK::Error::from_string_literal("Hex string contains invalid digit"); - - output[i] = (c1 << 4) + c2; + ByteBuffer output; + + bool have_read_high_nibble = false; + u8 high_nibble = 0; + for (u8 byte : bytes) { + // 3.3.1 ASCIIHexDecode Filter + // All white-space characters [...] are ignored. + // FIXME: Any other characters cause an error.
+ if (is_ascii_hex_digit(byte)) { + u8 hex_digit = decode_hex_digit(byte); + if (have_read_high_nibble) { + u8 full_byte = (high_nibble << 4) | hex_digit; + TRY(output.try_append(full_byte)); + have_read_high_nibble = false; + } else { + high_nibble = hex_digit; + have_read_high_nibble = true; + } + } } - // Process last byte with a padded zero - output[output.size() - 1] = decode_hex_digit(static_cast<char>(bytes[bytes.size() - 1])) * 16; + // If the filter encounters the EOD marker after reading an odd number + // of hexadecimal digits, it behaves as if a 0 followed the last digit. + if (have_read_high_nibble) + TRY(output.try_append(high_nibble << 4)); - return { move(output) }; + return output; }; PDFErrorOr<ByteBuffer> Filter::decode_ascii85(ReadonlyBytes bytes) -- cgit v1.2.3