diff options
author | Luke <luke.wilde@live.co.uk> | 2021-07-21 01:05:19 +0100 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2021-07-24 20:11:28 +0430 |
commit | c63913b6334f84b6c3da65ce8760a9342163df9a (patch) | |
tree | 59ca5ff1536a017e66226c3683269e5b8c7144f0 /Userland/Libraries/LibIMAP | |
parent | cc0914ae58e37542d9b37a762e5ec5e943e4d5b9 (diff) | |
download | serenity-c63913b6334f84b6c3da65ce8760a9342163df9a.zip |
LibIMAP: Add quoted printable decoder
This is a very common encoding for e-mail. Gmail seems to encode all
HTML e-mail in it.
imap qp clang
Diffstat (limited to 'Userland/Libraries/LibIMAP')
-rw-r--r-- | Userland/Libraries/LibIMAP/CMakeLists.txt | 7 | ||||
-rw-r--r-- | Userland/Libraries/LibIMAP/QuotedPrintable.cpp | 86 | ||||
-rw-r--r-- | Userland/Libraries/LibIMAP/QuotedPrintable.h | 15 |
3 files changed, 107 insertions, 1 deletions
diff --git a/Userland/Libraries/LibIMAP/CMakeLists.txt b/Userland/Libraries/LibIMAP/CMakeLists.txt
index 95fd92eec1..bc21a4dbe9 100644
--- a/Userland/Libraries/LibIMAP/CMakeLists.txt
+++ b/Userland/Libraries/LibIMAP/CMakeLists.txt
@@ -1,4 +1,9 @@
-set(SOURCES Objects.cpp Client.cpp Parser.cpp)
+set(SOURCES
+    Client.cpp
+    Objects.cpp
+    Parser.cpp
+    QuotedPrintable.cpp
+)
 
 set(GENERATED_SOURCES)
 
diff --git a/Userland/Libraries/LibIMAP/QuotedPrintable.cpp b/Userland/Libraries/LibIMAP/QuotedPrintable.cpp
new file mode 100644
index 0000000000..01c81c7383
--- /dev/null
+++ b/Userland/Libraries/LibIMAP/QuotedPrintable.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/CharacterTypes.h>
+#include <AK/GenericLexer.h>
+#include <AK/StringBuilder.h>
+#include <LibIMAP/QuotedPrintable.h>
+
+namespace IMAP {
+
+static constexpr bool is_illegal_character(char c)
+{
+    return (u8)c > 0x7E || (is_ascii_control(c) && c != '\t' && c != '\r' && c != '\n');
+}
+
+// RFC 2045 Section 6.7 "Quoted-Printable Content-Transfer-Encoding", https://datatracker.ietf.org/doc/html/rfc2045#section-6.7
+ByteBuffer decode_quoted_printable(StringView const& input)
+{
+    GenericLexer lexer(input);
+    StringBuilder output;
+
+    // NOTE: The RFC says that encoded lines must not be longer than 76 characters.
+    //       However, the RFC says implementations can ignore this and parse as is,
+    //       which is the approach we're taking.
+
+    while (!lexer.is_eof()) {
+        char potential_character = lexer.consume();
+
+        if (is_illegal_character(potential_character))
+            continue;
+
+        if (potential_character == '=') {
+            if (lexer.is_eof()) {
+                TODO();
+            }
+
+            char first_escape_character = lexer.consume();
+
+            // The RFC doesn't formally allow lowercase, but says implementations can treat lowercase the same as uppercase.
+            // Thus we can use is_ascii_hex_digit.
+            if (is_ascii_hex_digit(first_escape_character)) {
+                if (lexer.is_eof()) {
+                    TODO();
+                }
+
+                char second_escape_character = lexer.consume();
+
+                if (is_ascii_hex_digit(second_escape_character)) {
+                    u8 actual_character = (parse_ascii_hex_digit(first_escape_character) << 4) | parse_ascii_hex_digit(second_escape_character);
+                    output.append(actual_character);
+                } else {
+                    TODO();
+                }
+            } else if (first_escape_character == '\r') {
+                if (lexer.is_eof()) {
+                    TODO();
+                }
+
+                char second_escape_character = lexer.consume();
+
+                if (second_escape_character == '\n') {
+                    // This is a soft line break. Don't append anything to the output.
+                } else {
+                    TODO();
+                }
+            } else {
+                if (is_illegal_character(first_escape_character)) {
+                    TODO();
+                }
+
+                // Invalid escape sequence. RFC 2045 says a reasonable solution is just to append '=' followed by the character.
+                output.append('=');
+                output.append(first_escape_character);
+            }
+        } else {
+            output.append(potential_character);
+        }
+    }
+
+    return output.to_byte_buffer();
+}
+
+}
diff --git a/Userland/Libraries/LibIMAP/QuotedPrintable.h b/Userland/Libraries/LibIMAP/QuotedPrintable.h
new file mode 100644
index 0000000000..8b127b3783
--- /dev/null
+++ b/Userland/Libraries/LibIMAP/QuotedPrintable.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/ByteBuffer.h>
+
+namespace IMAP {
+
+ByteBuffer decode_quoted_printable(StringView const&);
+
+}