summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibIMAP
diff options
context:
space:
mode:
authorLuke <luke.wilde@live.co.uk>2021-07-21 01:05:19 +0100
committerAli Mohammad Pur <Ali.mpfard@gmail.com>2021-07-24 20:11:28 +0430
commitc63913b6334f84b6c3da65ce8760a9342163df9a (patch)
tree59ca5ff1536a017e66226c3683269e5b8c7144f0 /Userland/Libraries/LibIMAP
parentcc0914ae58e37542d9b37a762e5ec5e943e4d5b9 (diff)
downloadserenity-c63913b6334f84b6c3da65ce8760a9342163df9a.zip
LibIMAP: Add quoted printable decoder
This is a very common encoding for e-mail. Gmail seems to encode all HTML e-mail in it.
Diffstat (limited to 'Userland/Libraries/LibIMAP')
-rw-r--r--Userland/Libraries/LibIMAP/CMakeLists.txt7
-rw-r--r--Userland/Libraries/LibIMAP/QuotedPrintable.cpp86
-rw-r--r--Userland/Libraries/LibIMAP/QuotedPrintable.h15
3 files changed, 107 insertions, 1 deletions
diff --git a/Userland/Libraries/LibIMAP/CMakeLists.txt b/Userland/Libraries/LibIMAP/CMakeLists.txt
index 95fd92eec1..bc21a4dbe9 100644
--- a/Userland/Libraries/LibIMAP/CMakeLists.txt
+++ b/Userland/Libraries/LibIMAP/CMakeLists.txt
@@ -1,4 +1,9 @@
-set(SOURCES Objects.cpp Client.cpp Parser.cpp)
+# Source files compiled into this library, one per line in alphabetical order.
+set(SOURCES
+ Client.cpp
+ Objects.cpp
+ Parser.cpp
+ QuotedPrintable.cpp
+)
set(GENERATED_SOURCES)
diff --git a/Userland/Libraries/LibIMAP/QuotedPrintable.cpp b/Userland/Libraries/LibIMAP/QuotedPrintable.cpp
new file mode 100644
index 0000000000..01c81c7383
--- /dev/null
+++ b/Userland/Libraries/LibIMAP/QuotedPrintable.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/CharacterTypes.h>
+#include <AK/GenericLexer.h>
+#include <AK/StringBuilder.h>
+#include <LibIMAP/QuotedPrintable.h>
+
+namespace IMAP {
+
+// Returns true for bytes the decoder refuses to pass through: anything above 0x7E,
+// and any ASCII control character other than tab, carriage return and line feed.
+static constexpr bool is_illegal_character(char c)
+{
+    auto const byte = static_cast<u8>(c);
+    if (byte > 0x7E)
+        return true;
+
+    // Tab, CR and LF are the only control characters that are allowed.
+    if (c == '\t' || c == '\r' || c == '\n')
+        return false;
+
+    return is_ascii_control(c);
+}
+
+// RFC 2045 Section 6.7 "Quoted-Printable Content-Transfer-Encoding", https://datatracker.ietf.org/doc/html/rfc2045#section-6.7
+//
+// Decodes the Quoted-Printable encoded `input` and returns the decoded bytes.
+//  - "=XY" (two hex digits, either case) decodes to the byte 0xXY.
+//  - "=\r\n" is a soft line break and produces no output.
+//  - Illegal characters (see is_illegal_character) are silently dropped.
+//  - Malformed or truncated escape sequences never abort: as suggested by the RFC
+//    (notes (2) and (3) of Section 6.7), the '=' and the character after it are
+//    passed through literally, and decoding resumes with the next character.
+ByteBuffer decode_quoted_printable(StringView const& input)
+{
+    GenericLexer lexer(input);
+    StringBuilder output;
+
+    // NOTE: The RFC says that encoded lines must not be longer than 76 characters.
+    //       However, the RFC says implementations can ignore this and parse as is,
+    //       which is the approach we're taking.
+
+    while (!lexer.is_eof()) {
+        char potential_character = lexer.consume();
+
+        if (is_illegal_character(potential_character))
+            continue;
+
+        if (potential_character != '=') {
+            output.append(potential_character);
+            continue;
+        }
+
+        // An '=' must not be the last character of the input. The input comes from the
+        // network, so be lenient and keep the '=' literally instead of aborting.
+        if (lexer.is_eof()) {
+            output.append('=');
+            break;
+        }
+
+        char first_escape_character = lexer.consume();
+
+        // The RFC doesn't formally allow lowercase, but says implementations can treat lowercase the same as uppercase.
+        // Thus we can use is_ascii_hex_digit.
+        if (is_ascii_hex_digit(first_escape_character)) {
+            // Only peek at the second digit: if it is missing or not a hex digit it must
+            // stay in the lexer so that it is decoded like any other character.
+            if (!lexer.is_eof() && is_ascii_hex_digit(lexer.peek())) {
+                char second_escape_character = lexer.consume();
+                u8 actual_character = (parse_ascii_hex_digit(first_escape_character) << 4) | parse_ascii_hex_digit(second_escape_character);
+                output.append(actual_character);
+            } else {
+                // Truncated or malformed escape sequence: pass it through untouched.
+                output.append('=');
+                output.append(first_escape_character);
+            }
+        } else if (first_escape_character == '\r') {
+            if (!lexer.is_eof() && lexer.peek() == '\n') {
+                // This is a soft line break. Don't append anything to the output.
+                lexer.consume();
+            } else {
+                // A lone CR after '=' is not a soft line break: pass it through untouched.
+                output.append('=');
+                output.append(first_escape_character);
+            }
+        } else {
+            // Invalid escape sequence. RFC 2045 says a reasonable solution is just to append '=' followed by the character.
+            output.append('=');
+
+            // Illegal characters are dropped everywhere else, so drop them here as well.
+            if (!is_illegal_character(first_escape_character))
+                output.append(first_escape_character);
+        }
+    }
+
+    return output.to_byte_buffer();
+}
+
+}
diff --git a/Userland/Libraries/LibIMAP/QuotedPrintable.h b/Userland/Libraries/LibIMAP/QuotedPrintable.h
new file mode 100644
index 0000000000..8b127b3783
--- /dev/null
+++ b/Userland/Libraries/LibIMAP/QuotedPrintable.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/ByteBuffer.h>
+
+namespace IMAP {
+
+// Decodes Quoted-Printable encoded data (RFC 2045 Section 6.7) and returns the decoded bytes.
+ByteBuffer decode_quoted_printable(StringView const&);
+
+}