summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dmitry Petrov <dpetroff@gmail.com> 2021-12-14 01:19:56 +0100
committer Andreas Kling <kling@serenityos.org> 2021-12-16 22:44:45 +0100
commit 6f5102f435b151d9e58eaccf7b9047a329207a9a (patch)
tree 01e38836254dd275ddf4b3151ee6d8e6df7ad192
parent 2c1a6ce9a5b006f0797a46da6083e7da5c4e645b (diff)
download serenity-6f5102f435b151d9e58eaccf7b9047a329207a9a.zip
LibTextCodec: Add alternate Cyrillic (aka Koi8-r) encoding
Fixes #6840.
-rw-r--r-- Userland/Libraries/LibTextCodec/Decoder.cpp 36
-rw-r--r-- Userland/Libraries/LibTextCodec/Decoder.h 5
2 files changed, 41 insertions, 0 deletions
diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index b306ac1af2..8fac02d145 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -59,6 +59,14 @@ CyrillicDecoder& cyrillic_decoder()
return *decoder;
}
+// Returns the shared Koi8RDecoder instance, constructing it on the first call.
+// The instance is heap-allocated and is not deleted by this function.
+Koi8RDecoder& koi8r_decoder()
+{
+    static Koi8RDecoder* s_instance = nullptr;
+    if (s_instance == nullptr)
+        s_instance = new Koi8RDecoder;
+    return *s_instance;
+}
+
Latin9Decoder& latin9_decoder()
{
static Latin9Decoder* decoder = nullptr;
@@ -93,6 +101,8 @@ Decoder* decoder_for(const String& a_encoding)
return &hebrew_decoder();
if (encoding.value().equals_ignoring_case("windows-1251"))
return &cyrillic_decoder();
+ if (encoding.value().equals_ignoring_case("koi8-r"))
+ return &koi8r_decoder();
if (encoding.value().equals_ignoring_case("iso-8859-15"))
return &latin9_decoder();
if (encoding.value().equals_ignoring_case("windows-1254"))
@@ -165,6 +175,8 @@ Optional<String> get_standardized_encoding(const String& encoding)
return "windows-1258";
if (trimmed_lowercase_encoding.is_one_of("x-mac-cyrillic", "x-mac-ukrainian"))
return "x-mac-cyrillic";
+ if (trimmed_lowercase_encoding.is_one_of("koi8-r", "koi8r"))
+ return "koi8-r";
if (trimmed_lowercase_encoding.is_one_of("chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"))
return "GBK";
if (trimmed_lowercase_encoding == "gb18030")
@@ -376,6 +388,30 @@ void CyrillicDecoder::process(StringView input, Function<void(u32)> on_code_poin
}
}
+// Decodes KOI8-R encoded bytes from `input`, calling `on_code_point` exactly
+// once per input byte with the Unicode code point that byte represents.
+// Bytes below 0x80 are passed through unchanged; bytes 0x80..0xFF are looked
+// up in the KOI8-R upper-half table below (see RFC 1489).
+void Koi8RDecoder::process(StringView input, Function<void(u32)> on_code_point)
+{
+    // Indexed by (byte - 0x80). Every entry is a Unicode code point, NOT a
+    // UTF-8 byte sequence: e.g. byte 0xA3 is U+0451 (not 0xD191) and byte
+    // 0xB3 is U+0401 (not 0xD081).
+    // clang-format off
+    static constexpr Array<u32, 128> translation_table = {
+        0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590, // 0x80..0x8F
+        0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,0x2264,0x2265,0xA0,0x2321,0xb0,0xb2,0xb7,0xf7, // 0x90..0x9F
+        0x2550,0x2551,0x2552,0x451,0x2553,0x2554,0x2555,0x2556,0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e, // 0xA0..0xAF
+        0x255f,0x2560,0x2561,0x401,0x2562,0x2563,0x2564,0x2565,0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0xa9, // 0xB0..0xBF
+        0x44e,0x430,0x431,0x446,0x434,0x435,0x444,0x433,0x445,0x438,0x439,0x43a,0x43b,0x43c,0x43d,0x43e, // 0xC0..0xCF
+        0x43f,0x44f,0x440,0x441,0x442,0x443,0x436,0x432,0x44c,0x44b,0x437,0x448,0x44d,0x449,0x447,0x44a, // 0xD0..0xDF
+        0x42e,0x410,0x411,0x426,0x414,0x415,0x424,0x413,0x425,0x418,0x419,0x41a,0x41b,0x41c,0x41d,0x41e, // 0xE0..0xEF
+        0x41f,0x42f,0x420,0x421,0x422,0x423,0x416,0x412,0x42c,0x42b,0x417,0x428,0x42d,0x429,0x427,0x42a, // 0xF0..0xFF
+    };
+    // clang-format on
+
+    // Iterate as unsigned char so bytes >= 0x80 are not seen as negative
+    // values when computing the table index.
+    for (unsigned char ch : input) {
+        if (ch < 0x80) { // Superset of ASCII
+            on_code_point(ch);
+        } else {
+            on_code_point(translation_table[ch - 0x80]);
+        }
+    }
+}
+
void Latin9Decoder::process(StringView input, Function<void(u32)> on_code_point)
{
auto convert_latin9_to_utf8 = [](u8 ch) -> u32 {
diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h
index b1d0e2f429..dd3d3bba89 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.h
+++ b/Userland/Libraries/LibTextCodec/Decoder.h
@@ -52,6 +52,11 @@ public:
virtual void process(StringView, Function<void(u32)> on_code_point) override;
};
+// Decoder for the KOI8-R ("koi8-r") single-byte Cyrillic encoding.
+class Koi8RDecoder final : public Decoder {
+public:
+    // Calls `on_code_point` with a code point for each byte of `input`.
+    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
+};
+
class Latin9Decoder final : public Decoder {
public:
virtual void process(StringView, Function<void(u32)> on_code_point) override;