diff options
author | Dmitry Petrov <dpetroff@gmail.com> | 2021-12-14 01:19:56 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-12-16 22:44:45 +0100 |
commit | 6f5102f435b151d9e58eaccf7b9047a329207a9a (patch) | |
tree | 01e38836254dd275ddf4b3151ee6d8e6df7ad192 | |
parent | 2c1a6ce9a5b006f0797a46da6083e7da5c4e645b (diff) | |
download | serenity-6f5102f435b151d9e58eaccf7b9047a329207a9a.zip |
LibTextCodec: Add alternate Cyrillic (aka Koi8-r) encoding
Fixes #6840.
-rw-r--r-- | Userland/Libraries/LibTextCodec/Decoder.cpp | 36 | ||||
-rw-r--r-- | Userland/Libraries/LibTextCodec/Decoder.h | 5 |
2 files changed, 41 insertions, 0 deletions
diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index b306ac1af2..8fac02d145 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -59,6 +59,14 @@ CyrillicDecoder& cyrillic_decoder()
     return *decoder;
 }
 
+Koi8RDecoder& koi8r_decoder()
+{
+    static Koi8RDecoder* decoder = nullptr;
+    if (!decoder)
+        decoder = new Koi8RDecoder;
+    return *decoder;
+}
+
 Latin9Decoder& latin9_decoder()
 {
     static Latin9Decoder* decoder = nullptr;
@@ -93,6 +101,8 @@ Decoder* decoder_for(const String& a_encoding)
         return &hebrew_decoder();
     if (encoding.value().equals_ignoring_case("windows-1251"))
         return &cyrillic_decoder();
+    if (encoding.value().equals_ignoring_case("koi8-r"))
+        return &koi8r_decoder();
     if (encoding.value().equals_ignoring_case("iso-8859-15"))
         return &latin9_decoder();
     if (encoding.value().equals_ignoring_case("windows-1254"))
@@ -165,6 +175,8 @@ Optional<String> get_standardized_encoding(const String& encoding)
         return "windows-1258";
     if (trimmed_lowercase_encoding.is_one_of("x-mac-cyrillic", "x-mac-ukrainian"))
         return "x-mac-cyrillic";
+    if (trimmed_lowercase_encoding.is_one_of("koi8-r", "koi8r"))
+        return "koi8-r";
     if (trimmed_lowercase_encoding.is_one_of("chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"))
         return "GBK";
     if (trimmed_lowercase_encoding == "gb18030")
@@ -376,6 +388,30 @@ void CyrillicDecoder::process(StringView input, Function<void(u32)> on_code_poin
     }
 }
 
+void Koi8RDecoder::process(StringView input, Function<void(u32)> on_code_point)
+{
+    // clang-format off
+    static constexpr Array<u32, 128> translation_table = { // Unicode code points for KOI8-R bytes 0x80-0xFF, indexed by (byte - 0x80)
+        0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590, // 0x80-0x8F
+        0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,0x2264,0x2265,0xA0,0x2321,0xb0,0xb2,0xb7,0xf7, // 0x90-0x9F
+        0x2550,0x2551,0x2552,0x451,0x2553,0x2554,0x2555,0x2556,0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e, // 0xA0-0xAF; 0xA3 is the code point U+0451, not its UTF-8 bytes D1 91
+        0x255f,0x2560,0x2561,0x401,0x2562,0x2563,0x2564,0x2565,0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0xa9, // 0xB0-0xBF; 0xB3 is the code point U+0401, not its UTF-8 bytes D0 81
+        0x44e,0x430,0x431,0x446,0x434,0x435,0x444,0x433,0x445,0x438,0x439,0x43a,0x43b,0x43c,0x43d,0x43e, // 0xC0-0xCF
+        0x43f,0x44f,0x440,0x441,0x442,0x443,0x436,0x432,0x44c,0x44b,0x437,0x448,0x44d,0x449,0x447,0x44a, // 0xD0-0xDF
+        0x42e,0x410,0x411,0x426,0x414,0x415,0x424,0x413,0x425,0x418,0x419,0x41a,0x41b,0x41c,0x41d,0x41e, // 0xE0-0xEF; 0xE2 is U+0411
+        0x41f,0x42f,0x420,0x421,0x422,0x423,0x416,0x412,0x42c,0x42b,0x417,0x428,0x42d,0x429,0x427,0x42a, // 0xF0-0xFF
+    };
+    // clang-format on
+
+    for (unsigned char ch : input) { // Exactly one code point is reported per input byte
+        if (ch < 0x80) { // Superset of ASCII
+            on_code_point(ch);
+        } else {
+            on_code_point(translation_table[ch - 0x80]);
+        }
+    }
+}
+
 void Latin9Decoder::process(StringView input, Function<void(u32)> on_code_point)
 {
     auto convert_latin9_to_utf8 = [](u8 ch) -> u32 {
diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h
index b1d0e2f429..dd3d3bba89 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.h
+++ b/Userland/Libraries/LibTextCodec/Decoder.h
@@ -52,6 +52,11 @@ public:
     virtual void process(StringView, Function<void(u32)> on_code_point) override;
 };
 
+class Koi8RDecoder final : public Decoder {
+public:
+    virtual void process(StringView, Function<void(u32)> on_code_point) override;
+};
+
 class Latin9Decoder final : public Decoder {
 public:
     virtual void process(StringView, Function<void(u32)> on_code_point) override;