diff options
Diffstat (limited to 'Userland/Libraries/LibTextCodec/Decoder.cpp')
-rw-r--r-- | Userland/Libraries/LibTextCodec/Decoder.cpp | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index f82bb029eb..37a28d5999 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -51,6 +51,14 @@ HebrewDecoder& hebrew_decoder()
     return *decoder;
 }
 
+CyrillicDecoder& cyrillic_decoder()
+{
+    static CyrillicDecoder* decoder = nullptr;
+    if (!decoder)
+        decoder = new CyrillicDecoder;
+    return *decoder;
+}
+
 }
 
 Decoder* decoder_for(const String& a_encoding)
@@ -66,6 +74,8 @@ Decoder* decoder_for(const String& a_encoding)
         return &latin2_decoder();
     if (encoding.equals_ignoring_case("windows-1255"))
         return &hebrew_decoder();
+    if (encoding.equals_ignoring_case("windows-1251"))
+        return &cyrillic_decoder();
     dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
     return nullptr;
 }
@@ -304,4 +314,27 @@ String HebrewDecoder::to_utf8(const StringView& input)
     return builder.to_string();
 }
 
+String CyrillicDecoder::to_utf8(const StringView& input)
+{
+    static constexpr Array<u32, 128> translation_table = {
+        0x402, 0x403, 0x201A, 0x453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x409, 0x2039, 0x40A, 0x40C, 0x40B, 0x40F,
+        0x452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0x459, 0x203A, 0x45A, 0x45C, 0x45B, 0x45F,
+        0xA0, 0x40E, 0x45E, 0x408, 0xA4, 0x490, 0xA6, 0xA7, 0x401, 0xA9, 0x404, 0xAB, 0xAC, 0xAD, 0xAE, 0x407,
+        0xB0, 0xB1, 0x406, 0x456, 0x491, 0xB5, 0xB6, 0xB7, 0x451, 0x2116, 0x454, 0xBB, 0x458, 0x405, 0x455, 0x457,
+        0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, 0x418, 0x419, 0x41A, 0x41B, 0x41C, 0x41D, 0x41E, 0x41F,
+        0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, 0x428, 0x429, 0x42A, 0x42B, 0x42C, 0x42D, 0x42E, 0x42F,
+        0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, 0x438, 0x439, 0x43A, 0x43B, 0x43C, 0x43D, 0x43E, 0x43F,
+        0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x44A, 0x44B, 0x44C, 0x44D, 0x44E, 0x44F
+    };
+    StringBuilder builder(input.length());
+    for (unsigned char ch : input) {
+        if (ch < 0x80) { // Superset of ASCII
+            builder.append(ch);
+        } else {
+            builder.append_code_point(translation_table[ch - 0x80]);
+        }
+    }
+    return builder.to_string();
+}
+
 }