diff options
author | Aatos Majava <aatos.majava@protonmail.com> | 2021-06-23 16:18:50 +0300 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-06-23 16:32:47 +0100 |
commit | 3b2a528b33e0579af9313856412757a35efc553a (patch) | |
tree | 120704a06aad4df07d2271548d88f3c8bd5e7e4e /Userland/Libraries/LibTextCodec | |
parent | 21ee0ad6fc14348ca35fa31a17c53dcec6563db8 (diff) | |
download | serenity-3b2a528b33e0579af9313856412757a35efc553a.zip |
LibTextCodec: Add Turkish (aka ISO-8859-9, Windows-1254) encoding
Diffstat (limited to 'Userland/Libraries/LibTextCodec')
-rw-r--r-- | Userland/Libraries/LibTextCodec/Decoder.cpp | 39 | ||||
-rw-r--r-- | Userland/Libraries/LibTextCodec/Decoder.h | 5 |
2 files changed, 44 insertions, 0 deletions
diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index f337b8635e..4e83cc779a 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -67,6 +67,14 @@ Latin9Decoder& latin9_decoder()
     return *decoder;
 }
 
+TurkishDecoder& turkish_decoder()
+{
+    static TurkishDecoder* decoder = nullptr;
+    if (!decoder)
+        decoder = new TurkishDecoder;
+    return *decoder;
+}
+
 }
 
 Decoder* decoder_for(const String& a_encoding)
@@ -87,6 +95,8 @@ Decoder* decoder_for(const String& a_encoding)
             return &cyrillic_decoder();
         if (encoding.value().equals_ignoring_case("iso-8859-15"))
             return &latin9_decoder();
+        if (encoding.value().equals_ignoring_case("windows-1254"))
+            return &turkish_decoder();
     }
     dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
     return nullptr;
@@ -383,4 +393,33 @@ String Latin9Decoder::to_utf8(const StringView& input)
     return builder.to_string();
 }
 
+String TurkishDecoder::to_utf8(const StringView& input)
+{
+    auto convert_turkish_to_utf8 = [](u8 ch) -> u32 {
+        // Turkish (aka ISO-8859-9, Windows-1254) is the same as the first 256 Unicode code points, except for 6 characters.
+        switch (ch) {
+        case 0xD0:
+            return 0x11E;
+        case 0xDD:
+            return 0x130;
+        case 0xDE:
+            return 0x15E;
+        case 0xF0:
+            return 0x11F;
+        case 0xFD:
+            return 0x131;
+        case 0xFE:
+            return 0x15F;
+        default:
+            return ch;
+        }
+    };
+
+    StringBuilder builder(input.length());
+    for (auto ch : input) {
+        builder.append_code_point(convert_turkish_to_utf8(ch));
+    }
+    return builder.to_string();
+}
+
 }
diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h
index ae142c9f89..871ef35d9a 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.h
+++ b/Userland/Libraries/LibTextCodec/Decoder.h
@@ -53,6 +53,11 @@ public:
     virtual String to_utf8(const StringView&) override;
 };
 
+class TurkishDecoder final : public Decoder {
+public:
+    virtual String to_utf8(const StringView&) override;
+};
+
 Decoder* decoder_for(const String& encoding);
 Optional<String> get_standardized_encoding(const String& encoding);
 bool is_standardized_encoding(const String& encoding); |