summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibTextCodec
diff options
context:
space:
mode:
author: Aatos Majava <aatos.majava@protonmail.com> 2021-06-23 16:18:50 +0300
committer: Linus Groh <mail@linusgroh.de> 2021-06-23 16:32:47 +0100
commit: 3b2a528b33e0579af9313856412757a35efc553a (patch)
tree: 120704a06aad4df07d2271548d88f3c8bd5e7e4e /Userland/Libraries/LibTextCodec
parent: 21ee0ad6fc14348ca35fa31a17c53dcec6563db8 (diff)
download: serenity-3b2a528b33e0579af9313856412757a35efc553a.zip
LibTextCodec: Add Turkish (aka ISO-8859-9, Windows-1254) encoding
Diffstat (limited to 'Userland/Libraries/LibTextCodec')
-rw-r--r-- Userland/Libraries/LibTextCodec/Decoder.cpp | 39
-rw-r--r-- Userland/Libraries/LibTextCodec/Decoder.h | 5
2 files changed, 44 insertions, 0 deletions
diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index f337b8635e..4e83cc779a 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -67,6 +67,14 @@ Latin9Decoder& latin9_decoder()
return *decoder;
}
+// Returns the TurkishDecoder instance, creating it on the first call.
+// The object is heap-allocated and this function never deletes it.
+TurkishDecoder& turkish_decoder()
+{
+    static TurkishDecoder* s_instance = nullptr;
+    if (s_instance == nullptr) {
+        // First call: build the decoder and remember it for later callers.
+        s_instance = new TurkishDecoder;
+    }
+    return *s_instance;
+}
+
}
Decoder* decoder_for(const String& a_encoding)
@@ -87,6 +95,8 @@ Decoder* decoder_for(const String& a_encoding)
return &cyrillic_decoder();
if (encoding.value().equals_ignoring_case("iso-8859-15"))
return &latin9_decoder();
+ if (encoding.value().equals_ignoring_case("windows-1254"))
+ return &turkish_decoder();
}
dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
return nullptr;
@@ -383,4 +393,33 @@ String Latin9Decoder::to_utf8(const StringView& input)
return builder.to_string();
}
+// Decodes a windows-1254 (Turkish) byte string into a UTF-8 String, emitting
+// exactly one code point per input byte.
+//
+// windows-1254 differs from the first 256 Unicode code points in two places:
+//  - Bytes 0x80-0x9F hold printable characters (euro sign, curly quotes,
+//    dashes, ...) instead of the C1 control codes. Bytes the code page leaves
+//    unassigned map to the identically numbered code point.
+//  - Six Turkish letters replace the Latin-1 characters at 0xD0, 0xDD, 0xDE,
+//    0xF0, 0xFD and 0xFE.
+// ISO-8859-9 only has the second difference; the WHATWG Encoding Standard
+// treats "iso-8859-9" as a label of windows-1254, so one table serves both.
+String TurkishDecoder::to_utf8(const StringView& input)
+{
+    // Code points for bytes 0x80..0x9F, indexed by (byte - 0x80).
+    static constexpr u32 high_block[32] = {
+        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x008E, 0x008F,
+        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178,
+    };
+
+    auto convert_turkish_to_utf8 = [](u8 ch) -> u32 {
+        if (ch >= 0x80 && ch <= 0x9F)
+            return high_block[ch - 0x80];
+
+        switch (ch) {
+        case 0xD0:
+            return 0x11E; // LATIN CAPITAL LETTER G WITH BREVE
+        case 0xDD:
+            return 0x130; // LATIN CAPITAL LETTER I WITH DOT ABOVE
+        case 0xDE:
+            return 0x15E; // LATIN CAPITAL LETTER S WITH CEDILLA
+        case 0xF0:
+            return 0x11F; // LATIN SMALL LETTER G WITH BREVE
+        case 0xFD:
+            return 0x131; // LATIN SMALL LETTER DOTLESS I
+        case 0xFE:
+            return 0x15F; // LATIN SMALL LETTER S WITH CEDILLA
+        default:
+            // Everything else matches Unicode code point for code point.
+            return ch;
+        }
+    };
+
+    StringBuilder builder(input.length());
+    for (auto ch : input) {
+        // Convert explicitly so bytes >= 0x80 are treated as unsigned values.
+        builder.append_code_point(convert_turkish_to_utf8(static_cast<u8>(ch)));
+    }
+    return builder.to_string();
+}
+
}
diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h
index ae142c9f89..871ef35d9a 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.h
+++ b/Userland/Libraries/LibTextCodec/Decoder.h
@@ -53,6 +53,11 @@ public:
virtual String to_utf8(const StringView&) override;
};
+// Decoder for the Turkish single-byte encoding; decoder_for() hands out an
+// instance of this class for the "windows-1254" encoding name.
+class TurkishDecoder final : public Decoder {
+public:
+// Converts the encoded bytes in the given view into a UTF-8 String.
+ virtual String to_utf8(const StringView&) override;
+};
+
Decoder* decoder_for(const String& encoding);
Optional<String> get_standardized_encoding(const String& encoding);
bool is_standardized_encoding(const String& encoding);