LibTextCodec: Add ISO-8859-15 (aka Latin-9) encoding

author: Aatos Majava <aatos.majava@protonmail.com> 2021-06-15 16:07:56 +0300
committer: Linus Groh <mail@linusgroh.de> 2021-06-15 15:12:09 +0100
commit: 7597cca5c61070ffb699ce0a80d6073cb8dc6f15 (patch)
tree: e60f3f8e766d6e3314064775318b313430934898 /Userland/Libraries/LibTextCodec
parent: 155f1026ce7222ae2d4f855e5389230860acea3e (diff)
download: serenity-7597cca5c61070ffb699ce0a80d6073cb8dc6f15.zip
2 files changed, 48 insertions, 0 deletions
diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index bebdb004a3..f337b8635e 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -59,6 +59,14 @@ CyrillicDecoder& cyrillic_decoder()
     return *decoder;
 }
 
+Latin9Decoder& latin9_decoder()
+{
+    static Latin9Decoder* decoder = nullptr; // Shared instance, created lazily on the first call.
+    if (!decoder)
+        decoder = new Latin9Decoder; // Not freed anywhere in this function.
+    return *decoder; // Every call returns a reference to the same object.
+}
+
 }
 
 Decoder* decoder_for(const String& a_encoding)
@@ -77,6 +85,8 @@ Decoder* decoder_for(const String& a_encoding)
             return &hebrew_decoder();
         if (encoding.value().equals_ignoring_case("windows-1251"))
             return &cyrillic_decoder();
+        if (encoding.value().equals_ignoring_case("iso-8859-15"))
+            return &latin9_decoder();
     }
     dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
     return nullptr;
@@ -340,4 +350,37 @@ String CyrillicDecoder::to_utf8(const StringView& input)
     return builder.to_string();
 }
 
+String Latin9Decoder::to_utf8(const StringView& input)
+{
+    auto convert_latin9_to_utf8 = [](u8 ch) -> u32 { // Maps one Latin-9 byte to a Unicode code point.
+        // Latin-9 (ISO-8859-15) matches the first 256 Unicode code points except for the 8 bytes below.
+        switch (ch) {
+        case 0xA4:
+            return 0x20AC; // U+20AC EURO SIGN
+        case 0xA6:
+            return 0x160; // U+0160 LATIN CAPITAL LETTER S WITH CARON
+        case 0xA8:
+            return 0x161; // U+0161 LATIN SMALL LETTER S WITH CARON
+        case 0xB4:
+            return 0x17D; // U+017D LATIN CAPITAL LETTER Z WITH CARON
+        case 0xB8:
+            return 0x17E; // U+017E LATIN SMALL LETTER Z WITH CARON
+        case 0xBC:
+            return 0x152; // U+0152 LATIN CAPITAL LIGATURE OE
+        case 0xBD:
+            return 0x153; // U+0153 LATIN SMALL LIGATURE OE
+        case 0xBE:
+            return 0x178; // U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
+        default:
+            return ch; // Every other byte maps to the code point with the same numeric value.
+        }
+    };
+
+    StringBuilder builder(input.length()); // NOTE(review): argument is presumably a capacity hint - confirm.
+    for (auto ch : input) { // Each element is handed to the lambda as a u8.
+        builder.append_code_point(convert_latin9_to_utf8(ch));
+    }
+    return builder.to_string();
+}
+
 }
diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h
index 585f05ff0d..ae142c9f89 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.h
+++ b/Userland/Libraries/LibTextCodec/Decoder.h
@@ -48,6 +48,11 @@ public:
     virtual String to_utf8(const StringView&) override;
 };
 
+class Latin9Decoder final : public Decoder {
+public:
+    virtual String to_utf8(const StringView&) override; // Takes ISO-8859-15 (Latin-9) input; defined in Decoder.cpp.
+};
+
 Decoder* decoder_for(const String& encoding);
 Optional<String> get_standardized_encoding(const String& encoding);
 bool is_standardized_encoding(const String& encoding);
author	Aatos Majava <aatos.majava@protonmail.com>	2021-06-15 16:07:56 +0300
committer	Linus Groh <mail@linusgroh.de>	2021-06-15 15:12:09 +0100
commit	7597cca5c61070ffb699ce0a80d6073cb8dc6f15 (patch)
tree	e60f3f8e766d6e3314064775318b313430934898 /Userland/Libraries/LibTextCodec
parent	155f1026ce7222ae2d4f855e5389230860acea3e (diff)
download	serenity-7597cca5c61070ffb699ce0a80d6073cb8dc6f15.zip