2 files changed, 28 insertions, 0 deletions
diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index 0aba9c2599..a212136fa3 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -176,6 +176,30 @@ Decoder* bom_sniff_to_decoder(StringView input)
     return nullptr;
 }
 
+// https://encoding.spec.whatwg.org/#decode
+String convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder& fallback_decoder, StringView input)
+{
+    // 1. Let BOMEncoding be the result of BOM sniffing ioQueue.
+    auto* bom_decoder = bom_sniff_to_decoder(input);
+
+    // 2. If BOMEncoding is non-null:
+    //    1. Set encoding to BOMEncoding. (Otherwise we keep the decoder supplied by the caller.)
+    Decoder* decoder = bom_decoder ? bom_decoder : &fallback_decoder;
+    if (bom_decoder) {
+        // 2. Read three bytes from ioQueue, if BOMEncoding is UTF-8; otherwise read two bytes. (Do nothing with those bytes.)
+        // FIXME: This might be slow for large inputs if skipping the first 2/3 bytes ends up regenerating the input.
+        const auto bom_length = bom_decoder == &s_utf8_decoder ? 3 : 2;
+        input = input.substring_view(bom_length);
+    }
+
+    VERIFY(decoder);
+
+    // FIXME: 3. Process a queue with an instance of encoding’s decoder, ioQueue, output, and "replacement".
+    //        This isn't the exact same as the spec, especially the error mode of "replacement", which we don't have the concept of yet.
+    // 4. Return output.
+    return decoder->to_utf8(input);
+}
+
 String Decoder::to_utf8(StringView input)
 {
     StringBuilder builder(input.length());
diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h
index 7edf2633a1..a4b1e68dd2 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.h
+++ b/Userland/Libraries/LibTextCodec/Decoder.h
@@ -73,4 +73,8 @@ Optional<String> get_standardized_encoding(const String& encoding);
 // This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder.
 Decoder* bom_sniff_to_decoder(StringView);
 
+// NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this.
+// This will use the given decoder unless there is a byte order mark in the input, in which case we will instead use the appropriate Unicode decoder.
+String convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder&, StringView);
+
 }