diff options
author | Andreas Kling <kling@serenityos.org> | 2020-05-03 22:41:34 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-05-03 23:01:58 +0200 |
commit | e09b83c60c77bef5fc7704dfe6f70df75462b62f (patch) | |
tree | 6e2e82163757b231678b6980b92d6d6aa66f6097 /Libraries/LibWeb | |
parent | f3676ebef501a6a3ea5748cc405841a90f60d979 (diff) | |
download | serenity-e09b83c60c77bef5fc7704dfe6f70df75462b62f.zip |
LibTextCodec: Start fleshing out a simple text codec library
We're starting with a very basic decoding API and only ISO-8859-1 and
UTF-8 decoding (and UTF-8 decoding is really a no-op since String is
expected to be UTF-8).
Diffstat (limited to 'Libraries/LibWeb')
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLParser.cpp | 19 |
1 files changed, 4 insertions, 15 deletions
diff --git a/Libraries/LibWeb/Parser/HTMLParser.cpp b/Libraries/LibWeb/Parser/HTMLParser.cpp index ca46b8de53..ab44078286 100644 --- a/Libraries/LibWeb/Parser/HTMLParser.cpp +++ b/Libraries/LibWeb/Parser/HTMLParser.cpp @@ -27,6 +27,7 @@ #include <AK/Function.h> #include <AK/NonnullRefPtrVector.h> #include <AK/StringBuilder.h> +#include <LibTextCodec/Decoder.h> #include <LibWeb/DOM/Comment.h> #include <LibWeb/DOM/DocumentFragment.h> #include <LibWeb/DOM/DocumentType.h> @@ -385,21 +386,9 @@ static bool parse_html_document(const StringView& html, Document& document, Pare String to_utf8(const StringView& input, const String& encoding) { - String output; - if (encoding == "utf-8") { - output = input; - } else if (encoding == "iso-8859-1") { - StringBuilder builder(input.length()); - for (size_t i = 0; i < input.length(); ++i) { - u8 ch = input[i]; - builder.append(ch >= 0x80 ? '?' : ch); - } - output = builder.to_string(); - } else { - dbg() << "Unknown encoding " << encoding; - ASSERT_NOT_REACHED(); - } - return output; + auto* decoder = TextCodec::decoder_for(encoding); + ASSERT(decoder); + return decoder->to_utf8(input); } RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding) |