diff options
author | Sam Atkins <atkinssj@serenityos.org> | 2023-02-17 20:15:10 +0000 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2023-02-19 17:15:47 +0100 |
commit | 2db168acc112e8f032d57f8225a8122c0e7f4207 (patch) | |
tree | add8e7858b1fe3861624a8392703d6172e4fe30c /Userland/Libraries/LibWeb | |
parent | 3c5090e17257667514156900a552d875b46da27d (diff) | |
download | serenity-2db168acc112e8f032d57f8225a8122c0e7f4207.zip |
LibTextCodec+Everywhere: Port Decoders to new Strings
Diffstat (limited to 'Userland/Libraries/LibWeb')
8 files changed, 41 insertions, 39 deletions
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index 8cb69ab0c7..f808bbaea7 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -206,37 +206,39 @@ Tokenizer::Tokenizer(StringView input, StringView encoding) bool last_was_carriage_return = false; // To filter code points from a stream of (unfiltered) code points input: - decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) { - // Replace any U+000D CARRIAGE RETURN (CR) code points, - // U+000C FORM FEED (FF) code points, - // or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) - // in input by a single U+000A LINE FEED (LF) code point. - if (code_point == '\r') { - if (last_was_carriage_return) { - builder.append('\n'); - } else { - last_was_carriage_return = true; - } - } else { - if (last_was_carriage_return) - builder.append('\n'); - - if (code_point == '\n') { - if (!last_was_carriage_return) - builder.append('\n'); - - } else if (code_point == '\f') { - builder.append('\n'); - // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). - } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) { - builder.append_code_point(REPLACEMENT_CHARACTER); - } else { - builder.append_code_point(code_point); - } - - last_was_carriage_return = false; - } - }); + decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> { + // Replace any U+000D CARRIAGE RETURN (CR) code points, + // U+000C FORM FEED (FF) code points, + // or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) + // in input by a single U+000A LINE FEED (LF) code point. + if (code_point == '\r') { + if (last_was_carriage_return) { + TRY(builder.try_append('\n')); + } else { + last_was_carriage_return = true; + } + } else { + if (last_was_carriage_return) + TRY(builder.try_append('\n')); + + if (code_point == '\n') { + if (!last_was_carriage_return) + TRY(builder.try_append('\n')); + + } else if (code_point == '\f') { + TRY(builder.try_append('\n')); + // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). + } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) { + TRY(builder.try_append_code_point(REPLACEMENT_CHARACTER)); + } else { + TRY(builder.try_append_code_point(code_point)); + } + + last_was_carriage_return = false; + } + return {}; + }) + .release_value_but_fixme_should_propagate_errors(); return builder.to_string(); }; diff --git a/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp b/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp index af2cce78aa..1420352dbc 100644 --- a/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp +++ b/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp @@ -50,7 +50,7 @@ WebIDL::ExceptionOr<DeprecatedString> TextDecoder::decode(JS::Handle<JS::Object> if (data_buffer_or_error.is_error()) return WebIDL::OperationError::create(realm(), "Failed to copy bytes from ArrayBuffer"); auto& data_buffer = data_buffer_or_error.value(); - return m_decoder.to_utf8({ data_buffer.data(), data_buffer.size() }); + return TRY_OR_THROW_OOM(vm(), m_decoder.to_utf8({ data_buffer.data(), data_buffer.size() })); } } diff --git a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp index b3904a5d84..8b9943944c 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp @@ -515,7 +515,7 @@ void HTMLScriptElement::resource_did_load() // we have to re-encode it to UTF-8. if (resource()->has_encoding()) { if (auto codec = TextCodec::decoder_for(resource()->encoding().value()); codec.has_value()) { - data = codec->to_utf8(data).to_byte_buffer(); + data = codec->to_utf8(data).release_value_but_fixme_should_propagate_errors().to_deprecated_string().to_byte_buffer(); } } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 3dfd4e3274..f661ee6f80 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -2800,7 +2800,7 @@ HTMLTokenizer::HTMLTokenizer(StringView input, DeprecatedString const& encoding) { auto decoder = TextCodec::decoder_for(encoding); VERIFY(decoder.has_value()); - m_decoded_input = decoder->to_utf8(input); + m_decoded_input = decoder->to_utf8(input).release_value_but_fixme_should_propagate_errors().to_deprecated_string(); m_utf8_view = Utf8View(m_decoded_input); m_utf8_iterator = m_utf8_view.begin(); m_prev_utf8_iterator = m_utf8_view.begin(); diff --git a/Userland/Libraries/LibWeb/HTML/Window.cpp b/Userland/Libraries/LibWeb/HTML/Window.cpp index 1edac12573..c03b9e7425 100644 --- a/Userland/Libraries/LibWeb/HTML/Window.cpp +++ b/Userland/Libraries/LibWeb/HTML/Window.cpp @@ -1421,9 +1421,9 @@ JS_DEFINE_NATIVE_FUNCTION(Window::atob) // NOTE: Any 8-bit encoding -> utf-8 decoder will work for this auto text_decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(text_decoder.has_value()); - auto text = text_decoder->to_utf8(decoded.release_value()); + auto text = TRY_OR_THROW_OOM(vm, text_decoder->to_utf8(decoded.release_value())); - return JS::PrimitiveString::create(vm, DeprecatedString(text)); + return JS::PrimitiveString::create(vm, text); } JS_DEFINE_NATIVE_FUNCTION(Window::btoa) diff --git a/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp b/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp index c31c8982b1..e600860285 100644 --- a/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp +++ b/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp @@ -163,7 +163,7 @@ WebIDL::ExceptionOr<DeprecatedString> WorkerGlobalScope::atob(DeprecatedString c // decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8. auto decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(decoder.has_value()); - return decoder->to_utf8(decoded_data.value()); + return TRY_OR_THROW_OOM(vm(), decoder->to_utf8(decoded_data.value())); } } diff --git a/Userland/Libraries/LibWeb/Infra/JSON.cpp b/Userland/Libraries/LibWeb/Infra/JSON.cpp index 5bc079c626..05d73c5dfc 100644 --- a/Userland/Libraries/LibWeb/Infra/JSON.cpp +++ b/Userland/Libraries/LibWeb/Infra/JSON.cpp @@ -27,7 +27,7 @@ WebIDL::ExceptionOr<JS::Value> parse_json_bytes_to_javascript_value(JS::VM& vm, { // 1. Let string be the result of running UTF-8 decode on bytes. TextCodec::UTF8Decoder decoder; - auto string = decoder.to_utf8(bytes); + auto string = TRY_OR_THROW_OOM(vm, decoder.to_utf8(bytes)); // 2. Return the result of parsing a JSON string to an Infra value given string. return parse_json_string_to_javascript_value(vm, string); diff --git a/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp b/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp index 1f8529eb91..e12d38c38e 100644 --- a/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp +++ b/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp @@ -224,7 +224,7 @@ DeprecatedString XMLHttpRequest::get_text_response() const // If we don't support the decoder yet, let's crash instead of attempting to return something, as the result would be incorrect and create obscure bugs. VERIFY(decoder.has_value()); - return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes); + return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes).release_value_but_fixme_should_propagate_errors().to_deprecated_string(); } // https://xhr.spec.whatwg.org/#final-mime-type |