From 6fe4fcb74b9a3a8d9990f1f96dec7ab501961d90 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Thu, 11 May 2023 16:56:01 +0200
Subject: LibWeb: Add a class to represent the "source set" concept from HTML

Also comes with a little extra CSS parser helper for parsing "sizes"
attributes in images.
---
 Userland/Libraries/LibWeb/CMakeLists.txt        |   1 +
 Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp |  47 +++
 Userland/Libraries/LibWeb/CSS/Parser/Parser.h   |   2 +
 Userland/Libraries/LibWeb/HTML/SourceSet.cpp    | 396 ++++++++++++++++++++++++
 Userland/Libraries/LibWeb/HTML/SourceSet.h      |  55 ++++
 5 files changed, 501 insertions(+)
 create mode 100644 Userland/Libraries/LibWeb/HTML/SourceSet.cpp
 create mode 100644 Userland/Libraries/LibWeb/HTML/SourceSet.h

(limited to 'Userland/Libraries')

diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt
index 3c814b461f..d317742369 100644
--- a/Userland/Libraries/LibWeb/CMakeLists.txt
+++ b/Userland/Libraries/LibWeb/CMakeLists.txt
@@ -350,6 +350,7 @@ set(SOURCES
     HTML/Scripting/Script.cpp
     HTML/Scripting/WindowEnvironmentSettingsObject.cpp
     HTML/SessionHistoryEntry.cpp
+    HTML/SourceSet.cpp
     HTML/Storage.cpp
     HTML/StructuredSerialize.cpp
     HTML/SubmitEvent.cpp
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp
index f0b890935e..695cae6b36 100644
--- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp
@@ -7820,4 +7820,51 @@ Optional parse_css_supports_condition(CSS::Parser::ParsingCo
     return parser.parse_as_supports_condition();
 }
 
+// https://html.spec.whatwg.org/multipage/images.html#parse-a-sizes-attribute
+CSS::Length CSS::Parser::Parser::parse_as_sizes_attribute()
+{
+    Optional<CSS::Length> size;
+
+    // When asked to parse a sizes attribute from an element,
+    // parse a comma-separated list of component values from the value of the element's sizes attribute
+    // (or the empty string, if the attribute is absent), and let unparsed sizes list be the result.
+    auto unparsed_sizes_list = parse_a_comma_separated_list_of_component_values(m_token_stream);
+
+    // For each unparsed size in unparsed sizes list:
+    for (auto& unparsed_size : unparsed_sizes_list) {
+        // 1. Remove all consecutive <whitespace-token>s from the end of unparsed size.
+        //    If unparsed size is now empty, that is a parse error; continue.
+        while (!unparsed_size.is_empty() && unparsed_size.last().is_token() && unparsed_size.last().token().is(Token::Type::Whitespace))
+            unparsed_size.take_last();
+        if (unparsed_size.is_empty())
+            continue;
+
+        // 2. If the last component value in unparsed size is a valid non-negative <source-size-value>,
+        //    let size be its value and remove the component value from unparsed size.
+        // FIXME: Any CSS function other than the math functions is invalid.
+        //    Otherwise, there is a parse error; continue.
+        auto length = parse_length(unparsed_size.last());
+        if (length.has_value() && length.value().raw_value() >= 0) {
+            size = length.value();
+            unparsed_size.take_last();
+        } else {
+            continue;
+        }
+
+        // 3. Remove all consecutive <whitespace-token>s from the end of unparsed size.
+        //    If unparsed size is now empty, return size and exit this algorithm.
+        //    If this was not the last item in unparsed sizes list, that is a parse error.
+        while (!unparsed_size.is_empty() && unparsed_size.last().is_token() && unparsed_size.last().token().is(Token::Type::Whitespace))
+            unparsed_size.take_last();
+        if (unparsed_size.is_empty())
+            return size.value();
+
+        // FIXME: 4. Parse the remaining component values in unparsed size as a <media-condition>.
+        //    If it does not parse correctly, or it does parse correctly but the <media-condition> evaluates to false, continue. [MQ]
+        dbgln("FIXME: Implement parsing of media conditions in sizes attribute");
+    }
+
+    return CSS::Length(100, CSS::Length::Type::Vw);
+}
+
 }
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h
index 803eabac33..b8a06ad8ca 100644
--- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h
@@ -90,6 +90,8 @@ public:
     static ErrorOr> parse_css_value(Badge, ParsingContext const&, PropertyID, Vector const&);
     static ErrorOr> parse_calculated_value(Badge, ParsingContext const&, Vector const&);
 
+    CSS::Length parse_as_sizes_attribute();
+
 private:
     Parser(ParsingContext const&, Vector);
 
diff --git a/Userland/Libraries/LibWeb/HTML/SourceSet.cpp b/Userland/Libraries/LibWeb/HTML/SourceSet.cpp
new file mode 100644
index 0000000000..5584224e4e
--- /dev/null
+++ b/Userland/Libraries/LibWeb/HTML/SourceSet.cpp
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2023, Andreas Kling
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+namespace Web::HTML {
+
+SourceSet::SourceSet()
+    : m_source_size(CSS::Length::make_auto())
+{
+}
+
+bool SourceSet::is_empty() const
+{
+    return m_sources.is_empty();
+}
+
+// https://html.spec.whatwg.org/multipage/images.html#select-an-image-source-from-a-source-set
+ImageSourceAndPixelDensity SourceSet::select_an_image_source()
+{
+    // FIXME: 1. If an entry b in sourceSet has the same associated pixel density descriptor as an earlier entry a in sourceSet,
+    //           then remove entry b.
+    //           Repeat this step until none of the entries in sourceSet have the same associated pixel density descriptor
+    //           as an earlier entry.
+
+    // FIXME: 2. In an implementation-defined manner, choose one image source from sourceSet. Let this be selectedSource.
+
+    // FIXME: 3. Return selectedSource and its associated pixel density.
+
+    return { m_sources.first(), 1.0f };
+}
+
+static StringView collect_a_sequence_of_code_points(Function<bool(u32 code_point)> condition, StringView input, size_t& position)
+{
+    // 1. Let result be the empty string.
+    // 2. While position doesn’t point past the end of input and the code point at position within input meets the condition condition:
+    //    1. Append that code point to the end of result.
+    //    2. Advance position by 1.
+    // 3. Return result.
+
+    size_t start = position;
+    while (position < input.length() && condition(input[position]))
+        ++position;
+    return input.substring_view(start, position - start);
+}
+
+// https://html.spec.whatwg.org/multipage/images.html#parse-a-srcset-attribute
+SourceSet parse_a_srcset_attribute(StringView input)
+{
+    // 1. Let input be the value passed to this algorithm.
+
+    // 2. Let position be a pointer into input, initially pointing at the start of the string.
+    size_t position = 0;
+
+    // 3. Let candidates be an initially empty source set.
+    SourceSet candidates;
+
+splitting_loop:
+    // 4. Splitting loop: Collect a sequence of code points that are ASCII whitespace or U+002C COMMA characters from input given position.
+    //    If any U+002C COMMA characters were collected, that is a parse error.
+    collect_a_sequence_of_code_points(
+        [](u32 code_point) {
+            if (code_point == ',') {
+                // FIXME: Report a parse error somehow.
+                return true;
+            }
+            return Infra::is_ascii_whitespace(code_point);
+        },
+        input, position);
+
+    // 5. If position is past the end of input, return candidates.
+    if (position >= input.length()) {
+        return candidates;
+    }
+
+    // 6. Collect a sequence of code points that are not ASCII whitespace from input given position, and let that be url.
+    auto url = collect_a_sequence_of_code_points(
+        [](u32 code_point) { return !Infra::is_ascii_whitespace(code_point); },
+        input, position);
+
+    // 7. Let descriptors be a new empty list.
+    Vector<String> descriptors;
+
+    // 8. If url ends with U+002C (,), then:
+    if (url.ends_with(',')) {
+        // 1. Remove all trailing U+002C COMMA characters from url. If this removed more than one character, that is a parse error.
+        while (url.ends_with(','))
+            url = url.substring_view(0, url.length() - 1);
+    }
+    // Otherwise:
+    else {
+        // 1. Descriptor tokenizer: Skip ASCII whitespace within input given position.
+        collect_a_sequence_of_code_points(
+            [](u32 code_point) { return Infra::is_ascii_whitespace(code_point); },
+            input, position);
+
+        // 2. Let current descriptor be the empty string.
+        StringBuilder current_descriptor;
+
+        enum class State {
+            InDescriptor,
+            InParens,
+            AfterDescriptor,
+        };
+        // 3. Let state be in descriptor.
+        auto state = State::InDescriptor;
+
+        // 4. Let c be the character at position. Do the following depending on the value of state.
+        //    For the purpose of this step, "EOF" is a special character representing that position is past the end of input.
+        for (;;) {
+            Optional<u32> c;
+            if (position < input.length()) {
+                c = input[position];
+            }
+
+            switch (state) {
+            // - In descriptor
+            case State::InDescriptor:
+                // Do the following, depending on the value of c:
+
+                // - ASCII whitespace
+                if (c.has_value() && Infra::is_ascii_whitespace(c.value())) {
+                    // If current descriptor is not empty, append current descriptor to descriptors and let current descriptor be the empty string.
+                    if (!current_descriptor.is_empty()) {
+                        descriptors.append(current_descriptor.to_string().release_value_but_fixme_should_propagate_errors());
+                        current_descriptor.clear();
+                    }
+                    // Set state to after descriptor.
+                    state = State::AfterDescriptor;
+                }
+                // U+002C COMMA (,)
+                else if (c.has_value() && c.value() == ',') {
+                    // Advance position to the next character in input.
+                    position += 1;
+
+                    // If current descriptor is not empty, append current descriptor to descriptors.
+                    if (!current_descriptor.is_empty()) {
+                        descriptors.append(current_descriptor.to_string().release_value_but_fixme_should_propagate_errors());
+                    }
+
+                    // Jump to the step labeled descriptor parser.
+                    goto descriptor_parser;
+                }
+
+                // U+0028 LEFT PARENTHESIS (()
+                else if (c.has_value() && c.value() == '(') {
+                    // Append c to current descriptor.
+                    current_descriptor.try_append_code_point(c.value()).release_value_but_fixme_should_propagate_errors();
+
+                    // Set state to in parens.
+                    state = State::InParens;
+                }
+                // EOF
+                else if (!c.has_value()) {
+                    // If current descriptor is not empty, append current descriptor to descriptors.
+                    if (!current_descriptor.is_empty()) {
+                        descriptors.append(current_descriptor.to_string().release_value_but_fixme_should_propagate_errors());
+                    }
+
+                    // Jump to the step labeled descriptor parser.
+                    goto descriptor_parser;
+                }
+                // Anything else
+                else {
+                    // Append c to current descriptor.
+                    current_descriptor.try_append_code_point(c.value()).release_value_but_fixme_should_propagate_errors();
+                }
+                break;
+
+            // - In parens
+            case State::InParens:
+                // Do the following, depending on the value of c:
+                // U+0029 RIGHT PARENTHESIS ())
+                if (c.has_value() && c.value() == ')') {
+                    // Append c to current descriptor.
+                    current_descriptor.try_append_code_point(c.value()).release_value_but_fixme_should_propagate_errors();
+                    // Set state to in descriptor.
+                    state = State::InDescriptor;
+                }
+                // EOF
+                else if (!c.has_value()) {
+                    // Append current descriptor to descriptors.
+                    descriptors.append(current_descriptor.to_string().release_value_but_fixme_should_propagate_errors());
+
+                    // Jump to the step labeled descriptor parser.
+                    goto descriptor_parser;
+                }
+                // Anything else
+                else {
+                    // Append c to current descriptor.
+                    current_descriptor.try_append_code_point(c.value()).release_value_but_fixme_should_propagate_errors();
+                }
+                break;
+
+            // - After descriptor
+            case State::AfterDescriptor:
+                // Do the following, depending on the value of c:
+                // ASCII whitespace
+                if (c.has_value() && Infra::is_ascii_whitespace(c.value())) {
+                    // Stay in this state.
+                }
+                // EOF
+                else if (!c.has_value()) {
+                    // Jump to the step labeled descriptor parser.
+                    goto descriptor_parser;
+                }
+                // Anything else
+                else {
+                    // Set state to in descriptor.
+                    state = State::InDescriptor;
+                    // Set position to the previous character in input.
+                    position -= 1;
+                }
+                break;
+            }
+            // Advance position to the next character in input. Repeat this step.
+            position += 1;
+        }
+    }
+descriptor_parser:
+    // 9. Descriptor parser: Let error be no.
+    bool error = false;
+
+    // 10. Let width be absent.
+    Optional<int> width;
+
+    // 11. Let density be absent.
+    Optional<float> density;
+
+    // 12. Let future-compat-h be absent.
+    Optional<int> future_compat_h;
+
+    // 13. For each descriptor in descriptors, run the appropriate set of steps from the following list:
+    for (auto& descriptor : descriptors) {
+        auto last_character = descriptor.bytes_as_string_view().bytes().last();
+        auto descriptor_without_last_character = descriptor.bytes_as_string_view().substring_view(0, descriptor.bytes_as_string_view().length() - 1);
+
+        auto as_int = descriptor_without_last_character.to_int();
+        auto as_float = descriptor_without_last_character.to_float();
+
+        // - If the descriptor consists of a valid non-negative integer followed by a U+0077 LATIN SMALL LETTER W character
+        if (last_character == 'w' && as_int.has_value()) {
+            // NOOP: 1. If the user agent does not support the sizes attribute, let error be yes.
+
+            // 2. If width and density are not both absent, then let error be yes.
+
+            if (width.has_value() || density.has_value()) {
+                error = true;
+            }
+
+            // FIXME: 3. Apply the rules for parsing non-negative integers to the descriptor.
+            //           If the result is zero, let error be yes. Otherwise, let width be the result.
+            width = as_int.value();
+        }
+
+        // - If the descriptor consists of a valid floating-point number followed by a U+0078 LATIN SMALL LETTER X character
+        else if (last_character == 'x' && as_float.has_value()) {
+            // 1. If width, density and future-compat-h are not all absent, then let error be yes.
+            if (width.has_value() || density.has_value() || future_compat_h.has_value()) {
+                error = true;
+            }
+
+            // FIXME: 2. Apply the rules for parsing floating-point number values to the descriptor.
+            //           If the result is less than zero, let error be yes. Otherwise, let density be the result.
+            density = as_float.value();
+        }
+        // - If the descriptor consists of a valid non-negative integer followed by a U+0068 LATIN SMALL LETTER H character
+        else if (last_character == 'h' && as_int.has_value()) {
+            // This is a parse error.
+            // 1. If future-compat-h and density are not both absent, then let error be yes.
+            if (future_compat_h.has_value() || density.has_value()) {
+                error = true;
+            }
+            // FIXME: 2. Apply the rules for parsing non-negative integers to the descriptor.
+            //           If the result is zero, let error be yes. Otherwise, let future-compat-h be the result.
+            future_compat_h = as_int.value();
+        }
+        // - Anything else
+        else {
+            // Let error be yes.
+            error = true;
+        }
+    }
+
+    // 14. If future-compat-h is not absent and width is absent, let error be yes.
+    if (future_compat_h.has_value() && !width.has_value()) {
+        error = true;
+    }
+
+    // 15. If error is still no, then append a new image source to candidates whose URL is url,
+    //     associated with a width width if not absent and a pixel density density if not absent.
+    //     Otherwise, there is a parse error.
+    if (!error) {
+        ImageSource source;
+        source.url = String::from_utf8(url).release_value_but_fixme_should_propagate_errors();
+        if (width.has_value())
+            source.descriptor = ImageSource::WidthDescriptorValue { width.value() };
+        else if (density.has_value())
+            source.descriptor = ImageSource::PixelDensityDescriptorValue { density.value() };
+        candidates.m_sources.append(move(source));
+    }
+
+    // 16. Return to the step labeled splitting loop.
+    goto splitting_loop;
+}
+
+// https://html.spec.whatwg.org/multipage/images.html#parse-a-sizes-attribute
+CSS::Length parse_a_sizes_attribute(DOM::Document const& document, StringView sizes)
+{
+    auto css_parser = CSS::Parser::Parser::create(CSS::Parser::ParsingContext { document }, sizes).release_value_but_fixme_should_propagate_errors();
+    return css_parser.parse_as_sizes_attribute();
+}
+
+// https://html.spec.whatwg.org/multipage/images.html#create-a-source-set
+SourceSet SourceSet::create(DOM::Document const& document, String default_source, String srcset, String sizes)
+{
+    // 1. Let source set be an empty source set.
+    SourceSet source_set;
+
+    // 2. If srcset is not an empty string, then set source set to the result of parsing srcset.
+    if (!srcset.is_empty())
+        source_set = parse_a_srcset_attribute(srcset);
+
+    // 3. Let source size be the result of parsing sizes.
+    source_set.m_source_size = parse_a_sizes_attribute(document, sizes);
+
+    // 4. If default source is not the empty string and source set does not contain an image source
+    //    with a pixel density descriptor value of 1, and no image source with a width descriptor,
+    //    append default source to source set.
+    if (!default_source.is_empty()) {
+        bool contains_image_source_with_pixel_density_descriptor_value_of_1 = false;
+        bool contains_image_source_with_width_descriptor = false;
+        for (auto& source : source_set.m_sources) {
+            if (source.descriptor.has<ImageSource::PixelDensityDescriptorValue>()) {
+                if (source.descriptor.get<ImageSource::PixelDensityDescriptorValue>().value == 1.0f)
+                    contains_image_source_with_pixel_density_descriptor_value_of_1 = true;
+            }
+            if (source.descriptor.has<ImageSource::WidthDescriptorValue>())
+                contains_image_source_with_width_descriptor = true;
+        }
+        if (!contains_image_source_with_pixel_density_descriptor_value_of_1 && !contains_image_source_with_width_descriptor)
+            source_set.m_sources.append({ .url = default_source, .descriptor = {} });
+    }
+
+    // 5. Normalize the source densities of source set.
+    source_set.normalize_source_densities();
+
+    // 6. Return source set.
+    return source_set;
+}
+
+// https://html.spec.whatwg.org/multipage/images.html#normalise-the-source-densities
+void SourceSet::normalize_source_densities()
+{
+    // 1. Let source size be source set's source size.
+    auto source_size = m_source_size;
+
+    // 2. For each image source in source set:
+    for (auto& image_source : m_sources) {
+        // 1. If the image source has a pixel density descriptor, continue to the next image source.
+        if (image_source.descriptor.has<ImageSource::PixelDensityDescriptorValue>())
+            continue;
+
+        // 2. Otherwise, if the image source has a width descriptor,
+        //    replace the width descriptor with a pixel density descriptor
+        //    with a value of the width descriptor value divided by the source size and a unit of x.
+        if (image_source.descriptor.has<ImageSource::WidthDescriptorValue>()) {
+            auto& width_descriptor = image_source.descriptor.get<ImageSource::WidthDescriptorValue>();
+            if (source_size.is_absolute()) {
+                image_source.descriptor = ImageSource::PixelDensityDescriptorValue {
+                    .value = (width_descriptor.value / source_size.absolute_length_to_px()).value()
+                };
+            } else {
+                dbgln("FIXME: Handle relative sizes: {}", source_size);
+                image_source.descriptor = ImageSource::PixelDensityDescriptorValue {
+                    .value = 1,
+                };
+            }
+        }
+
+        // 3. Otherwise, give the image source a pixel density descriptor of 1x.
+        else {
+            image_source.descriptor = ImageSource::PixelDensityDescriptorValue {
+                .value = 1.0f
+            };
+        }
+    }
+}
+}
diff --git a/Userland/Libraries/LibWeb/HTML/SourceSet.h b/Userland/Libraries/LibWeb/HTML/SourceSet.h
new file mode 100644
index 0000000000..a88b33258c
--- /dev/null
+++ b/Userland/Libraries/LibWeb/HTML/SourceSet.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023, Andreas Kling
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace Web::HTML {
+
+// https://html.spec.whatwg.org/multipage/images.html#image-source
+struct ImageSource {
+    struct PixelDensityDescriptorValue {
+        float value { 0 };
+    };
+
+    struct WidthDescriptorValue {
+        CSSPixels value { 0 };
+    };
+
+    String url;
+    Variant<Empty, PixelDensityDescriptorValue, WidthDescriptorValue> descriptor;
+};
+
+struct ImageSourceAndPixelDensity {
+    ImageSource source;
+    float pixel_density { 1.0f };
+};
+
+// https://html.spec.whatwg.org/multipage/images.html#source-set
+struct SourceSet {
+    static SourceSet create(DOM::Document const&, String default_source, String srcset, String sizes);
+
+    [[nodiscard]] bool is_empty() const;
+
+    // https://html.spec.whatwg.org/multipage/images.html#select-an-image-source-from-a-source-set
+    [[nodiscard]] ImageSourceAndPixelDensity select_an_image_source();
+
+    // https://html.spec.whatwg.org/multipage/images.html#normalise-the-source-densities
+    void normalize_source_densities();
+
+    SourceSet();
+
+    Vector<ImageSource> m_sources;
+    CSS::Length m_source_size;
+};
+
+SourceSet parse_a_srcset_attribute(StringView);
+CSS::Length parse_a_sizes_attribute(DOM::Document const&, StringView);
+
+}
-- 
cgit v1.2.3