diff options
author | Lorenz Steinert <lorenz@steinerts.de> | 2022-02-19 15:58:21 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2022-02-21 18:26:43 +0100 |
commit | db789813c9f3d07d9a336d1cee4245e64dcb9709 (patch) | |
tree | f280a0e681cef1b4316f25b765b21246439febbb | |
parent | d29d9462e9c34ff89990073deb7d7fadb66b1779 (diff) | |
download | serenity-db789813c9f3d07d9a336d1cee4245e64dcb9709.zip |
LibWeb: Add basic support for dynamic markup insertion
This implements basic support for dynamic markup insertion, adding
* Document::open()
* Document::write(Vector<String> const&)
* Document::writeln(Vector<String> const&)
* Document::close()
The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains a HTMLTokenizer
without any data. Additionally, the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point (which is
defined in the HTMLTokenizer) are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
* m_tokenizer
* m_aborted
* m_script_nesting_level
The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that Document::close() was called and HTMLParser::the_end() should
be called.
Lastly an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.
-rw-r--r-- | Userland/Libraries/LibWeb/DOM/Document.cpp | 145 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/DOM/Document.h | 17 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/DOM/Document.idl | 8 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp | 58 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h | 9 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp | 37 | ||||
-rw-r--r-- | Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h | 27 |
7 files changed, 282 insertions, 19 deletions
diff --git a/Userland/Libraries/LibWeb/DOM/Document.cpp b/Userland/Libraries/LibWeb/DOM/Document.cpp index e08d7ff42c..842866bc6e 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.cpp +++ b/Userland/Libraries/LibWeb/DOM/Document.cpp @@ -53,6 +53,7 @@ #include <LibWeb/HTML/HTMLScriptElement.h> #include <LibWeb/HTML/HTMLTitleElement.h> #include <LibWeb/HTML/MessageEvent.h> +#include <LibWeb/HTML/Parser/HTMLParser.h> #include <LibWeb/HTML/Scripting/ExceptionReporter.h> #include <LibWeb/HTML/Scripting/WindowEnvironmentSettingsObject.h> #include <LibWeb/Layout/BlockFormattingContext.h> @@ -148,15 +149,151 @@ void Document::removed_last_ref() } // https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-document-write -void Document::write(Vector<String> const& strings) +ExceptionOr<void> Document::write(Vector<String> const& strings) { - dbgln("TODO: document.write({})", strings); + // 1. If document is an XML document, then throw an "InvalidStateError" DOMException. + if (doctype() && doctype()->name() == "xml") + return DOM::InvalidStateError::create("write() called on XML document."); + + // 2. If document's throw-on-dynamic-markup-insertion counter is greater than 0, then throw an "InvalidStateError" DOMException. + if (m_throw_on_dynamic_markup_insertion_counter > 0) + return DOM::InvalidStateError::create("throw-on-dynamic-markup-insertion-counter greater than zero."); + + // 3. If document's active parser was aborted is true, then return. + if (m_active_parser_was_aborted) + return {}; + + // 4. If the insertion point is undefined, then: + if (!(m_parser && m_parser->tokenizer().is_insertion_point_defined())) { + // 1. If document's unload counter is greater than 0 or document's ignore-destructive-writes counter is greater than 0, then return. + if (m_unload_counter > 0 || m_ignore_destructive_writes_counter > 0) + return {}; + + // 2. Run the document open steps with document. + open(); + } + + // 5. 
Insert input into the input stream just before the insertion point. + StringBuilder builder; + builder.join("", strings); + m_parser->tokenizer().insert_input_at_insertion_point(builder.build()); + + // 6. If there is no pending parsing-blocking script, have the HTML parser process input, one code point at a time, processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the insertion point or when the processing of the tokenizer is aborted by the tree construction stage (this can happen if a script end tag token is emitted by the tokenizer). + if (!pending_parsing_blocking_script()) + m_parser->run(); + + return {}; } // https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-document-writeln -void Document::writeln(Vector<String> const& strings) +ExceptionOr<void> Document::writeln(Vector<String> const& strings) { - dbgln("TODO: document.writeln({})", strings); + + // FIXME: No need to allocate a new vector + Vector<String> new_strings; + for (auto const& element : strings) { + new_strings.append(String::formatted("{}\n", element)); + } + + return write(strings); +} + +// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-document-open +ExceptionOr<Document*> Document::open(String const&, String const&) +{ + // 1. If document is an XML document, then throw an "InvalidStateError" DOMException exception. + if (doctype() && doctype()->name() == "xml") + return DOM::InvalidStateError::create("open() called on XML document."); + + // 2. If document's throw-on-dynamic-markup-insertion counter is greater than 0, then throw an "InvalidStateError" DOMException. + if (m_throw_on_dynamic_markup_insertion_counter > 0) + return DOM::InvalidStateError::create("throw-on-dynamic-markup-insertion-counter greater than zero."); + + // FIXME: 3. Let entryDocument be the entry global object's associated Document. + auto& entry_document = *this; + + // 4. 
If document's origin is not same origin to entryDocument's origin, then throw a "SecurityError" DOMException. + if (origin() != entry_document.origin()) + return DOM::SecurityError::create("Document.origin() not the same as entryDocument's."); + + // 5. If document has an active parser whose script nesting level is greater than 0, then return document. + if (m_parser && m_parser->script_nesting_level() > 0) + return this; + + // 6. Similarly, if document's unload counter is greater than 0, then return document. + if (m_unload_counter > 0) + return this; + + // 7. If document's active parser was aborted is true, then return document. + if (m_active_parser_was_aborted) + return this; + + // FIXME: 8. If document's browsing context is non-null and there is an existing attempt to navigate document's browsing context, then stop document loading given document. + + // FIXME: 9. For each shadow-including inclusive descendant node of document, erase all event listeners and handlers given node. + + // FIXME 10. If document is the associated Document of document's relevant global object, then erase all event listeners and handlers given document's relevant global object. + + // 11. Replace all with null within document, without firing any mutation events. + replace_all(nullptr); + + // 12. If document is fully active, then: + if (is_fully_active()) { + // 1. Let newURL be a copy of entryDocument's URL. + auto new_url = entry_document.url(); + // 2. If entryDocument is not document, then set newURL's fragment to null. + if (&entry_document != this) + new_url.set_fragment(""); + + // FIXME: 3. Run the URL and history update steps with document and newURL. + } + + // FIXME: 13. Set document's is initial about:blank to false. + + // FIXME: 14. If document's iframe load in progress flag is set, then set document's mute iframe load flag. + + // 15. Set document to no-quirks mode. + set_quirks_mode(QuirksMode::No); + + // 16. Create a new HTML parser and associate it with document. 
This is a script-created parser (meaning that it can be closed by the document.open() and document.close() methods, and that the tokenizer will wait for an explicit call to document.close() before emitting an end-of-file token). The encoding confidence is irrelevant. + m_parser = make<HTML::HTMLParser>(*this); + + // 17. Set the insertion point to point at just before the end of the input stream (which at this point will be empty). + m_parser->tokenizer().update_insertion_point(); + + // 18. Update the current document readiness of document to "loading". + update_readiness(HTML::DocumentReadyState::Loading); + + // 19. Return document. + return this; +} + +// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#closing-the-input-stream +ExceptionOr<void> Document::close() +{ + // 1. If document is an XML document, then throw an "InvalidStateError" DOMException exception. + if (doctype() && doctype()->name() == "xml") + return DOM::InvalidStateError::create("close() called on XML document."); + + // 2. If document's throw-on-dynamic-markup-insertion counter is greater than 0, then throw an "InvalidStateError" DOMException. + if (m_throw_on_dynamic_markup_insertion_counter > 0) + return DOM::InvalidStateError::create("throw-on-dynamic-markup-insertion-counter greater than zero."); + + // 3. If there is no script-created parser associated with the document, then return. + if (!m_parser) + return {}; + + // FIXME: 4. Insert an explicit "EOF" character at the end of the parser's input stream. + m_parser->tokenizer().insert_eof(); + + // 5. If there is a pending parsing-blocking script, then return. + if (pending_parsing_blocking_script()) + return {}; + + // FIXME: 6. Run the tokenizer, processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the explicit "EOF" character or spins the event loop. 
+ m_parser->run(); + + return {}; } Origin Document::origin() const diff --git a/Userland/Libraries/LibWeb/DOM/Document.h b/Userland/Libraries/LibWeb/DOM/Document.h index e9d2962314..650b14b36e 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.h +++ b/Userland/Libraries/LibWeb/DOM/Document.h @@ -244,8 +244,11 @@ public: Window& window() { return *m_window; } - void write(Vector<String> const& strings); - void writeln(Vector<String> const& strings); + ExceptionOr<void> write(Vector<String> const& strings); + ExceptionOr<void> writeln(Vector<String> const& strings); + + ExceptionOr<Document*> open(String const& = "", String const& = ""); + ExceptionOr<void> close(); Window* default_view() { return m_window; } @@ -355,6 +358,9 @@ private: RefPtr<Core::Timer> m_style_update_timer; RefPtr<Core::Timer> m_layout_update_timer; + OwnPtr<HTML::HTMLParser> m_parser; + bool m_active_parser_was_aborted { false }; + String m_source; OwnPtr<JS::Interpreter> m_interpreter; @@ -385,6 +391,12 @@ private: u32 m_ignore_destructive_writes_counter { 0 }; + // https://html.spec.whatwg.org/multipage/browsing-the-web.html#unload-counter + u32 m_unload_counter { 0 }; + + // https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#throw-on-dynamic-markup-insertion-counter + u32 m_throw_on_dynamic_markup_insertion_counter { 0 }; + // https://html.spec.whatwg.org/multipage/semantics.html#script-blocking-style-sheet-counter u32 m_script_blocking_style_sheet_counter { 0 }; @@ -403,5 +415,4 @@ private: bool m_needs_layout { false }; }; - } diff --git a/Userland/Libraries/LibWeb/DOM/Document.idl b/Userland/Libraries/LibWeb/DOM/Document.idl index 90913d04c6..03aff632ea 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.idl +++ b/Userland/Libraries/LibWeb/DOM/Document.idl @@ -32,8 +32,12 @@ interface Document : Node { readonly attribute Window? defaultView; - undefined write(DOMString... text); - undefined writeln(DOMString... 
text); + [CEReactions] Document open(optional DOMString unused1, optional DOMString unused2); + // FIXME: implement ExceptionOr<Window> Document::open(...) + // WindowProxy? open(USVString url, DOMString name, DOMString features); + [CEReactions] undefined close(); + [CEReactions] undefined write(DOMString... text); + [CEReactions] undefined writeln(DOMString... text); attribute DOMString cookie; diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp index 878fbc950b..89e9d34231 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp @@ -137,17 +137,24 @@ HTMLParser::HTMLParser(DOM::Document& document, StringView input, const String& m_document->set_encoding(standardized_encoding.value()); } +HTMLParser::HTMLParser(DOM::Document& document) + : m_document(document) +{ + m_tokenizer.set_parser({}, *this); +} + HTMLParser::~HTMLParser() { m_document->set_should_invalidate_styles_on_attribute_changes(true); } -void HTMLParser::run(const AK::URL& url) +void HTMLParser::run() { - m_document->set_url(url); - m_document->set_source(m_tokenizer.source()); - for (;;) { + // FIXME: Find a better way to say that we come from Document::close() and want to process EOF. + if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached()) + return; + auto optional_token = m_tokenizer.next_token(); if (!optional_token.has_value()) break; @@ -186,7 +193,13 @@ void HTMLParser::run(const AK::URL& url) } flush_character_insertions(); +} +void HTMLParser::run(const AK::URL& url) +{ + m_document->set_url(url); + m_document->set_source(m_tokenizer.source()); + run(); the_end(); } @@ -197,7 +210,8 @@ void HTMLParser::the_end() // FIXME: 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return. - // FIXME: 2. Set the insertion point to undefined. + // 2. Set the insertion point to undefined. 
+ m_tokenizer.undefine_insertion_point(); // 3. Update the current document readiness to "interactive". m_document->update_readiness(HTML::DocumentReadyState::Interactive); @@ -2003,6 +2017,7 @@ void HTMLParser::decrement_script_nesting_level() --m_script_nesting_level; } +// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata void HTMLParser::handle_text(HTMLToken& token) { if (token.is_character()) { @@ -2025,13 +2040,18 @@ void HTMLParser::handle_text(HTMLToken& token) NonnullRefPtr<HTMLScriptElement> script = verify_cast<HTMLScriptElement>(current_node()); (void)m_stack_of_open_elements.pop(); m_insertion_mode = m_original_insertion_mode; - // FIXME: Handle tokenizer insertion point stuff here. + // Let the old insertion point have the same value as the current insertion point. + m_tokenizer.store_insertion_point(); + // Let the insertion point be just before the next input character. + m_tokenizer.update_insertion_point(); increment_script_nesting_level(); + // FIXME: Check if active speculative HTML parser is null. script->prepare_script({}); decrement_script_nesting_level(); if (script_nesting_level() == 0) m_parser_pause_flag = false; - // FIXME: Handle tokenizer insertion point stuff here too. + // Let the insertion point have the value of the old insertion point. + m_tokenizer.restore_insertion_point(); while (document().pending_parsing_blocking_script()) { if (script_nesting_level() != 0) { @@ -2065,7 +2085,8 @@ void HTMLParser::handle_text(HTMLToken& token) m_tokenizer.set_blocked(false); - // FIXME: Handle tokenizer insertion point stuff here too. + // Let the insertion point be just before the next input character. + m_tokenizer.update_insertion_point(); VERIFY(script_nesting_level() == 0); increment_script_nesting_level(); @@ -2076,7 +2097,8 @@ void HTMLParser::handle_text(HTMLToken& token) VERIFY(script_nesting_level() == 0); m_parser_pause_flag = false; - // FIXME: Handle tokenizer insertion point stuff here too. 
+ // Let the insertion point be undefined again. + m_tokenizer.undefine_insertion_point(); } } return; @@ -2986,8 +3008,26 @@ void HTMLParser::process_using_the_rules_for_foreign_content(HTMLToken& token) if (token.is_end_tag() && current_node().namespace_() == Namespace::SVG && current_node().tag_name() == SVG::TagNames::script) { ScriptEndTag: + // Pop the current node off the stack of open elements. (void)m_stack_of_open_elements.pop(); + // Let the old insertion point have the same value as the current insertion point. + m_tokenizer.store_insertion_point(); + // Let the insertion point be just before the next input character. + m_tokenizer.update_insertion_point(); + // Increment the parser's script nesting level by one. + increment_script_nesting_level(); + // Set the parser pause flag to true. + m_parser_pause_flag = true; + // FIXME: Implement SVG script parsing. TODO(); + // Decrement the parser's script nesting level by one. + decrement_script_nesting_level(); + // If the parser's script nesting level is zero, then set the parser pause flag to false. + if (script_nesting_level() == 0) + m_parser_pause_flag = false; + + // Let the insertion point have the value of the old insertion point. 
+ m_tokenizer.restore_insertion_point(); } if (token.is_end_tag()) { diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h index f8803dc801..cbfa767632 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h @@ -46,10 +46,12 @@ class HTMLParser { public: HTMLParser(DOM::Document&, StringView input, const String& encoding); + HTMLParser(DOM::Document&); ~HTMLParser(); static NonnullOwnPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, const ByteBuffer& input); + void run(); void run(const AK::URL&); DOM::Document& document(); @@ -67,6 +69,12 @@ public: static bool is_special_tag(const FlyString& tag_name, const FlyString& namespace_); + HTMLTokenizer& tokenizer() { return m_tokenizer; } + + bool aborted() const { return m_aborted; } + + size_t script_nesting_level() const { return m_script_nesting_level; } + private: const char* insertion_mode_name() const; @@ -127,7 +135,6 @@ private: void parse_generic_raw_text_element(HTMLToken&); void increment_script_nesting_level(); void decrement_script_nesting_level(); - size_t script_nesting_level() const { return m_script_nesting_level; } void reset_the_insertion_mode_appropriately(); void adjust_mathml_attributes(HTMLToken&); diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 59fe2e4bea..0ff40c371c 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -2780,6 +2780,15 @@ void HTMLTokenizer::create_new_token(HTMLToken::Type type) m_current_token.set_start_position({}, nth_last_position(offset)); } +HTMLTokenizer::HTMLTokenizer() +{ + m_decoded_input = ""; + m_utf8_view = Utf8View(m_decoded_input); + m_utf8_iterator = m_utf8_view.begin(); + m_prev_utf8_iterator = m_utf8_view.begin(); + m_source_positions.empend(0u, 0u); +} + 
HTMLTokenizer::HTMLTokenizer(StringView input, String const& encoding) { auto* decoder = TextCodec::decoder_for(encoding); @@ -2787,9 +2796,37 @@ HTMLTokenizer::HTMLTokenizer(StringView input, String const& encoding) m_decoded_input = decoder->to_utf8(input); m_utf8_view = Utf8View(m_decoded_input); m_utf8_iterator = m_utf8_view.begin(); + m_prev_utf8_iterator = m_utf8_view.begin(); m_source_positions.empend(0u, 0u); } +void HTMLTokenizer::insert_input_at_insertion_point(String const& input) +{ + auto utf8_iterator_byte_offset = m_utf8_view.byte_offset_of(m_utf8_iterator); + + // FIXME: Implement a InputStream to handle insertion_point and iterators. + StringBuilder builder {}; + builder.append(m_decoded_input.substring(0, m_insertion_point.position)); + builder.append(input); + builder.append(m_decoded_input.substring(m_insertion_point.position)); + m_decoded_input = builder.build(); + + m_utf8_view = Utf8View(m_decoded_input); + m_utf8_iterator = m_utf8_view.iterator_at_byte_offset(utf8_iterator_byte_offset); + + m_insertion_point.position += input.length(); +} + +void HTMLTokenizer::insert_eof() +{ + m_explicit_eof_inserted = true; +} + +bool HTMLTokenizer::is_eof_inserted() +{ + return m_explicit_eof_inserted; +} + void HTMLTokenizer::will_switch_to([[maybe_unused]] State new_state) { dbgln_if(TOKENIZER_TRACE_DEBUG, "[{}] Switch to {}", state_name(m_state), state_name(new_state)); diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h index dd1dc76c85..05f5c5b697 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h @@ -101,6 +101,7 @@ namespace Web::HTML { class HTMLTokenizer { public: + explicit HTMLTokenizer(); explicit HTMLTokenizer(StringView input, String const& encoding); enum class State { @@ -124,6 +125,24 @@ public: String source() const { return m_decoded_input; } + void insert_input_at_insertion_point(String 
const& input); + void insert_eof(); + bool is_eof_inserted(); + + bool is_insertion_point_defined() const { return m_insertion_point.defined; } + bool is_insertion_point_reached() + { + return m_insertion_point.defined && m_insertion_point.position >= m_utf8_view.iterator_offset(m_utf8_iterator); + } + void undefine_insertion_point() { m_insertion_point.defined = false; } + void store_insertion_point() { m_old_insertion_point = m_insertion_point; } + void restore_insertion_point() { m_insertion_point = m_old_insertion_point; } + void update_insertion_point() + { + m_insertion_point.defined = true; + m_insertion_point.position = m_utf8_view.iterator_offset(m_utf8_iterator); + } + private: void skip(size_t count); Optional<u32> next_code_point(); @@ -163,6 +182,13 @@ private: String m_decoded_input; + struct InsertionPoint { + size_t position { 0 }; + bool defined { false }; + }; + InsertionPoint m_insertion_point {}; + InsertionPoint m_old_insertion_point {}; + Utf8View m_utf8_view; Utf8CodePointIterator m_utf8_iterator; Utf8CodePointIterator m_prev_utf8_iterator; @@ -172,6 +198,7 @@ private: Optional<String> m_last_emitted_start_tag_name; + bool m_explicit_eof_inserted { false }; bool m_has_emitted_eof { false }; Queue<HTMLToken> m_queued_tokens; |