diff options
author | Luke <luke.wilde@live.co.uk> | 2020-07-18 21:17:17 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-07-21 01:08:32 +0200 |
commit | 19d6884529db681bc917f7f3d0a5cee496a76a0e (patch) | |
tree | fa81e2793bb25105236637b7a7007d79ee7f0257 /Libraries/LibWeb | |
parent | a5ecb9bd6b287e69bcd9565c0b33410527b34b58 (diff) | |
download | serenity-19d6884529db681bc917f7f3d0a5cee496a76a0e.zip |
LibWeb: Implement quirks mode detection
This allows us to determine which mode to render the page in.
Exposes "doctype" and "compatMode" on Document.
Exposes "name", "publicId" and "systemId" on DocumentType.
Diffstat (limited to 'Libraries/LibWeb')
-rw-r--r-- | Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp | 3 | ||||
-rw-r--r-- | Libraries/LibWeb/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp | 1 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/Document.cpp | 16 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/Document.h | 16 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/Document.idl | 3 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/DocumentType.h | 10 | ||||
-rw-r--r-- | Libraries/LibWeb/DOM/DocumentType.idl | 7 | ||||
-rw-r--r-- | Libraries/LibWeb/Forward.h | 3 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLDocumentParser.cpp | 120 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLDocumentParser.h | 2 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLToken.h | 5 | ||||
-rw-r--r-- | Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 15 |
13 files changed, 196 insertions, 6 deletions
diff --git a/Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp b/Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp index 9647829240..d102fc6404 100644 --- a/Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp +++ b/Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp @@ -25,6 +25,7 @@ */ #include <LibWeb/Bindings/DocumentWrapper.h> +#include <LibWeb/Bindings/DocumentTypeWrapper.h> #include <LibWeb/Bindings/HTMLCanvasElementWrapper.h> #include <LibWeb/Bindings/HTMLImageElementWrapper.h> #include <LibWeb/Bindings/HTMLElementWrapper.h> @@ -41,6 +42,8 @@ NodeWrapper* wrap(JS::GlobalObject& global_object, Node& node) { if (is<Document>(node)) return static_cast<NodeWrapper*>(wrap_impl(global_object, to<Document>(node))); + if (is<DocumentType>(node)) + return static_cast<NodeWrapper*>(wrap_impl(global_object, to<DocumentType>(node))); if (is<HTMLCanvasElement>(node)) return static_cast<NodeWrapper*>(wrap_impl(global_object, to<HTMLCanvasElement>(node))); if (is<HTMLImageElement>(node)) diff --git a/Libraries/LibWeb/CMakeLists.txt b/Libraries/LibWeb/CMakeLists.txt index 30e79d5c3e..f8ba964892 100644 --- a/Libraries/LibWeb/CMakeLists.txt +++ b/Libraries/LibWeb/CMakeLists.txt @@ -156,6 +156,7 @@ endfunction() libweb_js_wrapper(EventTarget) libweb_js_wrapper(Node) libweb_js_wrapper(Document) +libweb_js_wrapper(DocumentType) libweb_js_wrapper(Element) libweb_js_wrapper(HTMLElement) libweb_js_wrapper(HTMLImageElement) diff --git a/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp b/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp index 5d626e17ba..c23fe8e996 100644 --- a/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp +++ b/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp @@ -423,6 +423,7 @@ void generate_implementation(const IDL::Interface& interface) out() << "#include <LibWeb/DOM/Element.h>"; out() << "#include <LibWeb/DOM/HTMLElement.h>"; out() << "#include <LibWeb/DOM/EventListener.h>"; + out() << "#include <LibWeb/Bindings/DocumentTypeWrapper.h>"; out() << "#include <LibWeb/Bindings/HTMLCanvasElementWrapper.h>"; out() << "#include <LibWeb/Bindings/HTMLImageElementWrapper.h>"; out() << "#include <LibWeb/Bindings/ImageDataWrapper.h>"; diff --git a/Libraries/LibWeb/DOM/Document.cpp b/Libraries/LibWeb/DOM/Document.cpp index a6efb1e533..feda9d7312 100644 --- a/Libraries/LibWeb/DOM/Document.cpp +++ b/Libraries/LibWeb/DOM/Document.cpp @@ -477,4 +477,20 @@ void Document::adopt_node(Node& subtree_root) }); } +const DocumentType* Document::doctype() const +{ + return first_child_of_type<DocumentType>(); +} + +const String& Document::compat_mode() const +{ + static String back_compat = "BackCompat"; + static String css1_compat = "CSS1Compat"; + + if (m_quirks_mode == QuirksMode::Yes) + return back_compat; + + return css1_compat; +} + } diff --git a/Libraries/LibWeb/DOM/Document.h b/Libraries/LibWeb/DOM/Document.h index f40f0f81cb..a6a2e1d761 100644 --- a/Libraries/LibWeb/DOM/Document.h +++ b/Libraries/LibWeb/DOM/Document.h @@ -43,6 +43,12 @@ namespace Web { +enum class QuirksMode { + No, + Limited, + Yes +}; + class Document : public ParentNode , public NonElementParentNode<Document> { @@ -142,11 +148,15 @@ public: void add_script_to_execute_as_soon_as_possible(Badge<HTMLScriptElement>, HTMLScriptElement&); NonnullRefPtrVector<HTMLScriptElement> take_scripts_to_execute_as_soon_as_possible(Badge<HTMLDocumentParser>); - bool in_quirks_mode() const { return m_quirks_mode; } - void set_quirks_mode(bool mode) { m_quirks_mode = mode; } + QuirksMode mode() const { return m_quirks_mode; } + bool in_quirks_mode() const { return m_quirks_mode == QuirksMode::Yes; } + void set_quirks_mode(QuirksMode mode) { m_quirks_mode = mode; } void adopt_node(Node&); + const DocumentType* doctype() const; + const String& compat_mode() const; + private: virtual RefPtr<LayoutNode> create_layout_node(const StyleProperties* parent_style) override; @@ -175,7 +185,7 @@ private: NonnullRefPtrVector<HTMLScriptElement> m_scripts_to_execute_when_parsing_has_finished; NonnullRefPtrVector<HTMLScriptElement> m_scripts_to_execute_as_soon_as_possible; - bool m_quirks_mode { false }; + QuirksMode m_quirks_mode { QuirksMode::No }; }; template<> diff --git a/Libraries/LibWeb/DOM/Document.idl b/Libraries/LibWeb/DOM/Document.idl index 1de360edcd..9d1184115b 100644 --- a/Libraries/LibWeb/DOM/Document.idl +++ b/Libraries/LibWeb/DOM/Document.idl @@ -6,6 +6,9 @@ interface Document : Node { ArrayFromVector querySelectorAll(DOMString selectors); Element createElement(DOMString tagName); + readonly attribute DOMString compatMode; + readonly attribute DocumentType? doctype; + readonly attribute HTMLElement? body; } diff --git a/Libraries/LibWeb/DOM/DocumentType.h b/Libraries/LibWeb/DOM/DocumentType.h index 9982e712e2..0d538efe2f 100644 --- a/Libraries/LibWeb/DOM/DocumentType.h +++ b/Libraries/LibWeb/DOM/DocumentType.h @@ -33,6 +33,8 @@ namespace Web { class DocumentType final : public Node { public: + using WrapperType = Bindings::DocumentTypeWrapper; + explicit DocumentType(Document&); virtual ~DocumentType() override; @@ -41,8 +43,16 @@ public: const String& name() const { return m_name; } void set_name(const String& name) { m_name = name; } + const String& public_id() const { return m_public_id; } + void set_public_id(const String& public_id) { m_public_id = public_id; } + + const String& system_id() const { return m_system_id; } + void set_system_id(const String& system_id) { m_system_id = system_id; } + private: String m_name; + String m_public_id; + String m_system_id; }; template<> diff --git a/Libraries/LibWeb/DOM/DocumentType.idl b/Libraries/LibWeb/DOM/DocumentType.idl new file mode 100644 index 0000000000..c0adacdcad --- /dev/null +++ b/Libraries/LibWeb/DOM/DocumentType.idl @@ -0,0 +1,7 @@ +interface DocumentType : Node { + + readonly attribute DOMString name; + readonly attribute DOMString publicId; + readonly attribute DOMString systemId; + +} diff --git a/Libraries/LibWeb/Forward.h b/Libraries/LibWeb/Forward.h index 141870b362..41ad85d60d 100644 --- a/Libraries/LibWeb/Forward.h +++ b/Libraries/LibWeb/Forward.h @@ -30,6 +30,7 @@ namespace Web { class CanvasRenderingContext2D; class Document; +class DocumentType; class Element; class Event; class EventHandler; @@ -72,11 +73,13 @@ class Text; class Timer; class Window; class XMLHttpRequest; +enum class QuirksMode; namespace Bindings { class CanvasRenderingContext2DWrapper; class DocumentWrapper; +class DocumentTypeWrapper; class ElementWrapper; class EventWrapper; class EventListenerWrapper; diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp index 5c276155d6..83ac40917b 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp @@ -46,6 +46,64 @@ namespace Web { +static Vector<FlyString> s_quirks_public_ids = { + "+//Silmaril//dtd html Pro v0r11 19970101//", + "-//AS//DTD HTML 3.0 asWedit + extensions//", + "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", + "-//IETF//DTD HTML 2.0 Level 1//", + "-//IETF//DTD HTML 2.0 Level 2//", + "-//IETF//DTD HTML 2.0 Strict Level 1//", + "-//IETF//DTD HTML 2.0 Strict Level 2//", + "-//IETF//DTD HTML 2.0 Strict//", + "-//IETF//DTD HTML 2.0//", + "-//IETF//DTD HTML 2.1E//", + "-//IETF//DTD HTML 3.0//", + "-//IETF//DTD HTML 3.2 Final//", + "-//IETF//DTD HTML 3.2//", + "-//IETF//DTD HTML 3//", + "-//IETF//DTD HTML Level 0//", + "-//IETF//DTD HTML Level 1//", + "-//IETF//DTD HTML Level 2//", + "-//IETF//DTD HTML Level 3//", + "-//IETF//DTD HTML Strict Level 0//", + "-//IETF//DTD HTML Strict Level 1//", + "-//IETF//DTD HTML Strict Level 2//", + "-//IETF//DTD HTML Strict Level 3//", + "-//IETF//DTD HTML Strict//", + "-//IETF//DTD HTML//", + "-//Metrius//DTD Metrius Presentational//", + "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", + "-//Microsoft//DTD Internet Explorer 2.0 HTML//", + "-//Microsoft//DTD Internet Explorer 2.0 Tables//", + "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", + "-//Microsoft//DTD Internet Explorer 3.0 HTML//", + "-//Microsoft//DTD Internet Explorer 3.0 Tables//", + "-//Netscape Comm. Corp.//DTD HTML//", + "-//Netscape Comm. Corp.//DTD Strict HTML//", + "-//O'Reilly and Associates//DTD HTML 2.0//", + "-//O'Reilly and Associates//DTD HTML Extended 1.0//", + "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", + "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", + "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", + "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", + "-//Spyglass//DTD HTML 2.0 Extended//", + "-//Sun Microsystems Corp.//DTD HotJava HTML//", + "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", + "-//W3C//DTD HTML 3 1995-03-24//", + "-//W3C//DTD HTML 3.2 Draft//", + "-//W3C//DTD HTML 3.2 Final//", + "-//W3C//DTD HTML 3.2//", + "-//W3C//DTD HTML 3.2S Draft//", + "-//W3C//DTD HTML 4.0 Frameset//", + "-//W3C//DTD HTML 4.0 Transitional//", + "-//W3C//DTD HTML Experimental 19960712//", + "-//W3C//DTD HTML Experimental 970421//", + "-//W3C//DTD W3 HTML//", + "-//W3O//DTD W3 HTML 3.0//", + "-//WebTechs//DTD Mozilla HTML 2.0//", + "-//WebTechs//DTD Mozilla HTML//" +}; + RefPtr<Document> parse_html_document(const StringView& data, const URL& url, const String& encoding) { HTMLDocumentParser parser(data, encoding); @@ -181,6 +239,60 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok } } +QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const +{ + if (doctype_token.m_doctype.force_quirks) + return QuirksMode::Yes; + + // NOTE: The tokenizer puts the name into lower case for us. + if (doctype_token.m_doctype.name.to_string() != "html") + return QuirksMode::Yes; + + auto public_identifier = doctype_token.m_doctype.public_identifier.to_string(); + auto system_identifier = doctype_token.m_doctype.system_identifier.to_string(); + + if (public_identifier.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//")) + return QuirksMode::Yes; + + if (public_identifier.equals_ignoring_case("-/W3C/DTD HTML 4.0 Transitional/EN")) + return QuirksMode::Yes; + + if (public_identifier.equals_ignoring_case("HTML")) + return QuirksMode::Yes; + + if (system_identifier.equals_ignoring_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) + return QuirksMode::Yes; + + for (auto& public_id : s_quirks_public_ids) { + if (public_identifier.starts_with(public_id, CaseSensitivity::CaseInsensitive)) + return QuirksMode::Yes; + } + + if (doctype_token.m_doctype.missing_system_identifier) { + if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Yes; + + if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Yes; + } + + if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Frameset//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Limited; + + if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Limited; + + if (!doctype_token.m_doctype.missing_system_identifier) { + if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Limited; + + if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Limited; + } + + return QuirksMode::No; +} + void HTMLDocumentParser::handle_initial(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { @@ -196,14 +308,16 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token) if (token.is_doctype()) { auto doctype = adopt(*new DocumentType(document())); doctype->set_name(token.m_doctype.name.to_string()); + doctype->set_public_id(token.m_doctype.public_identifier.to_string()); + doctype->set_system_id(token.m_doctype.system_identifier.to_string()); document().append_child(move(doctype)); - document().set_quirks_mode(token.m_doctype.force_quirks); + document().set_quirks_mode(which_quirks_mode(token)); m_insertion_mode = InsertionMode::BeforeHTML; return; } PARSE_ERROR(); - document().set_quirks_mode(true); + document().set_quirks_mode(QuirksMode::Yes); m_insertion_mode = InsertionMode::BeforeHTML; process_using_the_rules_for(InsertionMode::BeforeHTML, token); } @@ -2612,7 +2726,7 @@ NonnullRefPtrVector<Node> HTMLDocumentParser::parse_html_fragment(Element& conte { HTMLDocumentParser parser(markup, "utf-8"); parser.m_parsing_fragment = true; - parser.document().set_quirks_mode(context_element.document().in_quirks_mode()); + parser.document().set_quirks_mode(context_element.document().mode()); if (context_element.tag_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) { parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.h b/Libraries/LibWeb/Parser/HTMLDocumentParser.h index 4f33776b62..ec710a2f0d 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h @@ -85,6 +85,8 @@ public: private: const char* insertion_mode_name() const; + QuirksMode which_quirks_mode(const HTMLToken&) const; + void handle_initial(HTMLToken&); void handle_before_html(HTMLToken&); void handle_before_head(HTMLToken&); diff --git a/Libraries/LibWeb/Parser/HTMLToken.h b/Libraries/LibWeb/Parser/HTMLToken.h index 33e53979a8..86636d963a 100644 --- a/Libraries/LibWeb/Parser/HTMLToken.h +++ b/Libraries/LibWeb/Parser/HTMLToken.h @@ -174,9 +174,14 @@ private: // Type::DOCTYPE struct { + // NOTE: "Missing" is a distinct state from the empty string. + StringBuilder name; + bool missing_name { true }; StringBuilder public_identifier; + bool missing_public_identifier { true }; StringBuilder system_identifier; + bool missing_system_identifier { true }; bool force_quirks { false }; } m_doctype; diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp index a3bf9ffd4f..4ac64210f5 100644 --- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp @@ -455,6 +455,7 @@ _StartOfFunction: { create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.name.append(tolower(current_input_character.value())); + m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } ON(0) @@ -462,6 +463,7 @@ _StartOfFunction: PARSE_ERROR(); create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.name.append_codepoint(0xFFFD); + m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } ON('>') @@ -483,6 +485,7 @@ _StartOfFunction: { create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.name.append_codepoint(current_input_character.value()); + m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } } @@ -566,12 +569,14 @@ _StartOfFunction: { PARSE_ERROR(); m_current_token.m_doctype.public_identifier.clear(); + m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted); } ON('\'') { PARSE_ERROR(); m_current_token.m_doctype.public_identifier.clear(); + m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted); } ON('>') @@ -606,12 +611,14 @@ _StartOfFunction: { PARSE_ERROR(); m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { PARSE_ERROR(); m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON('>') @@ -645,11 +652,13 @@ _StartOfFunction: ON('"') { m_current_token.m_doctype.public_identifier.clear(); + m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted); } ON('\'') { m_current_token.m_doctype.public_identifier.clear(); + m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted); } ON('>') @@ -683,11 +692,13 @@ _StartOfFunction: ON('"') { m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON('>') @@ -858,12 +869,14 @@ _StartOfFunction: { PARSE_ERROR(); m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { PARSE_ERROR(); m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON_EOF @@ -895,11 +908,13 @@ _StartOfFunction: ON('"') { m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON_EOF |