From 19d6884529db681bc917f7f3d0a5cee496a76a0e Mon Sep 17 00:00:00 2001 From: Luke Date: Sat, 18 Jul 2020 21:17:17 +0100 Subject: LibWeb: Implement quirks mode detection This allows us to determine which mode to render the page in. Exposes "doctype" and "compatMode" on Document. Exposes "name", "publicId" and "systemId" on DocumentType. --- Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp | 3 + Libraries/LibWeb/CMakeLists.txt | 1 + .../LibWeb/CodeGenerators/WrapperGenerator.cpp | 1 + Libraries/LibWeb/DOM/Document.cpp | 16 +++ Libraries/LibWeb/DOM/Document.h | 16 ++- Libraries/LibWeb/DOM/Document.idl | 3 + Libraries/LibWeb/DOM/DocumentType.h | 10 ++ Libraries/LibWeb/DOM/DocumentType.idl | 7 ++ Libraries/LibWeb/Forward.h | 3 + Libraries/LibWeb/Parser/HTMLDocumentParser.cpp | 120 ++++++++++++++++++++- Libraries/LibWeb/Parser/HTMLDocumentParser.h | 2 + Libraries/LibWeb/Parser/HTMLToken.h | 5 + Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 15 +++ 13 files changed, 196 insertions(+), 6 deletions(-) create mode 100644 Libraries/LibWeb/DOM/DocumentType.idl (limited to 'Libraries/LibWeb') diff --git a/Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp b/Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp index 9647829240..d102fc6404 100644 --- a/Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp +++ b/Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp @@ -25,6 +25,7 @@ */ #include +#include #include #include #include @@ -41,6 +42,8 @@ NodeWrapper* wrap(JS::GlobalObject& global_object, Node& node) { if (is(node)) return static_cast(wrap_impl(global_object, to(node))); + if (is(node)) + return static_cast(wrap_impl(global_object, to(node))); if (is(node)) return static_cast(wrap_impl(global_object, to(node))); if (is(node)) diff --git a/Libraries/LibWeb/CMakeLists.txt b/Libraries/LibWeb/CMakeLists.txt index 30e79d5c3e..f8ba964892 100644 --- a/Libraries/LibWeb/CMakeLists.txt +++ b/Libraries/LibWeb/CMakeLists.txt @@ -156,6 +156,7 @@ endfunction() libweb_js_wrapper(EventTarget) libweb_js_wrapper(Node) libweb_js_wrapper(Document) +libweb_js_wrapper(DocumentType) libweb_js_wrapper(Element) libweb_js_wrapper(HTMLElement) libweb_js_wrapper(HTMLImageElement) diff --git a/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp b/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp index 5d626e17ba..c23fe8e996 100644 --- a/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp +++ b/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp @@ -423,6 +423,7 @@ void generate_implementation(const IDL::Interface& interface) out() << "#include "; out() << "#include "; out() << "#include "; + out() << "#include "; out() << "#include "; out() << "#include "; out() << "#include "; diff --git a/Libraries/LibWeb/DOM/Document.cpp b/Libraries/LibWeb/DOM/Document.cpp index a6efb1e533..feda9d7312 100644 --- a/Libraries/LibWeb/DOM/Document.cpp +++ b/Libraries/LibWeb/DOM/Document.cpp @@ -477,4 +477,20 @@ void Document::adopt_node(Node& subtree_root) }); } +const DocumentType* Document::doctype() const +{ + return first_child_of_type(); +} + +const String& Document::compat_mode() const +{ + static String back_compat = "BackCompat"; + static String css1_compat = "CSS1Compat"; + + if (m_quirks_mode == QuirksMode::Yes) + return back_compat; + + return css1_compat; +} + } diff --git a/Libraries/LibWeb/DOM/Document.h b/Libraries/LibWeb/DOM/Document.h index f40f0f81cb..a6a2e1d761 100644 --- a/Libraries/LibWeb/DOM/Document.h +++ b/Libraries/LibWeb/DOM/Document.h @@ -43,6 +43,12 @@ namespace Web { +enum class QuirksMode { + No, + Limited, + Yes +}; + class Document : public ParentNode , public NonElementParentNode { @@ -142,11 +148,15 @@ public: void add_script_to_execute_as_soon_as_possible(Badge, HTMLScriptElement&); NonnullRefPtrVector take_scripts_to_execute_as_soon_as_possible(Badge); - bool in_quirks_mode() const { return m_quirks_mode; } - void set_quirks_mode(bool mode) { m_quirks_mode = mode; } + QuirksMode mode() const { return m_quirks_mode; } + bool in_quirks_mode() const { return m_quirks_mode == QuirksMode::Yes; } + void set_quirks_mode(QuirksMode mode) { m_quirks_mode = mode; } void adopt_node(Node&); + const DocumentType* doctype() const; + const String& compat_mode() const; + private: virtual RefPtr create_layout_node(const StyleProperties* parent_style) override; @@ -175,7 +185,7 @@ private: NonnullRefPtrVector m_scripts_to_execute_when_parsing_has_finished; NonnullRefPtrVector m_scripts_to_execute_as_soon_as_possible; - bool m_quirks_mode { false }; + QuirksMode m_quirks_mode { QuirksMode::No }; }; template<> diff --git a/Libraries/LibWeb/DOM/Document.idl b/Libraries/LibWeb/DOM/Document.idl index 1de360edcd..9d1184115b 100644 --- a/Libraries/LibWeb/DOM/Document.idl +++ b/Libraries/LibWeb/DOM/Document.idl @@ -6,6 +6,9 @@ interface Document : Node { ArrayFromVector querySelectorAll(DOMString selectors); Element createElement(DOMString tagName); + readonly attribute DOMString compatMode; + readonly attribute DocumentType? doctype; + readonly attribute HTMLElement? body; } diff --git a/Libraries/LibWeb/DOM/DocumentType.h b/Libraries/LibWeb/DOM/DocumentType.h index 9982e712e2..0d538efe2f 100644 --- a/Libraries/LibWeb/DOM/DocumentType.h +++ b/Libraries/LibWeb/DOM/DocumentType.h @@ -33,6 +33,8 @@ namespace Web { class DocumentType final : public Node { public: + using WrapperType = Bindings::DocumentTypeWrapper; + explicit DocumentType(Document&); virtual ~DocumentType() override; @@ -41,8 +43,16 @@ public: const String& name() const { return m_name; } void set_name(const String& name) { m_name = name; } + const String& public_id() const { return m_public_id; } + void set_public_id(const String& public_id) { m_public_id = public_id; } + + const String& system_id() const { return m_system_id; } + void set_system_id(const String& system_id) { m_system_id = system_id; } + private: String m_name; + String m_public_id; + String m_system_id; }; template<> diff --git a/Libraries/LibWeb/DOM/DocumentType.idl b/Libraries/LibWeb/DOM/DocumentType.idl new file mode 100644 index 0000000000..c0adacdcad --- /dev/null +++ b/Libraries/LibWeb/DOM/DocumentType.idl @@ -0,0 +1,7 @@ +interface DocumentType : Node { + + readonly attribute DOMString name; + readonly attribute DOMString publicId; + readonly attribute DOMString systemId; + +} diff --git a/Libraries/LibWeb/Forward.h b/Libraries/LibWeb/Forward.h index 141870b362..41ad85d60d 100644 --- a/Libraries/LibWeb/Forward.h +++ b/Libraries/LibWeb/Forward.h @@ -30,6 +30,7 @@ namespace Web { class CanvasRenderingContext2D; class Document; +class DocumentType; class Element; class Event; class EventHandler; @@ -72,11 +73,13 @@ class Text; class Timer; class Window; class XMLHttpRequest; +enum class QuirksMode; namespace Bindings { class CanvasRenderingContext2DWrapper; class DocumentWrapper; +class DocumentTypeWrapper; class ElementWrapper; class EventWrapper; class EventListenerWrapper; diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp index 5c276155d6..83ac40917b 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp @@ -46,6 +46,64 @@ namespace Web { +static Vector s_quirks_public_ids = { + "+//Silmaril//dtd html Pro v0r11 19970101//", + "-//AS//DTD HTML 3.0 asWedit + extensions//", + "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", + "-//IETF//DTD HTML 2.0 Level 1//", + "-//IETF//DTD HTML 2.0 Level 2//", + "-//IETF//DTD HTML 2.0 Strict Level 1//", + "-//IETF//DTD HTML 2.0 Strict Level 2//", + "-//IETF//DTD HTML 2.0 Strict//", + "-//IETF//DTD HTML 2.0//", + "-//IETF//DTD HTML 2.1E//", + "-//IETF//DTD HTML 3.0//", + "-//IETF//DTD HTML 3.2 Final//", + "-//IETF//DTD HTML 3.2//", + "-//IETF//DTD HTML 3//", + "-//IETF//DTD HTML Level 0//", + "-//IETF//DTD HTML Level 1//", + "-//IETF//DTD HTML Level 2//", + "-//IETF//DTD HTML Level 3//", + "-//IETF//DTD HTML Strict Level 0//", + "-//IETF//DTD HTML Strict Level 1//", + "-//IETF//DTD HTML Strict Level 2//", + "-//IETF//DTD HTML Strict Level 3//", + "-//IETF//DTD HTML Strict//", + "-//IETF//DTD HTML//", + "-//Metrius//DTD Metrius Presentational//", + "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", + "-//Microsoft//DTD Internet Explorer 2.0 HTML//", + "-//Microsoft//DTD Internet Explorer 2.0 Tables//", + "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", + "-//Microsoft//DTD Internet Explorer 3.0 HTML//", + "-//Microsoft//DTD Internet Explorer 3.0 Tables//", + "-//Netscape Comm. Corp.//DTD HTML//", + "-//Netscape Comm. Corp.//DTD Strict HTML//", + "-//O'Reilly and Associates//DTD HTML 2.0//", + "-//O'Reilly and Associates//DTD HTML Extended 1.0//", + "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", + "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", + "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", + "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", + "-//Spyglass//DTD HTML 2.0 Extended//", + "-//Sun Microsystems Corp.//DTD HotJava HTML//", + "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", + "-//W3C//DTD HTML 3 1995-03-24//", + "-//W3C//DTD HTML 3.2 Draft//", + "-//W3C//DTD HTML 3.2 Final//", + "-//W3C//DTD HTML 3.2//", + "-//W3C//DTD HTML 3.2S Draft//", + "-//W3C//DTD HTML 4.0 Frameset//", + "-//W3C//DTD HTML 4.0 Transitional//", + "-//W3C//DTD HTML Experimental 19960712//", + "-//W3C//DTD HTML Experimental 970421//", + "-//W3C//DTD W3 HTML//", + "-//W3O//DTD W3 HTML 3.0//", + "-//WebTechs//DTD Mozilla HTML 2.0//", + "-//WebTechs//DTD Mozilla HTML//" +}; + RefPtr parse_html_document(const StringView& data, const URL& url, const String& encoding) { HTMLDocumentParser parser(data, encoding); @@ -181,6 +239,60 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok } } +QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const +{ + if (doctype_token.m_doctype.force_quirks) + return QuirksMode::Yes; + + // NOTE: The tokenizer puts the name into lower case for us. + if (doctype_token.m_doctype.name.to_string() != "html") + return QuirksMode::Yes; + + auto public_identifier = doctype_token.m_doctype.public_identifier.to_string(); + auto system_identifier = doctype_token.m_doctype.system_identifier.to_string(); + + if (public_identifier.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//")) + return QuirksMode::Yes; + + if (public_identifier.equals_ignoring_case("-/W3C/DTD HTML 4.0 Transitional/EN")) + return QuirksMode::Yes; + + if (public_identifier.equals_ignoring_case("HTML")) + return QuirksMode::Yes; + + if (system_identifier.equals_ignoring_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) + return QuirksMode::Yes; + + for (auto& public_id : s_quirks_public_ids) { + if (public_identifier.starts_with(public_id, CaseSensitivity::CaseInsensitive)) + return QuirksMode::Yes; + } + + if (doctype_token.m_doctype.missing_system_identifier) { + if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Yes; + + if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Yes; + } + + if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Frameset//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Limited; + + if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Limited; + + if (!doctype_token.m_doctype.missing_system_identifier) { + if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Limited; + + if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//", CaseSensitivity::CaseInsensitive)) + return QuirksMode::Limited; + } + + return QuirksMode::No; +} + void HTMLDocumentParser::handle_initial(HTMLToken& token) { if (token.is_character() && token.is_parser_whitespace()) { @@ -196,14 +308,16 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token) if (token.is_doctype()) { auto doctype = adopt(*new DocumentType(document())); doctype->set_name(token.m_doctype.name.to_string()); + doctype->set_public_id(token.m_doctype.public_identifier.to_string()); + doctype->set_system_id(token.m_doctype.system_identifier.to_string()); document().append_child(move(doctype)); - document().set_quirks_mode(token.m_doctype.force_quirks); + document().set_quirks_mode(which_quirks_mode(token)); m_insertion_mode = InsertionMode::BeforeHTML; return; } PARSE_ERROR(); - document().set_quirks_mode(true); + document().set_quirks_mode(QuirksMode::Yes); m_insertion_mode = InsertionMode::BeforeHTML; process_using_the_rules_for(InsertionMode::BeforeHTML, token); } @@ -2612,7 +2726,7 @@ NonnullRefPtrVector HTMLDocumentParser::parse_html_fragment(Element& conte { HTMLDocumentParser parser(markup, "utf-8"); parser.m_parsing_fragment = true; - parser.document().set_quirks_mode(context_element.document().in_quirks_mode()); + parser.document().set_quirks_mode(context_element.document().mode()); if (context_element.tag_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) { parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.h b/Libraries/LibWeb/Parser/HTMLDocumentParser.h index 4f33776b62..ec710a2f0d 100644 --- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h +++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h @@ -85,6 +85,8 @@ public: private: const char* insertion_mode_name() const; + QuirksMode which_quirks_mode(const HTMLToken&) const; + void handle_initial(HTMLToken&); void handle_before_html(HTMLToken&); void handle_before_head(HTMLToken&); diff --git a/Libraries/LibWeb/Parser/HTMLToken.h b/Libraries/LibWeb/Parser/HTMLToken.h index 33e53979a8..86636d963a 100644 --- a/Libraries/LibWeb/Parser/HTMLToken.h +++ b/Libraries/LibWeb/Parser/HTMLToken.h @@ -174,9 +174,14 @@ private: // Type::DOCTYPE struct { + // NOTE: "Missing" is a distinct state from the empty string. + StringBuilder name; + bool missing_name { true }; StringBuilder public_identifier; + bool missing_public_identifier { true }; StringBuilder system_identifier; + bool missing_system_identifier { true }; bool force_quirks { false }; } m_doctype; diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp index a3bf9ffd4f..4ac64210f5 100644 --- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp @@ -455,6 +455,7 @@ _StartOfFunction: { create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.name.append(tolower(current_input_character.value())); + m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } ON(0) @@ -462,6 +463,7 @@ _StartOfFunction: PARSE_ERROR(); create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.name.append_codepoint(0xFFFD); + m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } ON('>') @@ -483,6 +485,7 @@ _StartOfFunction: { create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.name.append_codepoint(current_input_character.value()); + m_current_token.m_doctype.missing_name = false; SWITCH_TO(DOCTYPEName); } } @@ -566,12 +569,14 @@ _StartOfFunction: { PARSE_ERROR(); m_current_token.m_doctype.public_identifier.clear(); + m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted); } ON('\'') { PARSE_ERROR(); m_current_token.m_doctype.public_identifier.clear(); + m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted); } ON('>') @@ -606,12 +611,14 @@ _StartOfFunction: { PARSE_ERROR(); m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { PARSE_ERROR(); m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON('>') @@ -645,11 +652,13 @@ _StartOfFunction: ON('"') { m_current_token.m_doctype.public_identifier.clear(); + m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted); } ON('\'') { m_current_token.m_doctype.public_identifier.clear(); + m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted); } ON('>') @@ -683,11 +692,13 @@ _StartOfFunction: ON('"') { m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON('>') @@ -858,12 +869,14 @@ _StartOfFunction: { PARSE_ERROR(); m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { PARSE_ERROR(); m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON_EOF @@ -895,11 +908,13 @@ _StartOfFunction: ON('"') { m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { m_current_token.m_doctype.system_identifier.clear(); + m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON_EOF -- cgit v1.2.3