/*
 * Copyright (c) 2022, Ali Mohammad Pur
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

// NOTE(review): in this copy of the header the template argument lists and the
// include targets appear to have been stripped (bare `#include`, `Function(...)>`
// with an unmatched `>`, `ErrorOr, ParseError>`, `template requires(...)`,
// `forward(args)`). The code tokens below are left exactly as found; restore the
// missing pieces from the canonical file before building.

#pragma once

// NOTE(review): header names are missing from every include directive below.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace XML {

// Describes a single parse failure: a position (`offset`) and a message (`error`).
// The AK::Formatter specialization at the end of this file prints it as
// "<error> at offset <offset>".
// NOTE(review): presumably `offset` is an offset into the parsed source; confirm
// against the parser implementation.
struct ParseError {
    size_t offset;
    String error;
};

// Callback interface with one virtual hook per document event.
// Every hook has an empty default body, so a subclass only needs to override the
// events it is interested in.
// NOTE(review): presumably these hooks are driven by Parser::parse_with_listener();
// the call sites are not in this header.
struct Listener {
    virtual ~Listener() { }

    virtual void set_source(String) { }
    virtual void document_start() { }
    virtual void document_end() { }
    // NOTE(review): the HashMap parameter presumably carries the element's
    // attributes; its key/value types are missing in this copy.
    virtual void element_start(Name const&, HashMap const&) { }
    virtual void element_end(Name const&) { }
    virtual void text(String const&) { }
    virtual void comment(String const&) { }
    virtual void error(ParseError const&) { }
};

// XML parser declared over a StringView of the source text.
// Only the small inline helpers are defined here; the parse_* members are
// declared and must be defined elsewhere.
class Parser {
public:
    // Behaviour switches for a Parser instance, with their defaults.
    // NOTE(review): the exact effect of each flag is decided by the implementation,
    // which is not visible here; the names suggest what they control.
    struct Options {
        bool preserve_cdata { true };
        bool preserve_comments { false };
        bool treat_errors_as_fatal { true };
        // Optional callback, empty by default.
        // NOTE(review): the Function signature is damaged in this copy (return type
        // and the Optional's argument are missing); presumably it fetches an
        // external resource for a given system/public identifier.
        Function(SystemID const&, Optional const&)> resolve_external_resource {};
    };

    // Creates a parser over `source` with explicit options (moved into the parser).
    // NOTE(review): `m_source` is stored as a StringView, so presumably the caller
    // must keep the underlying buffer alive for the parser's lifetime; confirm.
    Parser(StringView source, Options options)
        : m_source(source)
        , m_lexer(source)
        , m_options(move(options))
    {
    }

    // Creates a parser over `source`; `m_options` is left default-initialized, so
    // the defaults declared in Options apply.
    explicit Parser(StringView source)
        : m_source(source)
        , m_lexer(source)
    {
    }

    // Entry points; defined out of line.
    // NOTE(review): the ErrorOr value/error types are missing in this copy.
    ErrorOr parse();
    ErrorOr parse_with_listener(Listener&);

    // Returns the list that parse_error() appends to while the current rule is
    // marked as accepting.
    Vector const& parse_error_causes() const { return m_parse_errors; }

private:
    struct EntityReference {
        Name name;
    };

    ErrorOr parse_internal();
    void append_node(NonnullOwnPtr);
    void append_text(String);
    void append_comment(String);
    void enter_node(Node&);
    void leave_node();

    // Where a reference being resolved was found.
    enum class ReferencePlacement {
        AttributeValue,
        Content,
    };
    ErrorOr resolve_reference(EntityReference const&, ReferencePlacement);
    ErrorOr resolve_parameter_entity_reference(EntityReference const&);

    enum class Required {
        No,
        Yes,
    };
    // The `Required` argument defaults to Required::No.
    ErrorOr skip_whitespace(Required = Required::No);

    // One member function per grammar rule; all are defined out of line.
    // NOTE(review): the names look like they follow the productions of the XML
    // specification; return types are incomplete in this copy.
    ErrorOr parse_prolog();
    ErrorOr parse_element();
    ErrorOr parse_misc();
    ErrorOr parse_xml_decl();
    ErrorOr parse_doctype_decl();
    ErrorOr parse_version_info();
    ErrorOr parse_encoding_decl();
    ErrorOr parse_standalone_document_decl();
    ErrorOr parse_eq();
    ErrorOr parse_comment();
    ErrorOr parse_processing_instruction();
    ErrorOr parse_processing_instruction_target();
    ErrorOr parse_name();
    ErrorOr, ParseError> parse_empty_element_tag();
    ErrorOr, ParseError> parse_start_tag();
    ErrorOr parse_end_tag();
    ErrorOr parse_content();
    ErrorOr parse_attribute();
    ErrorOr parse_attribute_value();
    ErrorOr, ParseError> parse_reference();
    ErrorOr parse_char_data();
    ErrorOr, ParseError> parse_internal_subset();
    ErrorOr, ParseError> parse_markup_declaration();
    ErrorOr, ParseError> parse_declaration_separator();
    ErrorOr, ParseError> parse_external_subset_declaration();
    ErrorOr parse_element_declaration();
    ErrorOr parse_attribute_list_declaration();
    ErrorOr parse_entity_declaration();
    ErrorOr parse_notation_declaration();
    ErrorOr parse_parameter_entity_reference();
    ErrorOr parse_content_spec();
    ErrorOr parse_attribute_definition();
    ErrorOr parse_nm_token();
    ErrorOr parse_general_entity_declaration();
    ErrorOr parse_parameter_entity_declaration();
    ErrorOr parse_public_id();
    ErrorOr parse_system_id();
    ErrorOr parse_external_id();
    ErrorOr parse_entity_value();
    ErrorOr parse_notation_data_declaration();
    ErrorOr parse_public_id_literal();
    ErrorOr parse_system_id_literal();
    ErrorOr parse_cdata_section();
    ErrorOr parse_attribute_value_inner(StringView disallow);
    ErrorOr, ParseError> parse_external_subset();
    ErrorOr parse_text_declaration();

    // Low-level matching helpers; defined out of line.
    // NOTE(review): the template parameter list and the arguments of
    // IsCallableWithArguments are missing in this copy.
    ErrorOr expect(StringView);
    template requires(IsCallableWithArguments) ErrorOr expect(Pred, StringView description);
    template requires(IsCallableWithArguments) ErrorOr expect_many(Pred, StringView description);

    // Nesting depth used to compute the width argument of the debug log lines
    // below. It is a static member, so it is shared by all Parser instances.
    static size_t s_debug_indent_level;

    // Returns an ArmedScopeGuard whose callback moves the lexer back to the
    // position it had when rollback_point() was called, then emits a debug line
    // showing up to 16 characters of the remaining input (newlines escaped).
    // `location` defaults to the call site.
    // NOTE(review): presumably callers disarm the guard once their rule has
    // matched; ArmedScopeGuard's semantics are not visible in this header.
    [[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
    {
        return ArmedScopeGuard {
            [this, position = m_lexer.tell(), location] {
                // Undo everything consumed since the guard was created.
                m_lexer.retreat(m_lexer.tell() - position);
                // NOTE(review): presumably keeps `location` referenced when
                // dbgln_if() is compiled out.
                (void)location;
                dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
            }
        };
    }

    // Returns a TemporaryChange object constructed over `m_current_rule.accept`
    // with the value `true`.
    // NOTE(review): presumably the previous value is restored when the returned
    // object is destroyed; TemporaryChange is defined elsewhere.
    [[nodiscard]] auto accept_rule()
    {
        return TemporaryChange { m_current_rule.accept, true };
    }

    // Marks entry into a grammar rule: logs an "Enter" line, bumps the debug
    // indent level, and replaces `m_current_rule` with { caller's function name,
    // accept = false }. The returned ScopeGuard's callback restores the saved rule,
    // decrements the indent level and logs a matching "Leave" line.
    // `location` defaults to the call site.
    [[nodiscard]] auto enter_rule(SourceLocation location = SourceLocation::current())
    {
        dbgln_if(XML_PARSER_DEBUG, "{:->{}}Enter {}", " ", s_debug_indent_level * 2, location);
        ++s_debug_indent_level;
        // Saved by value so the guard can put it back later.
        auto rule = m_current_rule;
        m_current_rule = { location.function_name(), false };
        return ScopeGuard {
            [location, rule, this] {
                m_current_rule = rule;
                --s_debug_indent_level;
                (void)location;
                dbgln_if(XML_PARSER_DEBUG, "{:->{}}Leave {}", " ", s_debug_indent_level * 2, location);
            }
        };
    }

    // Builds a ParseError from `args` and returns it. If the current rule is
    // marked as accepting, an entry with the same offset is also appended to
    // `m_parse_errors`, with the message formatted as "<rule>: <error>".
    // NOTE(review): the template parameter list and forward's type argument are
    // missing in this copy.
    template ParseError parse_error(Ts&&... args)
    {
        auto error = ParseError { forward(args)... };
        if (m_current_rule.accept) {
            auto rule_name = m_current_rule.rule.value_or("");
            // Drop the 6-character "parse_" prefix from the rule name.
            if (rule_name.starts_with("parse_"sv))
                rule_name = rule_name.substring_view(6);
            m_parse_errors.append({
                error.offset,
                String::formatted("{}: {}", rule_name, error.error),
            });
        }
        return error;
    }

    StringView m_source;
    GenericLexer m_lexer;
    Options m_options;
    // Null by default.
    // NOTE(review): presumably set by parse_with_listener(); not visible here.
    Listener* m_listener { nullptr };

    OwnPtr m_root_node;
    Node* m_entered_node { nullptr };
    // Defaults to Version11.
    Version m_version { Version::Version11 };
    bool m_in_compatibility_mode { false };
    String m_encoding;
    bool m_standalone { false };
    HashMap m_processing_instructions;
    // The rule currently being parsed: its name (set by enter_rule()) and whether
    // errors raised inside it should be recorded by parse_error().
    struct AcceptedRule {
        Optional rule {};
        bool accept { false };
    } m_current_rule {};

    // Errors recorded by parse_error(); exposed through parse_error_causes().
    Vector m_parse_errors;

    Optional m_doctype;
};
}

// Formatter specialization whose format() takes an XML::ParseError and renders it
// as "<error> at offset <offset>".
// NOTE(review): the template arguments of both Formatter names are missing in
// this copy.
template<>
struct AK::Formatter : public AK::Formatter {
    ErrorOr format(FormatBuilder& builder, XML::ParseError const& error)
    {
        return Formatter::format(builder, "{} at offset {}"sv, error.error, error.offset);
    }
};