diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2023-02-11 17:59:15 +0330 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2023-02-13 23:00:15 +0330 |
commit | 2a276c86d4b84668ab7f8daa2c1e37f7d9e5418f (patch) | |
tree | 6d3b6533dc1c6e890eb8dde1a540397dfa2a0419 /Userland/Shell/PosixParser.h | |
parent | 2dc1682274c7f1fcb3ba1ad4d613cf307581b748 (diff) | |
download | serenity-2a276c86d4b84668ab7f8daa2c1e37f7d9e5418f.zip |
Shell: Start implementing a POSIX-compliant parser
The parser is still very much a work-in-progress, but it can currently
parse most of the basic bits; the only *completely* unimplemented things
in the parser are:
- heredocs (io_here)
- alias expansion
- arithmetic expansion
There is a whole suite of bugs, and syntax highlighting is unreliable
at best.
For now, this is not attached anywhere; a future commit will enable it
for /bin/sh or a `Shell --posix` invocation.
Diffstat (limited to 'Userland/Shell/PosixParser.h')
-rw-r--r-- | Userland/Shell/PosixParser.h | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/Userland/Shell/PosixParser.h b/Userland/Shell/PosixParser.h new file mode 100644 index 0000000000..4f873128ce --- /dev/null +++ b/Userland/Shell/PosixParser.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <Shell/AST.h> +#include <Shell/PosixLexer.h> + +namespace Shell::Posix { + +class Parser { +public: + Parser(StringView input, bool interactive = false) + : m_lexer(input) + , m_in_interactive_mode(interactive) + , m_eof_token(Token::eof()) + { + fill_token_buffer(); + } + + RefPtr<AST::Node> parse(); + RefPtr<AST::Node> parse_word_list(); + + struct Error { + DeprecatedString message; + Optional<AST::Position> position; + }; + auto& errors() const { return m_errors; } + +private: + Optional<Token> next_expanded_token(); + Vector<Token> perform_expansions(Vector<Token> tokens); + void fill_token_buffer(); + + Token const& peek() const + { + if (eof()) + return m_eof_token; + return m_token_buffer[m_token_index]; + } + Token const& consume() + { + if (eof()) + return m_eof_token; + return m_token_buffer[m_token_index++]; + } + void skip() + { + if (eof()) + return; + m_token_index++; + } + bool eof() const + { + return m_token_index == m_token_buffer.size() || m_token_buffer[m_token_index].type == Token::Type::Eof; + } + + struct CaseItemsResult { + Vector<AST::Position> pipe_positions; + NonnullRefPtrVector<AST::Node> nodes; + }; + + RefPtr<AST::Node> parse_complete_command(); + RefPtr<AST::Node> parse_list(); + RefPtr<AST::Node> parse_and_or(); + RefPtr<AST::Node> parse_pipeline(); + RefPtr<AST::Node> parse_pipe_sequence(); + RefPtr<AST::Node> parse_command(); + RefPtr<AST::Node> parse_compound_command(); + RefPtr<AST::Node> parse_subshell(); + RefPtr<AST::Node> parse_compound_list(); + RefPtr<AST::Node> parse_term(); + RefPtr<AST::Node> parse_for_clause(); + RefPtr<AST::Node> parse_case_clause(); + CaseItemsResult 
parse_case_list(); + RefPtr<AST::Node> parse_if_clause(); + RefPtr<AST::Node> parse_while_clause(); + RefPtr<AST::Node> parse_until_clause(); + RefPtr<AST::Node> parse_function_definition(); + RefPtr<AST::Node> parse_function_body(); + RefPtr<AST::Node> parse_brace_group(); + RefPtr<AST::Node> parse_do_group(); + RefPtr<AST::Node> parse_simple_command(); + RefPtr<AST::Node> parse_prefix(); + RefPtr<AST::Node> parse_suffix(); + RefPtr<AST::Node> parse_io_redirect(); + RefPtr<AST::Node> parse_redirect_list(); + RefPtr<AST::Node> parse_io_file(AST::Position, Optional<int> fd); + RefPtr<AST::Node> parse_io_here(AST::Position, Optional<int> fd); + RefPtr<AST::Node> parse_word(); + + template<typename... Ts> + void error(Token const& token, CheckedFormatString<Ts...> fmt, Ts&&... args) + { + m_errors.append(Error { + DeprecatedString::formatted(fmt.view(), forward<Ts>(args)...), + token.position, + }); + } + + Lexer m_lexer; + bool m_in_interactive_mode { false }; + Vector<Token, 2> m_token_buffer; + size_t m_token_index { 0 }; + Vector<Token> m_previous_token_buffer; + + Vector<Error> m_errors; + + Token m_eof_token; + + bool m_disallow_command_prefix { true }; +}; + +} |