summaryrefslogtreecommitdiff
path: root/Userland/Shell/PosixParser.h
diff options
context:
space:
mode:
author Ali Mohammad Pur <ali.mpfard@gmail.com> 2023-02-11 17:59:15 +0330
committer Ali Mohammad Pur <Ali.mpfard@gmail.com> 2023-02-13 23:00:15 +0330
commit 2a276c86d4b84668ab7f8daa2c1e37f7d9e5418f (patch)
tree 6d3b6533dc1c6e890eb8dde1a540397dfa2a0419 /Userland/Shell/PosixParser.h
parent 2dc1682274c7f1fcb3ba1ad4d613cf307581b748 (diff)
download serenity-2a276c86d4b84668ab7f8daa2c1e37f7d9e5418f.zip
Shell: Start implementing a POSIX-compliant parser
The parser is still very much a work-in-progress, but it can currently parse most of the basic bits. The only *completely* unimplemented things in the parser are: - heredocs (io_here) - alias expansion - arithmetic expansion. There is a whole suite of bugs, and syntax highlighting is unreliable at best. For now, this is not attached anywhere; a future commit will enable it for /bin/sh or a `Shell --posix` invocation.
Diffstat (limited to 'Userland/Shell/PosixParser.h')
-rw-r--r--Userland/Shell/PosixParser.h117
1 files changed, 117 insertions, 0 deletions
diff --git a/Userland/Shell/PosixParser.h b/Userland/Shell/PosixParser.h
new file mode 100644
index 0000000000..4f873128ce
--- /dev/null
+++ b/Userland/Shell/PosixParser.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <Shell/AST.h>
+#include <Shell/PosixLexer.h>
+
+namespace Shell::Posix {
+
+class Parser { // Parser in Shell::Posix: pulls Tokens from a Lexer into a buffer, returns AST::Node trees, and records errors.
+public:
+ Parser(StringView input, bool interactive = false) // Constructs the lexer over `input` and stores the interactive flag.
+ : m_lexer(input)
+ , m_in_interactive_mode(interactive)
+ , m_eof_token(Token::eof()) // kept as a member so peek()/consume() can return it by reference
+ {
+ fill_token_buffer(); // runs once at construction; definition is outside this header
+ }
+
+ RefPtr<AST::Node> parse(); // Public entry point; defined outside this header.
+ RefPtr<AST::Node> parse_word_list(); // Second public entry point; defined outside this header.
+
+ struct Error { // One recorded parse error: a formatted message plus an optional source position.
+ DeprecatedString message;
+ Optional<AST::Position> position;
+ };
+ auto& errors() const { return m_errors; } // Read-only reference to the errors recorded so far.
+
+private:
+ Optional<Token> next_expanded_token(); // Defined outside this header.
+ Vector<Token> perform_expansions(Vector<Token> tokens); // Takes the tokens by value; defined outside this header.
+ void fill_token_buffer(); // Called from the constructor; defined outside this header.
+
+ Token const& peek() const // Current token without advancing; m_eof_token once eof() is true.
+ {
+ if (eof())
+ return m_eof_token;
+ return m_token_buffer[m_token_index];
+ }
+ Token const& consume() // Returns the current token and advances the index; at eof() returns m_eof_token and does not advance.
+ {
+ if (eof())
+ return m_eof_token;
+ return m_token_buffer[m_token_index++];
+ }
+ void skip() // Advances past the current token; does nothing at eof().
+ {
+ if (eof())
+ return;
+ m_token_index++;
+ }
+ bool eof() const // True when the index equals the buffer size or the current token has type Eof.
+ {
+ return m_token_index == m_token_buffer.size() || m_token_buffer[m_token_index].type == Token::Type::Eof; // `||` short-circuits, so the buffer is not indexed when the index equals its size
+ }
+
+ struct CaseItemsResult { // Return type of parse_case_list(): pipe positions plus the parsed nodes.
+ Vector<AST::Position> pipe_positions;
+ NonnullRefPtrVector<AST::Node> nodes;
+ };
+
+ RefPtr<AST::Node> parse_complete_command(); // NOTE(review): the parse_* names below appear to mirror POSIX shell grammar rules; all are defined outside this header, so confirm there.
+ RefPtr<AST::Node> parse_list();
+ RefPtr<AST::Node> parse_and_or();
+ RefPtr<AST::Node> parse_pipeline();
+ RefPtr<AST::Node> parse_pipe_sequence();
+ RefPtr<AST::Node> parse_command();
+ RefPtr<AST::Node> parse_compound_command();
+ RefPtr<AST::Node> parse_subshell();
+ RefPtr<AST::Node> parse_compound_list();
+ RefPtr<AST::Node> parse_term();
+ RefPtr<AST::Node> parse_for_clause();
+ RefPtr<AST::Node> parse_case_clause();
+ CaseItemsResult parse_case_list(); // The only parse_* method here that does not return a single AST node.
+ RefPtr<AST::Node> parse_if_clause();
+ RefPtr<AST::Node> parse_while_clause();
+ RefPtr<AST::Node> parse_until_clause();
+ RefPtr<AST::Node> parse_function_definition();
+ RefPtr<AST::Node> parse_function_body();
+ RefPtr<AST::Node> parse_brace_group();
+ RefPtr<AST::Node> parse_do_group();
+ RefPtr<AST::Node> parse_simple_command();
+ RefPtr<AST::Node> parse_prefix();
+ RefPtr<AST::Node> parse_suffix();
+ RefPtr<AST::Node> parse_io_redirect();
+ RefPtr<AST::Node> parse_redirect_list();
+ RefPtr<AST::Node> parse_io_file(AST::Position, Optional<int> fd); // Takes a position and an optional file descriptor.
+ RefPtr<AST::Node> parse_io_here(AST::Position, Optional<int> fd); // Same parameter list as parse_io_file().
+ RefPtr<AST::Node> parse_word();
+
+ template<typename... Ts>
+ void error(Token const& token, CheckedFormatString<Ts...> fmt, Ts&&... args) // Formats a message and appends it to m_errors together with the token's position.
+ {
+ m_errors.append(Error {
+ DeprecatedString::formatted(fmt.view(), forward<Ts>(args)...),
+ token.position,
+ });
+ }
+
+ Lexer m_lexer; // constructed from the `input` passed to the constructor
+ bool m_in_interactive_mode { false }; // overwritten by the constructor's `interactive` argument
+ Vector<Token, 2> m_token_buffer; // tokens served by peek()/consume(); the `2` is presumably the vector's inline capacity (confirm in AK)
+ size_t m_token_index { 0 }; // index of the current token within m_token_buffer
+ Vector<Token> m_previous_token_buffer; // not used in this header; see the implementation file
+
+ Vector<Error> m_errors; // appended to by error(), exposed through errors()
+
+ Token m_eof_token; // initialized from Token::eof(); returned by peek()/consume() at end of input
+
+ bool m_disallow_command_prefix { true }; // defaults to true; not read in this header
+};
+
+}