 Userland/Shell/PosixLexer.cpp  | 227 +-
 Userland/Shell/PosixLexer.h    |  25 +-
 Userland/Shell/PosixParser.cpp |  85 +-
 Userland/Shell/PosixParser.h   |  14 +-
 4 files changed, 332 insertions(+), 19 deletions(-)
diff --git a/Userland/Shell/PosixLexer.cpp b/Userland/Shell/PosixLexer.cpp
index a3bdd11cda..aa25e05a91 100644
--- a/Userland/Shell/PosixLexer.cpp
+++ b/Userland/Shell/PosixLexer.cpp
@@ -23,8 +23,11 @@ static bool is_part_of_operator(StringView text, char ch)
 
 namespace Shell::Posix {
 
-Vector<Token> Lexer::batch_next()
+Vector<Token> Lexer::batch_next(Optional<Reduction> starting_reduction)
 {
+    if (starting_reduction.has_value())
+        m_next_reduction = *starting_reduction;
+
     for (; m_next_reduction != Reduction::None;) {
         auto result = reduce(m_next_reduction);
         m_next_reduction = result.next_reduction;
@@ -55,6 +58,18 @@ char Lexer::consume()
     return ch;
 }
 
+void Lexer::reconsume(StringView string)
+{
+    for (auto byte : string.bytes()) {
+        if (byte == '\n') {
+            m_state.position.end_line.line_number++;
+            m_state.position.end_line.line_column = 0;
+        }
+
+        m_state.position.end_offset++;
+    }
+}
+
 bool Lexer::consume_specific(char ch)
 {
     if (m_lexer.peek() == ch) {
@@ -95,6 +110,8 @@ Lexer::ReductionResult Lexer::reduce(Reduction reduction)
         return reduce_command_or_arithmetic_substitution_expansion();
     case Reduction::ExtendedParameterExpansion:
         return reduce_extended_parameter_expansion();
+    case Reduction::HeredocContents:
+        return reduce_heredoc_contents();
     }
 
     VERIFY_NOT_REACHED();
@@ -108,6 +125,91 @@ Lexer::ReductionResult Lexer::reduce_end()
     };
 }
 
+Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
+{
+    StringBuilder builder;
+    enum ParseState {
+        Free,
+        InDoubleQuotes,
+        InSingleQuotes,
+    };
+    Vector<ParseState, 4> parse_state;
+    parse_state.append(Free);
+    bool escaped = false;
+    bool had_a_single_quote_segment = false;
+
+    for (auto byte : token.value.bytes()) {
+        switch (parse_state.last()) {
+        case Free:
+            switch (byte) {
+            case '"':
+                if (escaped) {
+                    builder.append(byte);
+                    escaped = false;
+                } else {
+                    parse_state.append(InDoubleQuotes);
+                }
+                break;
+            case '\'':
+                if (escaped) {
+                    builder.append(byte);
+                    escaped = false;
+                } else {
+                    had_a_single_quote_segment = true;
+                    parse_state.append(InSingleQuotes);
+                }
+                break;
+            case '\\':
+                if (escaped) {
+                    builder.append(byte);
+                    escaped = false;
+                } else {
+                    escaped = true;
+                }
+                break;
+            default:
+                if (escaped) {
+                    builder.append('\\');
+                    escaped = false;
+                }
+                builder.append(byte);
+                break;
+            }
+            break;
+        case InDoubleQuotes:
+            if (!escaped && byte == '"') {
+                parse_state.take_last();
+                break;
+            }
+            if (escaped) {
+                if (byte != '"')
+                    builder.append('\\');
+                builder.append(byte);
+                break;
+            }
+            if (byte == '\\')
+                escaped = true;
+            else
+                builder.append(byte);
+            break;
+        case InSingleQuotes:
+            if (byte == '\'') {
+                parse_state.take_last();
+                break;
+            }
+            builder.append(byte);
+            break;
+        }
+    }
+
+    // NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
+
+    return {
+        .key = builder.to_deprecated_string(),
+        .allow_interpolation = !had_a_single_quote_segment,
+    };
+}
+
 Lexer::ReductionResult Lexer::reduce_operator()
 {
     if (m_lexer.is_eof()) {
@@ -142,8 +244,25 @@ Lexer::ReductionResult Lexer::reduce_operator()
         m_state.position.start_line = m_state.position.end_line;
     }
 
+    auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
+
     auto result = reduce(Reduction::Start);
     tokens.extend(move(result.tokens));
+
+    while (expect_heredoc_entry && tokens.size() == 1) {
+        result = reduce(result.next_reduction);
+        tokens.extend(move(result.tokens));
+    }
+
+    if (expect_heredoc_entry && tokens.size() > 1) {
+        auto [key, interpolation] = process_heredoc_key(tokens[1]);
+        m_state.heredoc_entries.enqueue(HeredocEntry {
+            .key = key,
+            .allow_interpolation = interpolation,
+            .dedent = tokens[0].type == Token::Type::DoubleLessDash,
+        });
+    }
+
     return {
         .tokens = move(tokens),
         .next_reduction = result.next_reduction,
@@ -160,6 +279,7 @@ Lexer::ReductionResult Lexer::reduce_comment()
     }
 
     if (consume() == '\n') {
+        m_state.on_new_line = true;
         return {
             .tokens = { Token::newline() },
             .next_reduction = Reduction::Start,
@@ -352,8 +472,64 @@ Lexer::ReductionResult Lexer::reduce_command_expansion()
     };
 }
 
+Lexer::ReductionResult Lexer::reduce_heredoc_contents()
+{
+    if (m_lexer.is_eof()) {
+        auto tokens = Token::maybe_from_state(m_state);
+        m_state.buffer.clear();
+        m_state.position.start_offset = m_state.position.end_offset;
+        m_state.position.start_line = m_state.position.end_line;
+
+        return {
+            .tokens = move(tokens),
+            .next_reduction = Reduction::End,
+        };
+    }
+
+    if (!m_state.escaping && consume_specific('\\')) {
+        m_state.escaping = true;
+        m_state.buffer.append('\\');
+        return {
+            .tokens = {},
+            .next_reduction = Reduction::HeredocContents,
+        };
+    }
+
+    if (!m_state.escaping && consume_specific('$')) {
+        m_state.buffer.append('$');
+        if (m_lexer.next_is("("))
+            m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
+        else
+            m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
+
+        return {
+            .tokens = {},
+            .next_reduction = Reduction::Expansion,
+        };
+    }
+
+    if (!m_state.escaping && consume_specific('`')) {
+        m_state.buffer.append('`');
+        m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
+        return {
+            .tokens = {},
+            .next_reduction = Reduction::CommandExpansion,
+        };
+    }
+
+    m_state.escaping = false;
+    m_state.buffer.append(consume());
+    return {
+        .tokens = {},
+        .next_reduction = Reduction::HeredocContents,
+    };
+}
+
 Lexer::ReductionResult Lexer::reduce_start()
 {
+    auto was_on_new_line = m_state.on_new_line;
+    m_state.on_new_line = false;
+
     if (m_lexer.is_eof()) {
         auto tokens = Token::maybe_from_state(m_state);
         m_state.buffer.clear();
@@ -366,6 +542,51 @@ Lexer::ReductionResult Lexer::reduce_start()
         };
     }
 
+    if (was_on_new_line && !m_state.heredoc_entries.is_empty()) {
+        auto const& entry = m_state.heredoc_entries.head();
+
+        auto start_index = m_lexer.tell();
+        Optional<size_t> end_index;
+
+        for (; !m_lexer.is_eof();) {
+            auto index = m_lexer.tell();
+            auto possible_end_index = m_lexer.tell();
+            if (m_lexer.consume_specific('\n')) {
+                if (entry.dedent)
+                    m_lexer.ignore_while(is_any_of("\t"sv));
+                if (m_lexer.consume_specific(entry.key.view())) {
+                    if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
+                        end_index = possible_end_index;
+                        break;
+                    }
+                }
+            }
+            if (m_lexer.tell() == index)
+                m_lexer.ignore();
+        }
+
+        auto contents = m_lexer.input().substring_view(start_index, end_index.value_or(m_lexer.tell()) - start_index);
+        reconsume(contents);
+
+        m_state.buffer.clear();
+        m_state.buffer.append(contents);
+
+        auto token = Token::maybe_from_state(m_state).first();
+        token.relevant_heredoc_key = entry.key;
+        token.type = Token::Type::HeredocContents;
+
+        m_state.heredoc_entries.dequeue();
+
+        m_state.on_new_line = true;
+
+        m_state.buffer.clear();
+
+        return {
+            .tokens = { move(token) },
+            .next_reduction = Reduction::Start,
+        };
+    }
+
     if (m_state.escaping && consume_specific('\n')) {
         m_state.escaping = false;
 
@@ -391,6 +612,8 @@ Lexer::ReductionResult Lexer::reduce_start()
         auto tokens = Token::maybe_from_state(m_state);
         tokens.append(Token::newline());
 
+        m_state.on_new_line = true;
+
         m_state.buffer.clear();
         m_state.position.start_offset = m_state.position.end_offset;
         m_state.position.start_line = m_state.position.end_line;
@@ -678,6 +901,8 @@ StringView Token::type_name() const
         return "Clobber"sv;
     case Type::Semicolon:
         return "Semicolon"sv;
+    case Type::HeredocContents:
+        return "HeredocContents"sv;
     case Type::AssignmentWord:
        return "AssignmentWord"sv;
     case Type::Bang:
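
The quote-removal pass in process_heredoc_key() above both strips quoting from the delimiter word and decides whether the heredoc body may later be interpolated: in this patch, only a single-quoted segment in the delimiter (had_a_single_quote_segment) turns interpolation off. Below is a minimal standalone paraphrase of that state machine, using std:: types instead of AK and with the double-quote backslash rules simplified; it is an illustrative sketch, not part of the commit.

    #include <cassert>
    #include <string>
    #include <utility>

    // Returns { key with quotes and escapes removed, allow_interpolation }.
    static std::pair<std::string, bool> process_key(std::string const& word)
    {
        std::string key;
        bool saw_single_quotes = false; // a single-quoted segment makes the body literal
        bool escaped = false;
        char quote = 0; // 0 = unquoted, otherwise '\'' or '"'

        for (char c : word) {
            if (quote == '\'') { // inside single quotes everything is literal
                if (c == '\'')
                    quote = 0;
                else
                    key += c;
            } else if (escaped) { // simplified: backslash escapes the next byte verbatim
                key += c;
                escaped = false;
            } else if (c == '\\') {
                escaped = true;
            } else if (c == '"') {
                quote = quote == '"' ? 0 : '"';
            } else if (c == '\'' && quote == 0) {
                quote = '\'';
                saw_single_quotes = true;
            } else {
                key += c;
            }
        }
        return { key, !saw_single_quotes };
    }

    int main()
    {
        assert(process_key("EOF").second);             // cat <<EOF    -> interpolation stays on
        assert(!process_key("'EOF'").second);          // cat <<'EOF'  -> literal body
        assert(process_key("\"EOF\"").first == "EOF"); // quotes are stripped from the key
        return 0;
    }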
diff --git a/Userland/Shell/PosixLexer.h b/Userland/Shell/PosixLexer.h
index b42f5a3e62..3c002b6db7 100644
--- a/Userland/Shell/PosixLexer.h
+++ b/Userland/Shell/PosixLexer.h
@@ -8,6 +8,7 @@
 
 #include <AK/DeprecatedString.h>
 #include <AK/GenericLexer.h>
+#include <AK/Queue.h>
 #include <AK/Variant.h>
 #include <AK/Vector.h>
 #include <Shell/AST.h>
@@ -29,6 +30,9 @@ enum class Reduction {
     ParameterExpansion,
     CommandOrArithmeticSubstitutionExpansion,
     ExtendedParameterExpansion,
+
+    // Separate rule, not used by the main flow.
+    HeredocContents,
 };
 
 struct ExpansionRange {
@@ -177,6 +181,12 @@ struct ResolvedCommandExpansion {
 
 using ResolvedExpansion = Variant<ResolvedParameterExpansion, ResolvedCommandExpansion>;
 
+struct HeredocEntry {
+    DeprecatedString key;
+    bool allow_interpolation;
+    bool dedent;
+};
+
 struct State {
     StringBuilder buffer {};
     Reduction previous_reduction { Reduction::Start };
@@ -194,6 +204,8 @@ struct State {
         },
     };
     Vector<Expansion> expansions {};
+    Queue<HeredocEntry> heredoc_entries {};
+    bool on_new_line { true };
 };
 
 struct Token {
@@ -219,6 +231,7 @@ struct Token {
         DoubleLessDash,
         Clobber,
         Semicolon,
+        HeredocContents,
 
         // Not produced by this lexer, but generated in later stages.
         AssignmentWord,
@@ -249,6 +262,7 @@ struct Token {
     Vector<Expansion> expansions;
     Vector<ResolvedExpansion> resolved_expansions {};
     StringView original_text;
+    Optional<DeprecatedString> relevant_heredoc_key {};
     bool could_be_start_of_a_simple_command { false };
 
     static Vector<Token> maybe_from_state(State const& state)
@@ -378,7 +392,14 @@ public:
     {
     }
 
-    Vector<Token> batch_next();
+    Vector<Token> batch_next(Optional<Reduction> starting_reduction = {});
+
+    struct HeredocKeyResult {
+        DeprecatedString key;
+        bool allow_interpolation;
+    };
+
+    static HeredocKeyResult process_heredoc_key(Token const&);
 
 private:
     struct ReductionResult {
@@ -400,9 +421,11 @@ private:
     ReductionResult reduce_parameter_expansion();
     ReductionResult reduce_command_or_arithmetic_substitution_expansion();
     ReductionResult reduce_extended_parameter_expansion();
+    ReductionResult reduce_heredoc_contents();
 
     char consume();
     bool consume_specific(char);
+    void reconsume(StringView);
     ExpansionRange range(ssize_t offset = 0) const;
 
     GenericLexer m_lexer;
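
For context on the capture loop added to reduce_start() in the lexer diff above: when a new line begins while heredoc entries are queued, the lexer scans ahead for a line consisting solely of the key (stripping leading tabs first when the <<- form set dedent), slices everything before that line out of the input as a single HeredocContents token tagged with relevant_heredoc_key, and then accounts for the consumed text via reconsume(). Here is a rough standalone model of just the delimiter scan, in plain std::; the patch additionally tracks line and column positions, and, like this sketch, lets an unterminated heredoc run to end of input.

    #include <algorithm>
    #include <cstddef>
    #include <string_view>

    // Returns the heredoc body: everything before the newline that precedes
    // the first line equal to `key`, or the whole input if no terminator exists.
    static std::string_view heredoc_body(std::string_view input, std::string_view key, bool dedent)
    {
        std::size_t line_start = 0;
        while (line_start <= input.size()) {
            auto line_end = input.find('\n', line_start);
            auto line = input.substr(line_start,
                line_end == std::string_view::npos ? std::string_view::npos : line_end - line_start);
            if (dedent) // the <<- form compares the key after stripping leading tabs
                line.remove_prefix(std::min(line.find_first_not_of('\t'), line.size()));
            if (line == key)
                return input.substr(0, line_start == 0 ? 0 : line_start - 1);
            if (line_end == std::string_view::npos)
                break;
            line_start = line_end + 1;
        }
        return input; // unterminated heredoc: body runs to end of input
    }

    int main()
    {
        return heredoc_body("hello\n\tworld\n\tEOF\nrest", "EOF", true) == "hello\n\tworld" ? 0 : 1;
    }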
diff --git a/Userland/Shell/PosixParser.cpp b/Userland/Shell/PosixParser.cpp
index 15b75ae5c8..65570cb5b8 100644
--- a/Userland/Shell/PosixParser.cpp
+++ b/Userland/Shell/PosixParser.cpp
@@ -9,6 +9,11 @@
 #include <AK/StringUtils.h>
 #include <Shell/PosixParser.h>
 
+static Shell::AST::Position empty_position()
+{
+    return { 0, 0, { 0, 0 }, { 0, 0 } };
+}
+
 template<typename T, typename... Ts>
 static inline bool is_one_of(T const& value, Ts const&... values)
 {
@@ -22,7 +27,8 @@ static inline bool is_io_operator(Shell::Posix::Token const& token)
         Token::Type::Less, Token::Type::Great,
         Token::Type::LessAnd, Token::Type::GreatAnd,
         Token::Type::DoubleLess, Token::Type::DoubleGreat,
-        Token::Type::LessGreat, Token::Type::Clobber);
+        Token::Type::DoubleLessDash, Token::Type::LessGreat,
+        Token::Type::Clobber);
 }
 
 static inline bool is_separator(Shell::Posix::Token const& token)
@@ -95,10 +101,10 @@ static inline bool is_valid_name(StringView word)
 }
 
 namespace Shell::Posix {
 
-void Parser::fill_token_buffer()
+void Parser::fill_token_buffer(Optional<Reduction> starting_reduction)
 {
     for (;;) {
-        auto token = next_expanded_token();
+        auto token = next_expanded_token(starting_reduction);
         if (!token.has_value())
             break;
 #if SHELL_POSIX_PARSER_DEBUG
@@ -126,10 +132,36 @@ RefPtr<AST::Node> Parser::parse()
     return parse_complete_command();
 }
 
-Optional<Token> Parser::next_expanded_token()
+void Parser::handle_heredoc_contents()
+{
+    while (!eof() && m_token_buffer[m_token_index].type == Token::Type::HeredocContents) {
+        auto& token = m_token_buffer[m_token_index++];
+        auto entry = m_unprocessed_heredoc_entries.get(token.relevant_heredoc_key.value());
+        if (!entry.has_value()) {
+            error(token, "Discarding unexpected heredoc contents for key '{}'", *token.relevant_heredoc_key);
+            continue;
+        }
+
+        auto& heredoc = **entry;
+
+        RefPtr<AST::Node> contents;
+        if (heredoc.allow_interpolation()) {
+            Parser parser { token.value, m_in_interactive_mode, Reduction::HeredocContents };
+            contents = parser.parse_word();
+        } else {
+            contents = make_ref_counted<AST::StringLiteral>(token.position.value_or(empty_position()), token.value, AST::StringLiteral::EnclosureType::None);
+        }
+
+        if (contents)
+            heredoc.set_contents(contents);
+        m_unprocessed_heredoc_entries.remove(*token.relevant_heredoc_key);
+    }
+}
+
+Optional<Token> Parser::next_expanded_token(Optional<Reduction> starting_reduction)
 {
     while (m_token_buffer.find_if([](auto& token) { return token.type == Token::Type::Eof; }).is_end()) {
-        auto tokens = m_lexer.batch_next();
+        auto tokens = m_lexer.batch_next(starting_reduction);
         auto expanded = perform_expansions(move(tokens));
         m_token_buffer.extend(expanded);
     }
@@ -589,11 +621,6 @@ Vector<Token> Parser::perform_expansions(Vector<Token> tokens)
     return tokens;
 }
 
-static AST::Position empty_position()
-{
-    return { 0, 0, { 0, 0 }, { 0, 0 } };
-}
-
 RefPtr<AST::Node> Parser::parse_complete_command()
 {
     auto list = [&] {
@@ -1835,13 +1862,47 @@ RefPtr<AST::Node> Parser::parse_io_redirect()
     if (auto io_file = parse_io_file(start_position, io_number))
         return io_file;
 
-    // if (auto io_here = parse_io_here(start_position, io_number))
-    //     return io_here;
+    if (auto io_here = parse_io_here(start_position, io_number))
+        return io_here;
 
     m_token_index = start_index;
     return nullptr;
 }
 
+RefPtr<AST::Node> Parser::parse_io_here(AST::Position start_position, Optional<int> fd)
+{
+    // io_here: IO_NUMBER? (DLESS | DLESSDASH) WORD
+    auto io_operator = peek().type;
+    if (!is_one_of(io_operator, Token::Type::DoubleLess, Token::Type::DoubleLessDash))
+        return nullptr;
+
+    auto io_operator_token = consume();
+
+    auto redirection_fd = fd.value_or(0);
+
+    auto end_keyword = consume();
+    if (!is_one_of(end_keyword.type, Token::Type::Word, Token::Type::Token))
+        return make_ref_counted<AST::SyntaxError>(io_operator_token.position.value_or(start_position), "Expected a heredoc keyword", true);
+
+    auto [end_keyword_text, allow_interpolation] = Lexer::process_heredoc_key(end_keyword);
+    RefPtr<AST::SyntaxError> error;
+
+    auto position = start_position.with_end(peek().position.value_or(empty_position()));
+    auto result = make_ref_counted<AST::Heredoc>(
+        position,
+        end_keyword_text,
+        allow_interpolation,
+        io_operator == Token::Type::DoubleLessDash,
+        Optional<int> { redirection_fd });
+
+    m_unprocessed_heredoc_entries.set(end_keyword_text, result);
+
+    if (error)
+        result->set_is_syntax_error(*error);
+
+    return result;
+}
+
 RefPtr<AST::Node> Parser::parse_io_file(AST::Position start_position, Optional<int> fd)
 {
     auto start_index = m_token_index;
diff --git a/Userland/Shell/PosixParser.h b/Userland/Shell/PosixParser.h
index 4f873128ce..3ffebaa347 100644
--- a/Userland/Shell/PosixParser.h
+++ b/Userland/Shell/PosixParser.h
@@ -13,12 +13,12 @@ namespace Shell::Posix {
 
 class Parser {
 public:
-    Parser(StringView input, bool interactive = false)
+    Parser(StringView input, bool interactive = false, Optional<Reduction> starting_reduction = {})
         : m_lexer(input)
         , m_in_interactive_mode(interactive)
         , m_eof_token(Token::eof())
     {
-        fill_token_buffer();
+        fill_token_buffer(starting_reduction);
     }
 
     RefPtr<AST::Node> parse();
@@ -31,20 +31,23 @@ public:
     auto& errors() const { return m_errors; }
 
 private:
-    Optional<Token> next_expanded_token();
+    Optional<Token> next_expanded_token(Optional<Reduction> starting_reduction = {});
     Vector<Token> perform_expansions(Vector<Token> tokens);
-    void fill_token_buffer();
+    void fill_token_buffer(Optional<Reduction> starting_reduction = {});
+    void handle_heredoc_contents();
 
-    Token const& peek() const
+    Token const& peek()
     {
         if (eof())
             return m_eof_token;
+        handle_heredoc_contents();
         return m_token_buffer[m_token_index];
     }
 
     Token const& consume()
     {
         if (eof())
             return m_eof_token;
+        handle_heredoc_contents();
         return m_token_buffer[m_token_index++];
     }
 
     void skip()
@@ -108,6 +111,7 @@ private:
 
     Vector<Token> m_previous_token_buffer;
     Vector<Error> m_errors;
+    HashMap<DeprecatedString, NonnullRefPtr<AST::Heredoc>> m_unprocessed_heredoc_entries;
     Token m_eof_token;
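
Taken together, the parser handles heredocs in two phases: parse_io_here() registers an AST::Heredoc with empty contents under its key in m_unprocessed_heredoc_entries, and handle_heredoc_contents() attaches the body once peek() or consume() reaches the lexer's HeredocContents token, re-parsing it as a word (seeded with Reduction::HeredocContents) when the unquoted key left interpolation enabled. The following is a hypothetical end-to-end sketch of the patched entry points; it only builds inside a SerenityOS tree, and the exit-code handling is purely illustrative.

    #include <Shell/PosixParser.h>

    int main()
    {
        // <<- lets the body and the terminating key be tab-indented; the
        // unquoted key "EOF" keeps $name interpolation enabled.
        auto source = "cat <<-EOF\n\thello, $name\n\tEOF\n"sv;
        Shell::Posix::Parser parser { source };
        auto ast = parser.parse(); // heredoc contents get attached during parsing
        return ast && !ast->is_syntax_error() ? 0 : 1;
    }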