-rw-r--r--   Userland/Shell/PosixLexer.cpp  | 227
-rw-r--r--   Userland/Shell/PosixLexer.h    |  25
-rw-r--r--   Userland/Shell/PosixParser.cpp |  85
-rw-r--r--   Userland/Shell/PosixParser.h   |  14
4 files changed, 332 insertions(+), 19 deletions(-)
diff --git a/Userland/Shell/PosixLexer.cpp b/Userland/Shell/PosixLexer.cpp
index a3bdd11cda..aa25e05a91 100644
--- a/Userland/Shell/PosixLexer.cpp
+++ b/Userland/Shell/PosixLexer.cpp
@@ -23,8 +23,11 @@ static bool is_part_of_operator(StringView text, char ch)
namespace Shell::Posix {
-Vector<Token> Lexer::batch_next()
+Vector<Token> Lexer::batch_next(Optional<Reduction> starting_reduction)
{
+ if (starting_reduction.has_value())
+ m_next_reduction = *starting_reduction;
+
for (; m_next_reduction != Reduction::None;) {
auto result = reduce(m_next_reduction);
m_next_reduction = result.next_reduction;
@@ -55,6 +58,18 @@ char Lexer::consume()
return ch;
}
+void Lexer::reconsume(StringView string)
+{
+ for (auto byte : string.bytes()) {
+ if (byte == '\n') {
+ m_state.position.end_line.line_number++;
+ m_state.position.end_line.line_column = 0;
+ }
+
+ m_state.position.end_offset++;
+ }
+}
+
bool Lexer::consume_specific(char ch)
{
if (m_lexer.peek() == ch) {
@@ -95,6 +110,8 @@ Lexer::ReductionResult Lexer::reduce(Reduction reduction)
return reduce_command_or_arithmetic_substitution_expansion();
case Reduction::ExtendedParameterExpansion:
return reduce_extended_parameter_expansion();
+ case Reduction::HeredocContents:
+ return reduce_heredoc_contents();
}
VERIFY_NOT_REACHED();
@@ -108,6 +125,91 @@ Lexer::ReductionResult Lexer::reduce_end()
};
}
+Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
+{
+ StringBuilder builder;
+ enum ParseState {
+ Free,
+ InDoubleQuotes,
+ InSingleQuotes,
+ };
+ Vector<ParseState, 4> parse_state;
+ parse_state.append(Free);
+ bool escaped = false;
+ bool had_a_single_quote_segment = false;
+
+ for (auto byte : token.value.bytes()) {
+ switch (parse_state.last()) {
+ case Free:
+ switch (byte) {
+ case '"':
+ if (escaped) {
+ builder.append(byte);
+ escaped = false;
+ } else {
+ parse_state.append(InDoubleQuotes);
+ }
+ break;
+ case '\'':
+ if (escaped) {
+ builder.append(byte);
+ escaped = false;
+ } else {
+ had_a_single_quote_segment = true;
+ parse_state.append(InSingleQuotes);
+ }
+ break;
+ case '\\':
+ if (escaped) {
+ builder.append(byte);
+ escaped = false;
+ } else {
+ escaped = true;
+ }
+ break;
+ default:
+ if (escaped) {
+ builder.append('\\');
+ escaped = false;
+ }
+ builder.append(byte);
+ break;
+ }
+ break;
+ case InDoubleQuotes:
+ if (!escaped && byte == '"') {
+ parse_state.take_last();
+ break;
+ }
+ if (escaped) {
+ if (byte != '"')
+ builder.append('\\');
+ builder.append(byte);
+ break;
+ }
+ if (byte == '\\')
+ escaped = true;
+ else
+ builder.append(byte);
+ break;
+ case InSingleQuotes:
+ if (byte == '\'') {
+ parse_state.take_last();
+ break;
+ }
+ builder.append(byte);
+ break;
+ }
+ }
+
+ // NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
+
+ return {
+ .key = builder.to_deprecated_string(),
+ .allow_interpolation = !had_a_single_quote_segment,
+ };
+}
+
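The quote-removal rule above mirrors POSIX here-document semantics: the delimiter loses its quotes, and the mere presence of a single-quoted segment anywhere in it disables expansion of the body. A minimal standalone sketch of that rule, written against the standard library rather than AK (illustrative only, not part of this patch; it also flattens some escape subtleties the real state machine preserves inside double quotes):

    #include <string>
    #include <string_view>

    struct HeredocKey {
        std::string key;
        bool allow_interpolation;
    };

    // Simplified model of Lexer::process_heredoc_key: strip quoting,
    // remember whether any single-quoted segment appeared.
    static HeredocKey process_key(std::string_view raw)
    {
        std::string out;
        bool in_single = false, in_double = false, escaped = false, saw_single = false;
        for (char c : raw) {
            if (in_single) {
                if (c == '\'')
                    in_single = false;
                else
                    out += c; // everything is literal inside single quotes
            } else if (escaped) {
                out += c;
                escaped = false;
            } else if (c == '\\') {
                escaped = true;
            } else if (c == '"') {
                in_double = !in_double;
            } else if (c == '\'' && !in_double) {
                in_single = true;
                saw_single = true;
            } else {
                out += c;
            }
        }
        return { out, !saw_single };
    }

    // process_key("EOF")     -> { "EOF", true  }
    // process_key("E\"O\"F") -> { "EOF", true  }
    // process_key("'EOF'")   -> { "EOF", false }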
Lexer::ReductionResult Lexer::reduce_operator()
{
if (m_lexer.is_eof()) {
@@ -142,8 +244,25 @@ Lexer::ReductionResult Lexer::reduce_operator()
m_state.position.start_line = m_state.position.end_line;
}
+ auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
+
auto result = reduce(Reduction::Start);
tokens.extend(move(result.tokens));
+
+ while (expect_heredoc_entry && tokens.size() == 1) {
+ result = reduce(result.next_reduction);
+ tokens.extend(move(result.tokens));
+ }
+
+ if (expect_heredoc_entry && tokens.size() > 1) {
+ auto [key, interpolation] = process_heredoc_key(tokens[1]);
+ m_state.heredoc_entries.enqueue(HeredocEntry {
+ .key = key,
+ .allow_interpolation = interpolation,
+ .dedent = tokens[0].type == Token::Type::DoubleLessDash,
+ });
+ }
+
return {
.tokens = move(tokens),
.next_reduction = result.next_reduction,
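Because pending keys go into a Queue, multiple here-documents on one command line are consumed first-in-first-out once a newline is reached, matching POSIX: the body for the first operator comes back first. A hypothetical input illustrating the ordering (assuming the Lexer is constructed from a StringView, as its uses elsewhere in this patch suggest):

    // "cat <<A <<B" queues entries for A, then B; after the newline the
    // lexer emits two HeredocContents tokens, keyed "A" then "B".
    Shell::Posix::Lexer lexer { "cat <<A <<B\nfirst body\nA\nsecond body\nB\n"sv };
    auto tokens = lexer.batch_next();
    // ... later batches deliver the two bodies in operator order.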
@@ -160,6 +279,7 @@ Lexer::ReductionResult Lexer::reduce_comment()
}
if (consume() == '\n') {
+ m_state.on_new_line = true;
return {
.tokens = { Token::newline() },
.next_reduction = Reduction::Start,
@@ -352,8 +472,64 @@ Lexer::ReductionResult Lexer::reduce_command_expansion()
};
}
+Lexer::ReductionResult Lexer::reduce_heredoc_contents()
+{
+ if (m_lexer.is_eof()) {
+ auto tokens = Token::maybe_from_state(m_state);
+ m_state.buffer.clear();
+ m_state.position.start_offset = m_state.position.end_offset;
+ m_state.position.start_line = m_state.position.end_line;
+
+ return {
+ .tokens = move(tokens),
+ .next_reduction = Reduction::End,
+ };
+ }
+
+ if (!m_state.escaping && consume_specific('\\')) {
+ m_state.escaping = true;
+ m_state.buffer.append('\\');
+ return {
+ .tokens = {},
+ .next_reduction = Reduction::HeredocContents,
+ };
+ }
+
+ if (!m_state.escaping && consume_specific('$')) {
+ m_state.buffer.append('$');
+ if (m_lexer.next_is("("))
+ m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
+ else
+ m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
+
+ return {
+ .tokens = {},
+ .next_reduction = Reduction::Expansion,
+ };
+ }
+
+ if (!m_state.escaping && consume_specific('`')) {
+ m_state.buffer.append('`');
+ m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
+ return {
+ .tokens = {},
+ .next_reduction = Reduction::CommandExpansion,
+ };
+ }
+
+ m_state.escaping = false;
+ m_state.buffer.append(consume());
+ return {
+ .tokens = {},
+ .next_reduction = Reduction::HeredocContents,
+ };
+}
+
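reduce_heredoc_contents is only entered for bodies whose key permits interpolation; it recognizes the same `\`, `$...`, and backtick forms as ordinary words, while quoted-key bodies bypass it entirely (see Parser::handle_heredoc_contents below, which wraps them in a plain StringLiteral). A usage sketch, assuming a SerenityOS tree where these headers build:

    #include <Shell/PosixParser.h>

    // Unquoted key: "$USER" in the body is lexed through
    // Reduction::HeredocContents and becomes a parameter expansion.
    Shell::Posix::Parser interpolating { "cat <<EOF\nhello $USER\nEOF\n"sv };
    auto with_expansion = interpolating.parse();

    // Single-quoted key: the body is kept verbatim as a string literal.
    Shell::Posix::Parser verbatim { "cat <<'EOF'\nhello $USER\nEOF\n"sv };
    auto literal = verbatim.parse();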
Lexer::ReductionResult Lexer::reduce_start()
{
+ auto was_on_new_line = m_state.on_new_line;
+ m_state.on_new_line = false;
+
if (m_lexer.is_eof()) {
auto tokens = Token::maybe_from_state(m_state);
m_state.buffer.clear();
@@ -366,6 +542,51 @@ Lexer::ReductionResult Lexer::reduce_start()
};
}
+ if (was_on_new_line && !m_state.heredoc_entries.is_empty()) {
+ auto const& entry = m_state.heredoc_entries.head();
+
+ auto start_index = m_lexer.tell();
+ Optional<size_t> end_index;
+
+ for (; !m_lexer.is_eof();) {
+ auto index = m_lexer.tell();
+ auto possible_end_index = m_lexer.tell();
+ if (m_lexer.consume_specific('\n')) {
+ if (entry.dedent)
+ m_lexer.ignore_while(is_any_of("\t"sv));
+ if (m_lexer.consume_specific(entry.key.view())) {
+ if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
+ end_index = possible_end_index;
+ break;
+ }
+ }
+ }
+ if (m_lexer.tell() == index)
+ m_lexer.ignore();
+ }
+
+ auto contents = m_lexer.input().substring_view(start_index, end_index.value_or(m_lexer.tell()) - start_index);
+ reconsume(contents);
+
+ m_state.buffer.clear();
+ m_state.buffer.append(contents);
+
+ auto token = Token::maybe_from_state(m_state).first();
+ token.relevant_heredoc_key = entry.key;
+ token.type = Token::Type::HeredocContents;
+
+ m_state.heredoc_entries.dequeue();
+
+ m_state.on_new_line = true;
+
+ m_state.buffer.clear();
+
+ return {
+ .tokens = { move(token) },
+ .next_reduction = Reduction::Start,
+ };
+ }
+
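The scan above walks the raw input a line at a time looking for the terminating key (after stripping leading tabs when the operator was `<<-`), and treats end-of-input as an implicit terminator. A condensed standalone model of the same search, using std::string_view instead of AK::GenericLexer (illustrative only; it glosses over the empty-body edge case):

    #include <algorithm>
    #include <string_view>

    // Returns the length of the heredoc body in `input`: everything before
    // the newline that precedes the line consisting of `key`.
    static size_t heredoc_body_length(std::string_view input, std::string_view key, bool dedent)
    {
        size_t line_start = 0;
        while (line_start <= input.size()) {
            size_t line_end = input.find('\n', line_start);
            auto line = (line_end == std::string_view::npos)
                ? input.substr(line_start)
                : input.substr(line_start, line_end - line_start);
            if (dedent) // <<- : ignore leading tabs on the candidate line
                line.remove_prefix(std::min(line.find_first_not_of('\t'), line.size()));
            if (line == key)
                return line_start == 0 ? 0 : line_start - 1; // exclude the preceding '\n'
            if (line_end == std::string_view::npos)
                return input.size(); // unterminated: the whole remainder is the body
            line_start = line_end + 1;
        }
        return input.size();
    }

    // heredoc_body_length("\thello\n\tEND\n", "END", /*dedent=*/true) == 6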
if (m_state.escaping && consume_specific('\n')) {
m_state.escaping = false;
@@ -391,6 +612,8 @@ Lexer::ReductionResult Lexer::reduce_start()
auto tokens = Token::maybe_from_state(m_state);
tokens.append(Token::newline());
+ m_state.on_new_line = true;
+
m_state.buffer.clear();
m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line;
@@ -678,6 +901,8 @@ StringView Token::type_name() const
return "Clobber"sv;
case Type::Semicolon:
return "Semicolon"sv;
+ case Type::HeredocContents:
+ return "HeredocContents"sv;
case Type::AssignmentWord:
return "AssignmentWord"sv;
case Type::Bang:
diff --git a/Userland/Shell/PosixLexer.h b/Userland/Shell/PosixLexer.h
index b42f5a3e62..3c002b6db7 100644
--- a/Userland/Shell/PosixLexer.h
+++ b/Userland/Shell/PosixLexer.h
@@ -8,6 +8,7 @@
#include <AK/DeprecatedString.h>
#include <AK/GenericLexer.h>
+#include <AK/Queue.h>
#include <AK/Variant.h>
#include <AK/Vector.h>
#include <Shell/AST.h>
@@ -29,6 +30,9 @@ enum class Reduction {
ParameterExpansion,
CommandOrArithmeticSubstitutionExpansion,
ExtendedParameterExpansion,
+
+ // Separate rule, not used by the main flow.
+ HeredocContents,
};
struct ExpansionRange {
@@ -177,6 +181,12 @@ struct ResolvedCommandExpansion {
using ResolvedExpansion = Variant<ResolvedParameterExpansion, ResolvedCommandExpansion>;
+struct HeredocEntry {
+ DeprecatedString key;
+ bool allow_interpolation;
+ bool dedent;
+};
+
struct State {
StringBuilder buffer {};
Reduction previous_reduction { Reduction::Start };
@@ -194,6 +204,8 @@ struct State {
},
};
Vector<Expansion> expansions {};
+ Queue<HeredocEntry> heredoc_entries {};
+ bool on_new_line { true };
};
struct Token {
@@ -219,6 +231,7 @@ struct Token {
DoubleLessDash,
Clobber,
Semicolon,
+ HeredocContents,
// Not produced by this lexer, but generated in later stages.
AssignmentWord,
@@ -249,6 +262,7 @@ struct Token {
Vector<Expansion> expansions;
Vector<ResolvedExpansion> resolved_expansions {};
StringView original_text;
+ Optional<DeprecatedString> relevant_heredoc_key {};
bool could_be_start_of_a_simple_command { false };
static Vector<Token> maybe_from_state(State const& state)
@@ -378,7 +392,14 @@ public:
{
}
- Vector<Token> batch_next();
+ Vector<Token> batch_next(Optional<Reduction> starting_reduction = {});
+
+ struct HeredocKeyResult {
+ DeprecatedString key;
+ bool allow_interpolation;
+ };
+
+ static HeredocKeyResult process_heredoc_key(Token const&);
private:
struct ReductionResult {
@@ -400,9 +421,11 @@ private:
ReductionResult reduce_parameter_expansion();
ReductionResult reduce_command_or_arithmetic_substitution_expansion();
ReductionResult reduce_extended_parameter_expansion();
+ ReductionResult reduce_heredoc_contents();
char consume();
bool consume_specific(char);
+ void reconsume(StringView);
ExpansionRange range(ssize_t offset = 0) const;
GenericLexer m_lexer;
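The new optional parameter on batch_next is what lets heredoc bodies be re-lexed out of band: a fresh lexer can be pointed at just the body text and told to start in Reduction::HeredocContents instead of the usual Reduction::Start. A hedged sketch (assuming the Lexer constructor takes its input as a StringView):

    Lexer body_lexer { "hello $USER"sv };
    // Lex as a heredoc body, not as a command line: "$USER" is collected
    // as a parameter expansion, with no operator or word splitting.
    auto tokens = body_lexer.batch_next(Reduction::HeredocContents);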
diff --git a/Userland/Shell/PosixParser.cpp b/Userland/Shell/PosixParser.cpp
index 15b75ae5c8..65570cb5b8 100644
--- a/Userland/Shell/PosixParser.cpp
+++ b/Userland/Shell/PosixParser.cpp
@@ -9,6 +9,11 @@
#include <AK/StringUtils.h>
#include <Shell/PosixParser.h>
+static Shell::AST::Position empty_position()
+{
+ return { 0, 0, { 0, 0 }, { 0, 0 } };
+}
+
template<typename T, typename... Ts>
static inline bool is_one_of(T const& value, Ts const&... values)
{
@@ -22,7 +27,8 @@ static inline bool is_io_operator(Shell::Posix::Token const& token)
Token::Type::Less, Token::Type::Great,
Token::Type::LessAnd, Token::Type::GreatAnd,
Token::Type::DoubleLess, Token::Type::DoubleGreat,
- Token::Type::LessGreat, Token::Type::Clobber);
+ Token::Type::DoubleLessDash, Token::Type::LessGreat,
+ Token::Type::Clobber);
}
static inline bool is_separator(Shell::Posix::Token const& token)
@@ -95,10 +101,10 @@ static inline bool is_valid_name(StringView word)
}
namespace Shell::Posix {
-void Parser::fill_token_buffer()
+void Parser::fill_token_buffer(Optional<Reduction> starting_reduction)
{
for (;;) {
- auto token = next_expanded_token();
+ auto token = next_expanded_token(starting_reduction);
if (!token.has_value())
break;
#if SHELL_POSIX_PARSER_DEBUG
@@ -126,10 +132,36 @@ RefPtr<AST::Node> Parser::parse()
return parse_complete_command();
}
-Optional<Token> Parser::next_expanded_token()
+void Parser::handle_heredoc_contents()
+{
+ while (!eof() && m_token_buffer[m_token_index].type == Token::Type::HeredocContents) {
+ auto& token = m_token_buffer[m_token_index++];
+ auto entry = m_unprocessed_heredoc_entries.get(token.relevant_heredoc_key.value());
+ if (!entry.has_value()) {
+ error(token, "Discarding unexpected heredoc contents for key '{}'", *token.relevant_heredoc_key);
+ continue;
+ }
+
+ auto& heredoc = **entry;
+
+ RefPtr<AST::Node> contents;
+ if (heredoc.allow_interpolation()) {
+ Parser parser { token.value, m_in_interactive_mode, Reduction::HeredocContents };
+ contents = parser.parse_word();
+ } else {
+ contents = make_ref_counted<AST::StringLiteral>(token.position.value_or(empty_position()), token.value, AST::StringLiteral::EnclosureType::None);
+ }
+
+ if (contents)
+ heredoc.set_contents(contents);
+ m_unprocessed_heredoc_entries.remove(*token.relevant_heredoc_key);
+ }
+}
+
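Together with parse_io_here further down, this makes heredoc handling two-phase: the redirection parser registers a contents-less AST::Heredoc under its end keyword in m_unprocessed_heredoc_entries, and this function attaches the body once the corresponding HeredocContents token shows up in the stream. (Since the map is keyed by the keyword, two simultaneously pending heredocs with the same key would collide; the later registration wins.) An end-to-end sketch, assuming a SerenityOS build environment:

    Shell::Posix::Parser parser { "cat <<-GREETING\n\tHello, friends!\nGREETING\n"sv };
    // parse() yields the command with its Heredoc redirection; the node's
    // contents were filled in when the HeredocContents token was consumed.
    auto program = parser.parse();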
+Optional<Token> Parser::next_expanded_token(Optional<Reduction> starting_reduction)
{
while (m_token_buffer.find_if([](auto& token) { return token.type == Token::Type::Eof; }).is_end()) {
- auto tokens = m_lexer.batch_next();
+ auto tokens = m_lexer.batch_next(starting_reduction);
auto expanded = perform_expansions(move(tokens));
m_token_buffer.extend(expanded);
}
@@ -589,11 +621,6 @@ Vector<Token> Parser::perform_expansions(Vector<Token> tokens)
return tokens;
}
-static AST::Position empty_position()
-{
- return { 0, 0, { 0, 0 }, { 0, 0 } };
-}
-
RefPtr<AST::Node> Parser::parse_complete_command()
{
auto list = [&] {
@@ -1835,13 +1862,47 @@ RefPtr<AST::Node> Parser::parse_io_redirect()
if (auto io_file = parse_io_file(start_position, io_number))
return io_file;
- // if (auto io_here = parse_io_here(start_position, io_number))
- // return io_here;
+ if (auto io_here = parse_io_here(start_position, io_number))
+ return io_here;
m_token_index = start_index;
return nullptr;
}
+RefPtr<AST::Node> Parser::parse_io_here(AST::Position start_position, Optional<int> fd)
+{
+ // io_here: IO_NUMBER? (DLESS | DLESSDASH) WORD
+ auto io_operator = peek().type;
+ if (!is_one_of(io_operator, Token::Type::DoubleLess, Token::Type::DoubleLessDash))
+ return nullptr;
+
+ auto io_operator_token = consume();
+
+ auto redirection_fd = fd.value_or(0);
+
+ auto end_keyword = consume();
+ if (!is_one_of(end_keyword.type, Token::Type::Word, Token::Type::Token))
+ return make_ref_counted<AST::SyntaxError>(io_operator_token.position.value_or(start_position), "Expected a heredoc keyword", true);
+
+ auto [end_keyword_text, allow_interpolation] = Lexer::process_heredoc_key(end_keyword);
+ RefPtr<AST::SyntaxError> error;
+
+ auto position = start_position.with_end(peek().position.value_or(empty_position()));
+ auto result = make_ref_counted<AST::Heredoc>(
+ position,
+ end_keyword_text,
+ allow_interpolation,
+ io_operator == Token::Type::DoubleLessDash,
+ Optional<int> { redirection_fd });
+
+ m_unprocessed_heredoc_entries.set(end_keyword_text, result);
+
+ if (error)
+ result->set_is_syntax_error(*error);
+
+ return result;
+}
+
RefPtr<AST::Node> Parser::parse_io_file(AST::Position start_position, Optional<int> fd)
{
auto start_index = m_token_index;
diff --git a/Userland/Shell/PosixParser.h b/Userland/Shell/PosixParser.h
index 4f873128ce..3ffebaa347 100644
--- a/Userland/Shell/PosixParser.h
+++ b/Userland/Shell/PosixParser.h
@@ -13,12 +13,12 @@ namespace Shell::Posix {
class Parser {
public:
- Parser(StringView input, bool interactive = false)
+ Parser(StringView input, bool interactive = false, Optional<Reduction> starting_reduction = {})
: m_lexer(input)
, m_in_interactive_mode(interactive)
, m_eof_token(Token::eof())
{
- fill_token_buffer();
+ fill_token_buffer(starting_reduction);
}
RefPtr<AST::Node> parse();
@@ -31,20 +31,23 @@ public:
auto& errors() const { return m_errors; }
private:
- Optional<Token> next_expanded_token();
+ Optional<Token> next_expanded_token(Optional<Reduction> starting_reduction = {});
Vector<Token> perform_expansions(Vector<Token> tokens);
- void fill_token_buffer();
+ void fill_token_buffer(Optional<Reduction> starting_reduction = {});
+ void handle_heredoc_contents();
- Token const& peek() const
+ Token const& peek()
{
if (eof())
return m_eof_token;
+ handle_heredoc_contents();
return m_token_buffer[m_token_index];
}
Token const& consume()
{
if (eof())
return m_eof_token;
+ handle_heredoc_contents();
return m_token_buffer[m_token_index++];
}
void skip()
@@ -108,6 +111,7 @@ private:
Vector<Token> m_previous_token_buffer;
Vector<Error> m_errors;
+ HashMap<DeprecatedString, NonnullRefPtr<AST::Heredoc>> m_unprocessed_heredoc_entries;
Token m_eof_token;