diff options
author | Daniel Bertalan <dani@danielbertalan.dev> | 2021-05-08 09:53:49 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-05-16 11:50:56 +0200 |
commit | 22195d965f4c717b79cafaa08fe286f0e5a006ea (patch) | |
tree | a112709f7aadac8bba5ef9993b910342b12f836f /Userland/DevTools/StateMachineGenerator | |
parent | aa4d41fe2c473c3bb78327a1dbe8ec85530259ca (diff) | |
download | serenity-22195d965f4c717b79cafaa08fe286f0e5a006ea.zip |
DevTools: Add StateMachineGenerator utility
This program turns a description of a state machine that takes its input
byte-by-byte into C++ code. The state machine is described in a custom
format as specified below:
```
// Comments are started by two slashes, and cause the rest of the line
// to be ignored
@name ExampleStateMachine // sets the name of the generated class
@namespace Test // sets the namespace (optional)
@begin Begin // sets the state the parser will start in
// The rest of the file contains one or more states and an optional
// @anywhere directive. Each of these is a curly bracket delimited set
// of state transitions. State transitions contain a selector, the
// literal "=>" and a (new_state, action) tuple. Examples:
// 0x0a => (Begin, PrintLine)
// [0x00..0x1f] => (_, Warn) // '_' means no change
// [0x41..0x5a] => (BeginWord, _) // '_' means no action
// Rules common to all states. These take precedence over rules in the
// specific states.
@anywhere {
0x0a => (Begin, PrintLine)
[0x00..0x1f] => (_, Warn)
}
Begin {
[0x41..0x5a] => (Word, _)
[0x61..0x7a] => (Word, _)
// For missing values, the transition (_, _) is implied
}
Word {
// The entry action is run when we transition to this state from a
// *different* state. @anywhere can't have this
@entry IncreaseWordCount
0x09 => (Begin, _)
0x20 => (Begin, _)
// The exit action is run before we transition to any *other* state
// from here. @anywhere can't have this
@exit EndOfWord
}
```
The generated code consists of a single class which takes a
`Function<Action, u8>` as a parameter in its constructor. This gets
called whenever an action is to be done. This is because some input
might not produce an action, but others might produce up to 3 (exit,
state transition, entry). The actions allow us to build a more
advanced parser over the simple state machine.
The sole public method, `void advance(u8)`, handles the input
byte-by-byte, managing the state changes and requesting the appropriate
Action from the handler.
Internally, the state transitions are resolved via a lookup table. This
is a bit wasteful for more complex state machines, therefore the
generator is designed to be easily extendable with a switch-based
resolver; only the private `lookup_state_transition` method needs to be
re-implemented.
My goal for this tool is to use it for implementing a standard-compliant
ANSI escape sequence parser for LibVT, as described on
<https://vt100.net/emu/dec_ansi_parser>
Diffstat (limited to 'Userland/DevTools/StateMachineGenerator')
-rw-r--r-- | Userland/DevTools/StateMachineGenerator/CMakeLists.txt | 6 | ||||
-rw-r--r-- | Userland/DevTools/StateMachineGenerator/main.cpp | 495 |
2 files changed, 501 insertions, 0 deletions
diff --git a/Userland/DevTools/StateMachineGenerator/CMakeLists.txt b/Userland/DevTools/StateMachineGenerator/CMakeLists.txt new file mode 100644 index 0000000000..b5e5da6c8d --- /dev/null +++ b/Userland/DevTools/StateMachineGenerator/CMakeLists.txt @@ -0,0 +1,6 @@ +set(SOURCES + main.cpp +) + +add_executable(StateMachineGenerator ${SOURCES}) +target_link_libraries(StateMachineGenerator LagomCore) diff --git a/Userland/DevTools/StateMachineGenerator/main.cpp b/Userland/DevTools/StateMachineGenerator/main.cpp new file mode 100644 index 0000000000..46b4e02d7c --- /dev/null +++ b/Userland/DevTools/StateMachineGenerator/main.cpp @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2021, Daniel Bertalan <dani@danielbertalan.dev> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/GenericLexer.h> +#include <AK/HashTable.h> +#include <AK/NonnullOwnPtr.h> +#include <AK/SourceGenerator.h> +#include <AK/String.h> +#include <AK/StringBuilder.h> +#include <AK/Types.h> +#include <LibCore/ArgsParser.h> +#include <LibCore/File.h> +#include <ctype.h> +#include <string.h> + +struct Range { + int begin; + int end; +}; + +struct StateTransition { + Optional<String> new_state; + Optional<String> action; +}; + +struct MatchedAction { + Range range; + StateTransition action; +}; + +struct State { + String name; + Vector<MatchedAction> actions; + Optional<String> entry_action; + Optional<String> exit_action; +}; + +struct StateMachine { + String name; + String initial_state; + Vector<State> states; + Optional<State> anywhere; + Optional<String> namespaces; +}; + +static OwnPtr<StateMachine> +parse_state_machine(StringView input) +{ + auto state_machine = make<StateMachine>(); + GenericLexer lexer(input); + + auto consume_whitespace = [&] { + bool consumed = true; + while (consumed) { + consumed = lexer.consume_while(isspace).length() > 0; + if (lexer.consume_specific("//")) { + lexer.consume_line(); + consumed = true; + } + } + }; + + auto consume_identifier = [&] { + consume_whitespace(); + return lexer.consume_while([](char c) { return isalnum(c) || c == '_'; }); + }; + + auto get_hex_value = [&](char c) { + if (isdigit(c)) + return c - '0'; + else + return c - 'a' + 10; + }; + + auto consume_number = [&] { + int num = 0; + consume_whitespace(); + if (lexer.consume_specific("0x")) { + auto hex_digits = lexer.consume_while([](char c) { + if (isdigit(c)) return true; + else { + c = tolower(c); + return (c >= 'a' && c <= 'f'); + } }); + for (auto c : hex_digits) + num = 16 * num + get_hex_value(c); + } else { + lexer.consume_specific('\''); + if (lexer.next_is('\\')) + num = (int)lexer.consume_escaped_character('\\'); + else + num = lexer.consume_until('\'').to_int().value(); + lexer.consume_specific('\''); + } + return num; + }; + + auto consume_condition = [&] { + Range condition; + consume_whitespace(); + if (lexer.consume_specific('[')) { + consume_whitespace(); + condition.begin = consume_number(); + consume_whitespace(); + lexer.consume_specific(".."); + consume_whitespace(); + condition.end = consume_number(); + consume_whitespace(); + lexer.consume_specific(']'); + } else { + auto num = consume_number(); + condition.begin = num; + condition.end = num; + } + return condition; + }; + + auto consume_action = [&]() { + StateTransition action; + consume_whitespace(); + lexer.consume_specific("=>"); + consume_whitespace(); + lexer.consume_specific('('); + consume_whitespace(); + if (!lexer.consume_specific("_")) + action.new_state = consume_identifier(); + consume_whitespace(); + lexer.consume_specific(','); + consume_whitespace(); + if (!lexer.consume_specific("_")) + action.action = consume_identifier(); + consume_whitespace(); + lexer.consume_specific(')'); + return action; + }; + + auto consume_state_description + = [&] { + State state; + consume_whitespace(); + state.name = consume_identifier(); + consume_whitespace(); + consume_whitespace(); + lexer.consume_specific('{'); + for (;;) { + consume_whitespace(); + if (lexer.consume_specific('}')) { + break; + } + if (lexer.consume_specific("@entry")) { + consume_whitespace(); + state.entry_action = consume_identifier(); + } else if (lexer.consume_specific("@exit")) { + consume_whitespace(); + state.exit_action = consume_identifier(); + } else if (lexer.next_is('@')) { + auto directive = consume_identifier().to_string(); + fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters()); + exit(1); + } else { + MatchedAction matched_action; + matched_action.range = consume_condition(); + matched_action.action = consume_action(); + state.actions.append(matched_action); + } + } + return state; + }; + + while (!lexer.is_eof()) { + consume_whitespace(); + if (lexer.is_eof()) + break; + if (lexer.consume_specific("@namespace")) { + consume_whitespace(); + state_machine->namespaces = lexer.consume_while([](char c) { return isalpha(c) || c == ':'; }); + } else if (lexer.consume_specific("@begin")) { + consume_whitespace(); + state_machine->initial_state = consume_identifier(); + } else if (lexer.consume_specific("@name")) { + consume_whitespace(); + state_machine->name = consume_identifier(); + } else if (lexer.next_is("@anywhere")) { + lexer.consume_specific('@'); + state_machine->anywhere = consume_state_description(); + } else if (lexer.consume_specific('@')) { + auto directive = consume_identifier().to_string(); + fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters()); + exit(1); + } else { + auto description = consume_state_description(); + state_machine->states.append(description); + } + } + + if (state_machine->initial_state.is_empty()) { + fprintf(stderr, "Missing @begin directive\n"); + exit(1); + } else if (state_machine->name.is_empty()) { + fprintf(stderr, "Missing @name directive\n"); + exit(1); + } + + if (state_machine->anywhere.has_value()) { + state_machine->anywhere.value().name = "_Anywhere"; + } + return state_machine; +} + +void output_header(const StateMachine&, SourceGenerator&); +void output_cpp(const StateMachine&, SourceGenerator&); + +int main(int argc, char** argv) +{ + Core::ArgsParser args_parser; + const char* path = nullptr; + bool header_mode = false; + args_parser.add_option(header_mode, "Generate .h file", "header", 'H'); + args_parser.add_positional_argument(path, "Path to parser description", "input", Core::ArgsParser::Required::Yes); + args_parser.parse(argc, argv); + + auto file_or_error = Core::File::open(path, Core::IODevice::ReadOnly); + if (file_or_error.is_error()) { + fprintf(stderr, "Cannot open %s\n", path); + } + + auto content = file_or_error.value()->read_all(); + auto state_machine = parse_state_machine(content); + + StringBuilder builder; + SourceGenerator generator { builder }; + if (header_mode) + output_header(*state_machine, generator); + else + output_cpp(*state_machine, generator); + outln("{}", generator.as_string_view()); + return 0; +} + +HashTable<String> actions(const StateMachine& machine) +{ + HashTable<String> table; + + auto do_state = [&](const State& state) { + if (state.entry_action.has_value()) + table.set(state.entry_action.value()); + if (state.exit_action.has_value()) + table.set(state.exit_action.value()); + for (auto action : state.actions) { + if (action.action.action.has_value()) + table.set(action.action.action.value()); + } + }; + for (auto state : machine.states) { + do_state(state); + } + if (machine.anywhere.has_value()) + do_state(machine.anywhere.value()); + return move(table); +} + +void generate_lookup_table(const StateMachine& machine, SourceGenerator& generator) +{ + generator.append(R"~~~( + static constexpr StateTransition STATE_TRANSITION_TABLE[][256] = { +)~~~"); + + auto generate_for_state = [&](const State& s) { + auto table_generator = generator.fork(); + table_generator.set("active_state", s.name); + table_generator.append("/* @active_state@ */ { "); + VERIFY(!s.name.is_empty()); + Vector<StateTransition> row; + for (int i = 0; i < 256; i++) + row.append({ s.name, "_Ignore" }); + for (auto action : s.actions) { + for (int range_element = action.range.begin; range_element <= action.range.end; range_element++) { + row[range_element] = { action.action.new_state, action.action.action }; + } + } + for (int i = 0; i < 256; ++i) { + auto cell_generator = table_generator.fork(); + cell_generator.set("cell_new_state", row[i].new_state.value_or(s.name)); + cell_generator.set("cell_action", row[i].action.value_or("_Ignore")); + cell_generator.append(" {State::@cell_new_state@, Action::@cell_action@}, "); + } + table_generator.append("},\n"); + }; + if (machine.anywhere.has_value()) { + generate_for_state(machine.anywhere.value()); + } + for (auto s : machine.states) { + generate_for_state(s); + } + generator.append(R"~~~( + }; +)~~~"); +} + +void output_header(const StateMachine& machine, SourceGenerator& generator) +{ + generator.set("class_name", machine.name); + generator.set("initial_state", machine.initial_state); + generator.set("state_count", String::number(machine.states.size() + 1)); + + generator.append(R"~~~( +#pragma once + +#include <AK/Function.h> +#include <AK/Platform.h> +#include <AK/Types.h> + )~~~"); + if (machine.namespaces.has_value()) { + generator.set("namespace", machine.namespaces.value()); + generator.append(R"~~~( +namespace @namespace@ { +)~~~"); + } + generator.append(R"~~~( +class @class_name@ { +public: + enum class Action : u8 { + _Ignore, +)~~~"); + for (auto a : actions(machine)) { + if (a.is_empty()) + continue; + auto action_generator = generator.fork(); + action_generator.set("action.name", a); + action_generator.append(R"~~~( + @action.name@, + )~~~"); + } + + generator.append(R"~~~( + }; // end Action + + typedef Function<void(Action, u8)> Handler; + + @class_name@(Handler); + + void advance(u8); + +private: + enum class State : u8 { + _Anywhere, +)~~~"); + + int largest_state_value = 0; + for (auto s : machine.states) { + auto state_generator = generator.fork(); + state_generator.set("state.name", s.name); + largest_state_value++; + state_generator.append(R"~~~( + @state.name@, +)~~~"); + } + generator.append(R"~~~( + }; // end State + + struct StateTransition { + State new_state; + Action action; + }; + + State m_state { State::@initial_state@ }; + + Handler m_handler; + + StateTransition lookup_state_transition(u8); +)~~~"); + + auto table_generator = generator.fork(); + generate_lookup_table(machine, table_generator); + generator.append(R"~~~( +}; // end @class_name@ +)~~~"); + + if (machine.namespaces.has_value()) { + generator.append(R"~~~( +} // end namespace +)~~~"); + } +} + +void output_cpp(const StateMachine& machine, SourceGenerator& generator) +{ + VERIFY(!machine.name.is_empty()); + generator.set("class_name", machine.name); + generator.set("state_count", String::number(machine.states.size() + 1)); + + generator.append(R"~~~( +#include "@class_name@.h" +#include <AK/Function.h> +#include <AK/Types.h> +)~~~"); + if (machine.namespaces.has_value()) { + generator.set("namespace", machine.namespaces.value()); + generator.append(R"~~~( +namespace @namespace@ { +)~~~"); + } + generator.append(R"~~~( +@class_name@::@class_name@(Handler handler) + : m_handler(move(handler)) +{ +} + +ALWAYS_INLINE @class_name@::StateTransition @class_name@::lookup_state_transition(u8 byte) +{ + VERIFY((u8)m_state < @state_count@); + )~~~"); + if (machine.anywhere.has_value()) { + generator.append(R"~~~( + auto anywhere_state = STATE_TRANSITION_TABLE[0][byte]; + if (anywhere_state.new_state != @class_name@::State::_Anywhere || anywhere_state.action != @class_name@::Action::_Ignore) + return anywhere_state; + else +)~~~"); + } + generator.append(R"~~~( + return STATE_TRANSITION_TABLE[(u8)m_state][byte]; +} +)~~~"); + + generator.append(R"~~~( + +void @class_name@::advance(u8 byte) +{ + auto next_state = lookup_state_transition(byte); + bool state_will_change = next_state.new_state != m_state && next_state.new_state != @class_name@::State::_Anywhere; + + // only run exit directive if state is being changed + if (state_will_change) + { + switch (m_state) + { +)~~~"); + for (auto s : machine.states) { + auto state_generator = generator.fork(); + if (s.exit_action.has_value()) { + state_generator.set("state_name", s.name); + state_generator.set("action", s.exit_action.value()); + state_generator.append(R"~~~( + case @class_name@::State::@state_name@: + m_handler(Action::@action@, byte); + break; +)~~~"); + } + } + generator.append(R"~~~( + default: + break; + } + } + + if (next_state.action != @class_name@::Action::_Ignore) + m_handler(next_state.action, byte); + m_state = next_state.new_state; + + // only run entry directive if state is being changed + if (state_will_change) + { + switch (next_state.new_state) + { +)~~~"); + for (auto state : machine.states) { + auto state_generator = generator.fork(); + if (state.entry_action.has_value()) { + state_generator.set("state_name", state.name); + state_generator.set("action", state.entry_action.value()); + state_generator.append(R"~~~( + case @class_name@::State::@state_name@: + m_handler(Action::@action@, byte); + break; +)~~~"); + } + } + generator.append(R"~~~( + default: + break; + } + } +} +)~~~"); + + if (machine.namespaces.has_value()) { + generator.append(R"~~~( +} // end namespace +)~~~"); + } +} |