summaryrefslogtreecommitdiff
path: root/Userland/DevTools/StateMachineGenerator
diff options
context:
space:
mode:
authorDaniel Bertalan <dani@danielbertalan.dev>2021-05-08 09:53:49 +0200
committerAndreas Kling <kling@serenityos.org>2021-05-16 11:50:56 +0200
commit22195d965f4c717b79cafaa08fe286f0e5a006ea (patch)
treea112709f7aadac8bba5ef9993b910342b12f836f /Userland/DevTools/StateMachineGenerator
parentaa4d41fe2c473c3bb78327a1dbe8ec85530259ca (diff)
downloadserenity-22195d965f4c717b79cafaa08fe286f0e5a006ea.zip
DevTools: Add StateMachineGenerator utility
This program turns a description of a state machine that takes its input byte-by-byte into C++ code. The state machine is described in a custom format as specified below: ``` // Comments are started by two slashes, and cause the rest of the line // to be ignored @name ExampleStateMachine // sets the name of the generated class @namespace Test // sets the namespace (optional) @begin Begin // sets the state the parser will start in // The rest of the file contains one or more states and an optional // @anywhere directive. Each of these is a curly bracket delimited set // of state transitions. State transitions contain a selector, the // literal "=>" and a (new_state, action) tuple. Examples: // 0x0a => (Begin, PrintLine) // [0x00..0x1f] => (_, Warn) // '_' means no change // [0x41..0x5a] => (BeginWord, _) // '_' means no action // Rules common to all states. These take precedence over rules in the // specific states. @anywhere { 0x0a => (Begin, PrintLine) [0x00..0x1f] => (_, Warn) } Begin { [0x41..0x5a] => (Word, _) [0x61..0x7a] => (Word, _) // For missing values, the transition (_, _) is implied } Word { // The entry action is run when we transition to this state from a // *different* state. @anywhere can't have this @entry IncreaseWordCount 0x09 => (Begin, _) 0x20 => (Begin, _) // The exit action is run before we transition to any *other* state // from here. @anywhere can't have this @exit EndOfWord } ``` The generated code consists of a single class which takes a `Function<Action, u8>` as a parameter in its constructor. This gets called whenever an action is to be done. This is because some input might not produce an action, but others might produce up to 3 (exit, state transition, entry). The actions allow us to build a more advanced parser over the simple state machine. The sole public method, `void advance(u8)`, handles the input byte-by-byte, managing the state changes and requesting the appropriate Action from the handler. Internally, the state transitions are resolved via a lookup table. This is a bit wasteful for more complex state machines, therefore the generator is designed to be easily extendable with a switch-based resolver; only the private `lookup_state_transition` method needs to be re-implemented. My goal for this tool is to use it for implementing a standard-compliant ANSI escape sequence parser for LibVT, as described on <https://vt100.net/emu/dec_ansi_parser>
Diffstat (limited to 'Userland/DevTools/StateMachineGenerator')
-rw-r--r--Userland/DevTools/StateMachineGenerator/CMakeLists.txt6
-rw-r--r--Userland/DevTools/StateMachineGenerator/main.cpp495
2 files changed, 501 insertions, 0 deletions
diff --git a/Userland/DevTools/StateMachineGenerator/CMakeLists.txt b/Userland/DevTools/StateMachineGenerator/CMakeLists.txt
new file mode 100644
index 0000000000..b5e5da6c8d
--- /dev/null
+++ b/Userland/DevTools/StateMachineGenerator/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(SOURCES
+ main.cpp
+)
+
+add_executable(StateMachineGenerator ${SOURCES})
+target_link_libraries(StateMachineGenerator LagomCore)
diff --git a/Userland/DevTools/StateMachineGenerator/main.cpp b/Userland/DevTools/StateMachineGenerator/main.cpp
new file mode 100644
index 0000000000..46b4e02d7c
--- /dev/null
+++ b/Userland/DevTools/StateMachineGenerator/main.cpp
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2021, Daniel Bertalan <dani@danielbertalan.dev>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/GenericLexer.h>
+#include <AK/HashTable.h>
+#include <AK/NonnullOwnPtr.h>
+#include <AK/SourceGenerator.h>
+#include <AK/String.h>
+#include <AK/StringBuilder.h>
+#include <AK/Types.h>
+#include <LibCore/ArgsParser.h>
+#include <LibCore/File.h>
+#include <ctype.h>
+#include <string.h>
+
+struct Range {
+ int begin;
+ int end;
+};
+
+struct StateTransition {
+ Optional<String> new_state;
+ Optional<String> action;
+};
+
+struct MatchedAction {
+ Range range;
+ StateTransition action;
+};
+
+struct State {
+ String name;
+ Vector<MatchedAction> actions;
+ Optional<String> entry_action;
+ Optional<String> exit_action;
+};
+
+struct StateMachine {
+ String name;
+ String initial_state;
+ Vector<State> states;
+ Optional<State> anywhere;
+ Optional<String> namespaces;
+};
+
+static OwnPtr<StateMachine>
+parse_state_machine(StringView input)
+{
+ auto state_machine = make<StateMachine>();
+ GenericLexer lexer(input);
+
+ auto consume_whitespace = [&] {
+ bool consumed = true;
+ while (consumed) {
+ consumed = lexer.consume_while(isspace).length() > 0;
+ if (lexer.consume_specific("//")) {
+ lexer.consume_line();
+ consumed = true;
+ }
+ }
+ };
+
+ auto consume_identifier = [&] {
+ consume_whitespace();
+ return lexer.consume_while([](char c) { return isalnum(c) || c == '_'; });
+ };
+
+ auto get_hex_value = [&](char c) {
+ if (isdigit(c))
+ return c - '0';
+ else
+ return c - 'a' + 10;
+ };
+
+ auto consume_number = [&] {
+ int num = 0;
+ consume_whitespace();
+ if (lexer.consume_specific("0x")) {
+ auto hex_digits = lexer.consume_while([](char c) {
+ if (isdigit(c)) return true;
+ else {
+ c = tolower(c);
+ return (c >= 'a' && c <= 'f');
+ } });
+ for (auto c : hex_digits)
+ num = 16 * num + get_hex_value(c);
+ } else {
+ lexer.consume_specific('\'');
+ if (lexer.next_is('\\'))
+ num = (int)lexer.consume_escaped_character('\\');
+ else
+ num = lexer.consume_until('\'').to_int().value();
+ lexer.consume_specific('\'');
+ }
+ return num;
+ };
+
+ auto consume_condition = [&] {
+ Range condition;
+ consume_whitespace();
+ if (lexer.consume_specific('[')) {
+ consume_whitespace();
+ condition.begin = consume_number();
+ consume_whitespace();
+ lexer.consume_specific("..");
+ consume_whitespace();
+ condition.end = consume_number();
+ consume_whitespace();
+ lexer.consume_specific(']');
+ } else {
+ auto num = consume_number();
+ condition.begin = num;
+ condition.end = num;
+ }
+ return condition;
+ };
+
+ auto consume_action = [&]() {
+ StateTransition action;
+ consume_whitespace();
+ lexer.consume_specific("=>");
+ consume_whitespace();
+ lexer.consume_specific('(');
+ consume_whitespace();
+ if (!lexer.consume_specific("_"))
+ action.new_state = consume_identifier();
+ consume_whitespace();
+ lexer.consume_specific(',');
+ consume_whitespace();
+ if (!lexer.consume_specific("_"))
+ action.action = consume_identifier();
+ consume_whitespace();
+ lexer.consume_specific(')');
+ return action;
+ };
+
+ auto consume_state_description
+ = [&] {
+ State state;
+ consume_whitespace();
+ state.name = consume_identifier();
+ consume_whitespace();
+ consume_whitespace();
+ lexer.consume_specific('{');
+ for (;;) {
+ consume_whitespace();
+ if (lexer.consume_specific('}')) {
+ break;
+ }
+ if (lexer.consume_specific("@entry")) {
+ consume_whitespace();
+ state.entry_action = consume_identifier();
+ } else if (lexer.consume_specific("@exit")) {
+ consume_whitespace();
+ state.exit_action = consume_identifier();
+ } else if (lexer.next_is('@')) {
+ auto directive = consume_identifier().to_string();
+ fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters());
+ exit(1);
+ } else {
+ MatchedAction matched_action;
+ matched_action.range = consume_condition();
+ matched_action.action = consume_action();
+ state.actions.append(matched_action);
+ }
+ }
+ return state;
+ };
+
+ while (!lexer.is_eof()) {
+ consume_whitespace();
+ if (lexer.is_eof())
+ break;
+ if (lexer.consume_specific("@namespace")) {
+ consume_whitespace();
+ state_machine->namespaces = lexer.consume_while([](char c) { return isalpha(c) || c == ':'; });
+ } else if (lexer.consume_specific("@begin")) {
+ consume_whitespace();
+ state_machine->initial_state = consume_identifier();
+ } else if (lexer.consume_specific("@name")) {
+ consume_whitespace();
+ state_machine->name = consume_identifier();
+ } else if (lexer.next_is("@anywhere")) {
+ lexer.consume_specific('@');
+ state_machine->anywhere = consume_state_description();
+ } else if (lexer.consume_specific('@')) {
+ auto directive = consume_identifier().to_string();
+ fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters());
+ exit(1);
+ } else {
+ auto description = consume_state_description();
+ state_machine->states.append(description);
+ }
+ }
+
+ if (state_machine->initial_state.is_empty()) {
+ fprintf(stderr, "Missing @begin directive\n");
+ exit(1);
+ } else if (state_machine->name.is_empty()) {
+ fprintf(stderr, "Missing @name directive\n");
+ exit(1);
+ }
+
+ if (state_machine->anywhere.has_value()) {
+ state_machine->anywhere.value().name = "_Anywhere";
+ }
+ return state_machine;
+}
+
+void output_header(const StateMachine&, SourceGenerator&);
+void output_cpp(const StateMachine&, SourceGenerator&);
+
+int main(int argc, char** argv)
+{
+ Core::ArgsParser args_parser;
+ const char* path = nullptr;
+ bool header_mode = false;
+ args_parser.add_option(header_mode, "Generate .h file", "header", 'H');
+ args_parser.add_positional_argument(path, "Path to parser description", "input", Core::ArgsParser::Required::Yes);
+ args_parser.parse(argc, argv);
+
+ auto file_or_error = Core::File::open(path, Core::IODevice::ReadOnly);
+ if (file_or_error.is_error()) {
+ fprintf(stderr, "Cannot open %s\n", path);
+ }
+
+ auto content = file_or_error.value()->read_all();
+ auto state_machine = parse_state_machine(content);
+
+ StringBuilder builder;
+ SourceGenerator generator { builder };
+ if (header_mode)
+ output_header(*state_machine, generator);
+ else
+ output_cpp(*state_machine, generator);
+ outln("{}", generator.as_string_view());
+ return 0;
+}
+
+HashTable<String> actions(const StateMachine& machine)
+{
+ HashTable<String> table;
+
+ auto do_state = [&](const State& state) {
+ if (state.entry_action.has_value())
+ table.set(state.entry_action.value());
+ if (state.exit_action.has_value())
+ table.set(state.exit_action.value());
+ for (auto action : state.actions) {
+ if (action.action.action.has_value())
+ table.set(action.action.action.value());
+ }
+ };
+ for (auto state : machine.states) {
+ do_state(state);
+ }
+ if (machine.anywhere.has_value())
+ do_state(machine.anywhere.value());
+ return move(table);
+}
+
+void generate_lookup_table(const StateMachine& machine, SourceGenerator& generator)
+{
+ generator.append(R"~~~(
+ static constexpr StateTransition STATE_TRANSITION_TABLE[][256] = {
+)~~~");
+
+ auto generate_for_state = [&](const State& s) {
+ auto table_generator = generator.fork();
+ table_generator.set("active_state", s.name);
+ table_generator.append("/* @active_state@ */ { ");
+ VERIFY(!s.name.is_empty());
+ Vector<StateTransition> row;
+ for (int i = 0; i < 256; i++)
+ row.append({ s.name, "_Ignore" });
+ for (auto action : s.actions) {
+ for (int range_element = action.range.begin; range_element <= action.range.end; range_element++) {
+ row[range_element] = { action.action.new_state, action.action.action };
+ }
+ }
+ for (int i = 0; i < 256; ++i) {
+ auto cell_generator = table_generator.fork();
+ cell_generator.set("cell_new_state", row[i].new_state.value_or(s.name));
+ cell_generator.set("cell_action", row[i].action.value_or("_Ignore"));
+ cell_generator.append(" {State::@cell_new_state@, Action::@cell_action@}, ");
+ }
+ table_generator.append("},\n");
+ };
+ if (machine.anywhere.has_value()) {
+ generate_for_state(machine.anywhere.value());
+ }
+ for (auto s : machine.states) {
+ generate_for_state(s);
+ }
+ generator.append(R"~~~(
+ };
+)~~~");
+}
+
+void output_header(const StateMachine& machine, SourceGenerator& generator)
+{
+ generator.set("class_name", machine.name);
+ generator.set("initial_state", machine.initial_state);
+ generator.set("state_count", String::number(machine.states.size() + 1));
+
+ generator.append(R"~~~(
+#pragma once
+
+#include <AK/Function.h>
+#include <AK/Platform.h>
+#include <AK/Types.h>
+ )~~~");
+ if (machine.namespaces.has_value()) {
+ generator.set("namespace", machine.namespaces.value());
+ generator.append(R"~~~(
+namespace @namespace@ {
+)~~~");
+ }
+ generator.append(R"~~~(
+class @class_name@ {
+public:
+ enum class Action : u8 {
+ _Ignore,
+)~~~");
+ for (auto a : actions(machine)) {
+ if (a.is_empty())
+ continue;
+ auto action_generator = generator.fork();
+ action_generator.set("action.name", a);
+ action_generator.append(R"~~~(
+ @action.name@,
+ )~~~");
+ }
+
+ generator.append(R"~~~(
+ }; // end Action
+
+ typedef Function<void(Action, u8)> Handler;
+
+ @class_name@(Handler);
+
+ void advance(u8);
+
+private:
+ enum class State : u8 {
+ _Anywhere,
+)~~~");
+
+ int largest_state_value = 0;
+ for (auto s : machine.states) {
+ auto state_generator = generator.fork();
+ state_generator.set("state.name", s.name);
+ largest_state_value++;
+ state_generator.append(R"~~~(
+ @state.name@,
+)~~~");
+ }
+ generator.append(R"~~~(
+ }; // end State
+
+ struct StateTransition {
+ State new_state;
+ Action action;
+ };
+
+ State m_state { State::@initial_state@ };
+
+ Handler m_handler;
+
+ StateTransition lookup_state_transition(u8);
+)~~~");
+
+ auto table_generator = generator.fork();
+ generate_lookup_table(machine, table_generator);
+ generator.append(R"~~~(
+}; // end @class_name@
+)~~~");
+
+ if (machine.namespaces.has_value()) {
+ generator.append(R"~~~(
+} // end namespace
+)~~~");
+ }
+}
+
+void output_cpp(const StateMachine& machine, SourceGenerator& generator)
+{
+ VERIFY(!machine.name.is_empty());
+ generator.set("class_name", machine.name);
+ generator.set("state_count", String::number(machine.states.size() + 1));
+
+ generator.append(R"~~~(
+#include "@class_name@.h"
+#include <AK/Function.h>
+#include <AK/Types.h>
+)~~~");
+ if (machine.namespaces.has_value()) {
+ generator.set("namespace", machine.namespaces.value());
+ generator.append(R"~~~(
+namespace @namespace@ {
+)~~~");
+ }
+ generator.append(R"~~~(
+@class_name@::@class_name@(Handler handler)
+ : m_handler(move(handler))
+{
+}
+
+ALWAYS_INLINE @class_name@::StateTransition @class_name@::lookup_state_transition(u8 byte)
+{
+ VERIFY((u8)m_state < @state_count@);
+ )~~~");
+ if (machine.anywhere.has_value()) {
+ generator.append(R"~~~(
+ auto anywhere_state = STATE_TRANSITION_TABLE[0][byte];
+ if (anywhere_state.new_state != @class_name@::State::_Anywhere || anywhere_state.action != @class_name@::Action::_Ignore)
+ return anywhere_state;
+ else
+)~~~");
+ }
+ generator.append(R"~~~(
+ return STATE_TRANSITION_TABLE[(u8)m_state][byte];
+}
+)~~~");
+
+ generator.append(R"~~~(
+
+void @class_name@::advance(u8 byte)
+{
+ auto next_state = lookup_state_transition(byte);
+ bool state_will_change = next_state.new_state != m_state && next_state.new_state != @class_name@::State::_Anywhere;
+
+ // only run exit directive if state is being changed
+ if (state_will_change)
+ {
+ switch (m_state)
+ {
+)~~~");
+ for (auto s : machine.states) {
+ auto state_generator = generator.fork();
+ if (s.exit_action.has_value()) {
+ state_generator.set("state_name", s.name);
+ state_generator.set("action", s.exit_action.value());
+ state_generator.append(R"~~~(
+ case @class_name@::State::@state_name@:
+ m_handler(Action::@action@, byte);
+ break;
+)~~~");
+ }
+ }
+ generator.append(R"~~~(
+ default:
+ break;
+ }
+ }
+
+ if (next_state.action != @class_name@::Action::_Ignore)
+ m_handler(next_state.action, byte);
+ m_state = next_state.new_state;
+
+ // only run entry directive if state is being changed
+ if (state_will_change)
+ {
+ switch (next_state.new_state)
+ {
+)~~~");
+ for (auto state : machine.states) {
+ auto state_generator = generator.fork();
+ if (state.entry_action.has_value()) {
+ state_generator.set("state_name", state.name);
+ state_generator.set("action", state.entry_action.value());
+ state_generator.append(R"~~~(
+ case @class_name@::State::@state_name@:
+ m_handler(Action::@action@, byte);
+ break;
+)~~~");
+ }
+ }
+ generator.append(R"~~~(
+ default:
+ break;
+ }
+ }
+}
+)~~~");
+
+ if (machine.namespaces.has_value()) {
+ generator.append(R"~~~(
+} // end namespace
+)~~~");
+ }
+}