From bb07d678ac1286d9632efb21a182b53d7ca14607 Mon Sep 17 00:00:00 2001
From: Sam Atkins
Date: Tue, 7 Mar 2023 20:33:16 +0000
Subject: LibCMake: Add a lexer for CMakeCache.txt

This is a totally different syntax than for regular CMake files, and also
is undocumented and subject to change, but it's also nice and simple. :^)
---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit):
 - Lexer::consume_equals() now emits Token::Type::Equals for the '='
   separator; it emitted Token::Type::Colon before, so Equals tokens were
   never produced.
 - In the copy of this patch under review, everything written in angle
   brackets had been stripped. Template arguments (ErrorOr<Vector<Token>>,
   Vector<Token>) and #include targets were reconstructed from the names each
   file uses. NOTE(review): the two unchanged #include context lines in the
   LibCMake/Token.h hunk and the include in Position.h are best guesses;
   confirm them against the tree before applying.
 - One-line comments were added to Lexer.cpp and Lexer.h; the hunk sizes and
   the diffstat below were updated to match. The "index" lines still carry
   the original blob ids.

 Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp | 210 +++++++++++++++++++++++
 Userland/Libraries/LibCMake/CMakeCache/Lexer.h   |  47 +++++
 Userland/Libraries/LibCMake/CMakeCache/Token.h   |  55 ++++++
 Userland/Libraries/LibCMake/CMakeLists.txt       |   1 +
 Userland/Libraries/LibCMake/Position.h           |  18 ++
 Userland/Libraries/LibCMake/Token.h              |   6 +-
 6 files changed, 332 insertions(+), 5 deletions(-)
 create mode 100644 Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp
 create mode 100644 Userland/Libraries/LibCMake/CMakeCache/Lexer.h
 create mode 100644 Userland/Libraries/LibCMake/CMakeCache/Token.h
 create mode 100644 Userland/Libraries/LibCMake/Position.h

(limited to 'Userland')

diff --git a/Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp b/Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp
new file mode 100644
index 0000000000..b70e57502f
--- /dev/null
+++ b/Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2023, Sam Atkins
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "Lexer.h"
+#include <AK/CharacterTypes.h>
+#include <AK/Debug.h>
+#include <AK/ScopeLogger.h>
+
+namespace CMake::Cache {
+
+// Characters that may begin a key or type name: ASCII letters, '_' and '-'.
+static bool is_identifier_start_character(u32 c)
+{
+    return AK::is_ascii_alpha(c) || c == '_' || c == '-';
+}
+
+// Characters that may continue a key or type name: the above plus ASCII digits.
+static bool is_identifier_character(u32 c)
+{
+    return AK::is_ascii_alphanumeric(c) || c == '_' || c == '-';
+}
+
+Lexer::Lexer(StringView input)
+    : GenericLexer(input)
+{
+}
+
+// Lexes the whole of `input` and returns the tokens that were produced.
+ErrorOr<Vector<Token>> Lexer::lex(StringView input)
+{
+    Lexer lexer { input };
+    return lexer.lex_file();
+}
+
+// Picks a handler from the first non-whitespace character; unrecognized input becomes Garbage.
+ErrorOr<Vector<Token>> Lexer::lex_file()
+{
+    ScopeLogger logger;
+
+    while (!is_eof()) {
+        skip_whitespace();
+
+        if (is_eof())
+            break;
+
+        if (next_is('#')) {
+            consume_comment();
+            continue;
+        }
+
+        if (next_is("//"sv)) {
+            consume_help_text();
+            continue;
+        }
+
+        if (next_is(is_identifier_start_character)) {
+            consume_variable_definition();
+            continue;
+        }
+
+        consume_garbage();
+    }
+
+    return m_tokens;
+}
+
+// Newlines go through next_line() so that line and column tracking stays up to date.
+void Lexer::skip_whitespace()
+{
+    ScopeLogger log;
+
+    while (!is_eof()) {
+        if (next_is('\n')) {
+            next_line();
+            continue;
+        }
+        auto consumed = consume_while(AK::is_ascii_space);
+        if (consumed.is_empty())
+            break;
+    }
+}
+
+void Lexer::consume_comment()
+{
+    ScopeLogger log;
+
+    auto start = position();
+    VERIFY(consume_specific('#'));
+    auto comment = consume_until('\n');
+    emit_token(Token::Type::Comment, comment, start, position());
+}
+
+void Lexer::consume_help_text()
+{
+    ScopeLogger log;
+
+    auto start = position();
+    VERIFY(consume_specific("//"sv));
+    auto help_text = consume_until('\n');
+    emit_token(Token::Type::HelpText, help_text, start, position());
+}
+
+// Lexes KEY:TYPE=VALUE. If an expected part is missing, what follows is emitted as Garbage.
+void Lexer::consume_variable_definition()
+{
+    ScopeLogger log;
+
+    consume_key();
+
+    if (!next_is(':')) {
+        consume_garbage();
+        return;
+    }
+    consume_colon();
+
+    if (!next_is(is_identifier_start_character)) {
+        consume_garbage();
+        return;
+    }
+    consume_type();
+
+    if (!next_is('=')) {
+        consume_garbage();
+        return;
+    }
+    consume_equals();
+
+    consume_value();
+}
+
+void Lexer::consume_key()
+{
+    ScopeLogger log;
+
+    auto start = position();
+    auto key = consume_while(is_identifier_character);
+    emit_token(Token::Type::Key, key, start, position());
+}
+
+void Lexer::consume_colon()
+{
+    ScopeLogger log;
+
+    auto start = position();
+    VERIFY(consume_specific(':'));
+    emit_token(Token::Type::Colon, ":"sv, start, position());
+}
+
+void Lexer::consume_type()
+{
+    ScopeLogger log;
+
+    auto start = position();
+    auto type = consume_while(is_identifier_character);
+    emit_token(Token::Type::Type, type, start, position());
+}
+
+void Lexer::consume_equals()
+{
+    ScopeLogger log;
+
+    auto start = position();
+    VERIFY(consume_specific('='));
+    emit_token(Token::Type::Equals, "="sv, start, position());
+}
+
+void Lexer::consume_value()
+{
+    ScopeLogger log;
+
+    auto start = position();
+    auto value = consume_until('\n');
+    emit_token(Token::Type::Value, value, start, position());
+}
+
+void Lexer::consume_garbage()
+{
+    ScopeLogger log;
+
+    auto start = position();
+    auto garbage = consume_until('\n');
+    emit_token(Token::Type::Garbage, garbage, start, position());
+}
+
+// The column is the offset from the character after the most recent newline.
+Position Lexer::position() const
+{
+    return Position {
+        .line = m_line,
+        .column = tell() - m_string_offset_after_previous_newline,
+    };
+}
+
+void Lexer::next_line()
+{
+    VERIFY(consume_specific('\n'));
+    m_string_offset_after_previous_newline = tell();
+    m_line++;
+}
+
+void Lexer::emit_token(Token::Type type, StringView value, Position start, Position end)
+{
+    dbgln_if(CMAKE_DEBUG, "Emitting {} token: `{}` ({}:{} to {}:{})", to_string(type), value, start.line, start.column, end.line, end.column);
+    m_tokens.empend(type, value, start, end);
+}
+
+}
diff --git a/Userland/Libraries/LibCMake/CMakeCache/Lexer.h b/Userland/Libraries/LibCMake/CMakeCache/Lexer.h
new file mode 100644
index 0000000000..f258972fb8
--- /dev/null
+++ b/Userland/Libraries/LibCMake/CMakeCache/Lexer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023, Sam Atkins
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/GenericLexer.h>
+#include <AK/Vector.h>
+#include <LibCMake/CMakeCache/Token.h>
+
+namespace CMake::Cache {
+
+// Lexer for CMakeCache.txt contents. Construction is private; use Lexer::lex().
+class Lexer : private GenericLexer {
+public:
+    static ErrorOr<Vector<Token>> lex(StringView input);
+
+private:
+    Lexer(StringView input);
+
+    ErrorOr<Vector<Token>> lex_file();
+
+    void skip_whitespace();
+
+    void consume_comment();
+    void consume_help_text();
+    void consume_variable_definition();
+    void consume_key();
+    void consume_colon();
+    void consume_type();
+    void consume_equals();
+    void consume_value();
+    void consume_garbage();
+
+    Position position() const;
+    void next_line();
+
+    void emit_token(Token::Type, StringView value, Position start, Position end);
+
+    Vector<Token> m_tokens;
+    size_t m_line { 0 };
+    size_t m_string_offset_after_previous_newline { 0 };
+};
+
+}
diff --git a/Userland/Libraries/LibCMake/CMakeCache/Token.h b/Userland/Libraries/LibCMake/CMakeCache/Token.h
new file mode 100644
index 0000000000..35f84d970f
--- /dev/null
+++ b/Userland/Libraries/LibCMake/CMakeCache/Token.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023, Sam Atkins
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <LibCMake/Position.h>
+
+namespace CMake::Cache {
+
+struct Token {
+    enum class Type {
+        Comment,
+        HelpText,
+        Key,
+        Colon,
+        Type,
+        Equals,
+        Value,
+        Garbage,
+    };
+    Type type;
+    StringView value;
+
+    Position start;
+    Position end;
+};
+
+static constexpr StringView to_string(Token::Type type)
+{
+    switch (type) {
+    case Token::Type::Comment:
+        return "Comment"sv;
+    case Token::Type::HelpText:
+        return "HelpText"sv;
+    case Token::Type::Key:
+        return "Key"sv;
+    case Token::Type::Colon:
+        return "Colon"sv;
+    case Token::Type::Type:
+        return "Type"sv;
+    case Token::Type::Equals:
+        return "Equals"sv;
+    case Token::Type::Value:
+        return "Value"sv;
+    case Token::Type::Garbage:
+        return "Garbage"sv;
+    }
+
+    VERIFY_NOT_REACHED();
+}
+
+}
diff --git a/Userland/Libraries/LibCMake/CMakeLists.txt b/Userland/Libraries/LibCMake/CMakeLists.txt
index 89ac0181af..36af8b8e7e 100644
--- a/Userland/Libraries/LibCMake/CMakeLists.txt
+++ b/Userland/Libraries/LibCMake/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(SOURCES
+    CMakeCache/Lexer.cpp
     Lexer.cpp
     SyntaxHighlighter.cpp
     Token.cpp
diff --git a/Userland/Libraries/LibCMake/Position.h b/Userland/Libraries/LibCMake/Position.h
new file mode 100644
index 0000000000..1870cfffe9
--- /dev/null
+++ b/Userland/Libraries/LibCMake/Position.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2023, Sam Atkins
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Types.h>
+
+namespace CMake {
+
+struct Position {
+    size_t line { 0 };
+    size_t column { 0 };
+};
+
+}
diff --git a/Userland/Libraries/LibCMake/Token.h b/Userland/Libraries/LibCMake/Token.h
index b83207b874..ca45a31945 100644
--- a/Userland/Libraries/LibCMake/Token.h
+++ b/Userland/Libraries/LibCMake/Token.h
@@ -8,14 +8,10 @@
 
 #include <AK/String.h>
 #include <AK/Vector.h>
+#include <LibCMake/Position.h>
 
 namespace CMake {
 
-struct Position {
-    size_t line { 0 };
-    size_t column { 0 };
-};
-
 struct VariableReference {
     StringView value;
     Position start;
-- 
cgit v1.2.3