summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Atkins <atkinssj@serenityos.org>2023-03-07 20:33:16 +0000
committerLinus Groh <mail@linusgroh.de>2023-03-08 14:21:37 +0000
commitbb07d678ac1286d9632efb21a182b53d7ca14607 (patch)
treeca5d4c7bf2e63ec60e6e7d217c80919b88541f13
parent515fca4f7a756c1c40c7a52ca25560b12d2e948b (diff)
downloadserenity-bb07d678ac1286d9632efb21a182b53d7ca14607.zip
LibCMake: Add a lexer for CMakeCache.txt
This syntax is completely different from that of regular CMake files. It is also undocumented and subject to change, but it is nice and simple. :^)
-rw-r--r--Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp203
-rw-r--r--Userland/Libraries/LibCMake/CMakeCache/Lexer.h46
-rw-r--r--Userland/Libraries/LibCMake/CMakeCache/Token.h55
-rw-r--r--Userland/Libraries/LibCMake/CMakeLists.txt1
-rw-r--r--Userland/Libraries/LibCMake/Position.h18
-rw-r--r--Userland/Libraries/LibCMake/Token.h6
6 files changed, 324 insertions, 5 deletions
diff --git a/Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp b/Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp
new file mode 100644
index 0000000000..b70e57502f
--- /dev/null
+++ b/Userland/Libraries/LibCMake/CMakeCache/Lexer.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "Lexer.h"
+#include <AK/CharacterTypes.h>
+#include <AK/Debug.h>
+#include <AK/ScopeLogger.h>
+
+namespace CMake::Cache {
+
+// Returns true if `c` may begin a key or type name: an ASCII letter, '_' or '-'.
+static bool is_identifier_start_character(u32 c)
+{
+    if (c == '_' || c == '-')
+        return true;
+    return AK::is_ascii_alpha(c);
+}
+
+// Returns true if `c` may appear inside a key or type name: an ASCII letter or digit, '_' or '-'.
+static bool is_identifier_character(u32 c)
+{
+    if (c == '_' || c == '-')
+        return true;
+    return AK::is_ascii_alphanumeric(c);
+}
+
+// Hands `input` to the GenericLexer base; the other members keep their in-class defaults.
+Lexer::Lexer(StringView input)
+    : GenericLexer { input }
+{
+}
+
+// Public entry point: lexes `input` with a throwaway Lexer and returns whatever lex_file() produces.
+ErrorOr<Vector<Token>> Lexer::lex(StringView input)
+{
+    return Lexer { input }.lex_file();
+}
+
+// Lexes the entire input and returns the tokens that were emitted.
+//
+// After skipping whitespace, each iteration dispatches on what comes next:
+//   '#'                         -> comment
+//   "//"                        -> help text
+//   identifier start character  -> variable definition
+//   anything else               -> garbage
+//
+// None of the paths visible here produce an error; the ErrorOr return type is part of the interface.
+// The token vector is moved out, so m_tokens is left moved-from afterwards. The only caller visible
+// here, lex(), discards the Lexer immediately after this returns.
+ErrorOr<Vector<Token>> Lexer::lex_file()
+{
+    ScopeLogger<CMAKE_DEBUG> logger;
+
+    while (!is_eof()) {
+        skip_whitespace();
+
+        // skip_whitespace() may have consumed the rest of the input.
+        if (is_eof())
+            break;
+
+        if (next_is('#')) {
+            consume_comment();
+            continue;
+        }
+
+        if (next_is("//"sv)) {
+            consume_help_text();
+            continue;
+        }
+
+        if (next_is(is_identifier_start_character)) {
+            consume_variable_definition();
+            continue;
+        }
+
+        consume_garbage();
+    }
+
+    // Hand the buffer over instead of copying every token out of a lexer that is about to die.
+    return move(m_tokens);
+}
+
+// Skips over whitespace, keeping the line/column bookkeeping in sync.
+//
+// Every '\n' is consumed via next_line(), so that m_line and the column origin are updated.
+// All other ASCII whitespace is consumed in bulk. Stops at EOF or at the first non-whitespace character.
+void Lexer::skip_whitespace()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    while (!is_eof()) {
+        if (next_is('\n')) {
+            next_line();
+            continue;
+        }
+        // AK::is_ascii_space() also matches '\n'. It must be excluded here, otherwise a newline that
+        // follows other whitespace (an indented blank line, or the "\r\n" of a CRLF file) would be
+        // swallowed without going through next_line(), and every later position would be wrong.
+        auto consumed = consume_while([](char c) { return c != '\n' && AK::is_ascii_space(c); });
+        if (consumed.is_empty())
+            break;
+    }
+}
+
+// Lexes a '#' comment. The token's value is the text that consume_until('\n') yields after the '#';
+// its start position is taken before the '#'.
+void Lexer::consume_comment()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    auto const begin = position();
+    VERIFY(consume_specific('#'));
+    auto const text = consume_until('\n');
+    auto const finish = position();
+    emit_token(Token::Type::Comment, text, begin, finish);
+}
+
+// Lexes a "//" help-text line. The token's value is the text that consume_until('\n') yields after
+// the "//"; its start position is taken before the "//".
+void Lexer::consume_help_text()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    auto const begin = position();
+    VERIFY(consume_specific("//"sv));
+    auto const text = consume_until('\n');
+    auto const finish = position();
+    emit_token(Token::Type::HelpText, text, begin, finish);
+}
+
+// Lexes one `KEY:TYPE=VALUE` entry, emitting a token for each piece in turn.
+// As soon as the expected next piece is missing, whatever remains is handed to consume_garbage().
+void Lexer::consume_variable_definition()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    consume_key();
+
+    if (next_is(':')) {
+        consume_colon();
+
+        if (next_is(is_identifier_start_character)) {
+            consume_type();
+
+            if (next_is('=')) {
+                consume_equals();
+                consume_value();
+                return;
+            }
+        }
+    }
+
+    // One of the expected pieces was not there.
+    consume_garbage();
+}
+
+// Emits a Key token covering the run of identifier characters at the current position.
+void Lexer::consume_key()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    auto const begin = position();
+    auto const name = consume_while(is_identifier_character);
+    auto const finish = position();
+    emit_token(Token::Type::Key, name, begin, finish);
+}
+
+// Consumes the ':' between key and type and emits a Colon token for it.
+// The caller must have checked that ':' is next; this is VERIFY'd.
+void Lexer::consume_colon()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    auto const begin = position();
+    VERIFY(consume_specific(':'));
+    auto const finish = position();
+    emit_token(Token::Type::Colon, ":"sv, begin, finish);
+}
+
+// Emits a Type token covering the run of identifier characters at the current position.
+void Lexer::consume_type()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    auto const begin = position();
+    auto const type_name = consume_while(is_identifier_character);
+    auto const finish = position();
+    emit_token(Token::Type::Type, type_name, begin, finish);
+}
+
+// Consumes the '=' between type and value and emits an Equals token for it.
+// The caller must have checked that '=' is next; this is VERIFY'd.
+void Lexer::consume_equals()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    auto start = position();
+    VERIFY(consume_specific('='));
+    // This must be Equals, not Colon: consumers dispatching on the token type would otherwise
+    // be unable to tell the two separators apart.
+    emit_token(Token::Type::Equals, "="sv, start, position());
+}
+
+// Emits a Value token holding whatever consume_until('\n') yields from the current position.
+void Lexer::consume_value()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    auto const begin = position();
+    auto const text = consume_until('\n');
+    auto const finish = position();
+    emit_token(Token::Type::Value, text, begin, finish);
+}
+
+// Emits a Garbage token holding whatever consume_until('\n') yields from the current position.
+// Used for input that does not fit any recognised construct.
+void Lexer::consume_garbage()
+{
+    ScopeLogger<CMAKE_DEBUG> log;
+
+    auto const begin = position();
+    auto const junk = consume_until('\n');
+    auto const finish = position();
+    emit_token(Token::Type::Garbage, junk, begin, finish);
+}
+
+// Returns the current line together with the column, computed as the distance between tell()
+// and the offset recorded by the last next_line() call.
+Position Lexer::position() const
+{
+    auto const column = tell() - m_string_offset_after_previous_newline;
+    return Position { .line = m_line, .column = column };
+}
+
+// Consumes a '\n' (VERIFY'd to be next), bumps the line counter and records the offset right
+// after the newline as the new column origin.
+void Lexer::next_line()
+{
+    VERIFY(consume_specific('\n'));
+    ++m_line;
+    m_string_offset_after_previous_newline = tell();
+}
+
+// Logs the token via dbgln_if(CMAKE_DEBUG, ...) and appends it to m_tokens.
+void Lexer::emit_token(Token::Type type, StringView value, Position start, Position end)
+{
+    dbgln_if(CMAKE_DEBUG,
+        "Emitting {} token: `{}` ({}:{} to {}:{})",
+        to_string(type), value,
+        start.line, start.column,
+        end.line, end.column);
+    m_tokens.empend(type, value, start, end);
+}
+
+}
diff --git a/Userland/Libraries/LibCMake/CMakeCache/Lexer.h b/Userland/Libraries/LibCMake/CMakeCache/Lexer.h
new file mode 100644
index 0000000000..f258972fb8
--- /dev/null
+++ b/Userland/Libraries/LibCMake/CMakeCache/Lexer.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/GenericLexer.h>
+#include <AK/Vector.h>
+#include <LibCMake/CMakeCache/Token.h>
+
+namespace CMake::Cache {
+
+// Lexer for the CMakeCache.txt format.
+//
+// GenericLexer is a private base and the constructor is private, so the static lex() is the
+// only way to use this class from outside.
+class Lexer : private GenericLexer {
+public:
+    // Lexes `input` and returns the resulting tokens.
+    // NOTE(review): Token::value is a StringView, so the tokens presumably reference `input`'s
+    // storage, which would then need to outlive them - confirm against callers.
+    static ErrorOr<Vector<Token>> lex(StringView input);
+
+private:
+    // explicit: a StringView should never convert into a Lexer implicitly.
+    explicit Lexer(StringView input);
+
+    // Main loop: runs until EOF and returns the collected tokens.
+    ErrorOr<Vector<Token>> lex_file();
+
+    void skip_whitespace();
+
+    // Each consume_*() emits one token for the construct at the current position.
+    // consume_variable_definition() drives the key/colon/type/equals/value helpers.
+    void consume_comment();
+    void consume_help_text();
+    void consume_variable_definition();
+    void consume_key();
+    void consume_colon();
+    void consume_type();
+    void consume_equals();
+    void consume_value();
+    void consume_garbage();
+
+    // Current line/column, derived from m_line and m_string_offset_after_previous_newline.
+    Position position() const;
+    // Consumes a '\n' and updates the line/column bookkeeping.
+    void next_line();
+
+    void emit_token(Token::Type, StringView value, Position start, Position end);
+
+    // Tokens emitted so far.
+    Vector<Token> m_tokens;
+    // Current line index; starts at 0 and is incremented by next_line().
+    size_t m_line { 0 };
+    // Value of tell() right after the newline most recently consumed by next_line().
+    size_t m_string_offset_after_previous_newline { 0 };
+};
+
+}
diff --git a/Userland/Libraries/LibCMake/CMakeCache/Token.h b/Userland/Libraries/LibCMake/CMakeCache/Token.h
new file mode 100644
index 0000000000..35f84d970f
--- /dev/null
+++ b/Userland/Libraries/LibCMake/CMakeCache/Token.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <LibCMake/Position.h>
+
+namespace CMake::Cache {
+
+// One lexed piece of a CMakeCache.txt file.
+// Member order matters: the lexer constructs tokens positionally as (type, value, start, end).
+struct Token {
+    enum class Type {
+        Comment,  // Line introduced by '#'.
+        HelpText, // Line introduced by "//".
+        Key,      // First part of a `KEY:TYPE=VALUE` entry.
+        Colon,    // The ':' separator.
+        Type,     // Type part of an entry.
+        Equals,   // The '=' separator.
+        Value,    // Text following the '='.
+        Garbage,  // Input that did not match any of the above.
+    };
+    Type type;
+    // Text of the token.
+    StringView value;
+
+    // Positions recorded by the lexer before and after consuming the token.
+    Position start;
+    Position end;
+};
+
+// Maps a Token::Type to its enumerator name, e.g. for debug output.
+// Hits VERIFY_NOT_REACHED() if `type` is not one of the declared enumerators.
+static constexpr StringView to_string(Token::Type type)
+{
+    using Type = Token::Type;
+
+    switch (type) {
+    case Type::Comment:
+        return "Comment"sv;
+    case Type::HelpText:
+        return "HelpText"sv;
+    case Type::Key:
+        return "Key"sv;
+    case Type::Colon:
+        return "Colon"sv;
+    case Type::Type:
+        return "Type"sv;
+    case Type::Equals:
+        return "Equals"sv;
+    case Type::Value:
+        return "Value"sv;
+    case Type::Garbage:
+        return "Garbage"sv;
+    }
+
+    VERIFY_NOT_REACHED();
+}
+
+}
diff --git a/Userland/Libraries/LibCMake/CMakeLists.txt b/Userland/Libraries/LibCMake/CMakeLists.txt
index 89ac0181af..36af8b8e7e 100644
--- a/Userland/Libraries/LibCMake/CMakeLists.txt
+++ b/Userland/Libraries/LibCMake/CMakeLists.txt
@@ -1,4 +1,5 @@
set(SOURCES
+ CMakeCache/Lexer.cpp
Lexer.cpp
SyntaxHighlighter.cpp
Token.cpp
diff --git a/Userland/Libraries/LibCMake/Position.h b/Userland/Libraries/LibCMake/Position.h
new file mode 100644
index 0000000000..1870cfffe9
--- /dev/null
+++ b/Userland/Libraries/LibCMake/Position.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Types.h>
+
+namespace CMake {
+
+// A line/column pair identifying a location in lexed input. Both fields default to 0.
+// Member order matters: Position is built with designated initializers (.line, then .column).
+struct Position {
+    size_t line { 0 };
+    size_t column { 0 };
+};
+
+}
diff --git a/Userland/Libraries/LibCMake/Token.h b/Userland/Libraries/LibCMake/Token.h
index b83207b874..ca45a31945 100644
--- a/Userland/Libraries/LibCMake/Token.h
+++ b/Userland/Libraries/LibCMake/Token.h
@@ -8,14 +8,10 @@
#include <AK/StringView.h>
#include <AK/Vector.h>
+#include <LibCMake/Position.h>
namespace CMake {
-struct Position {
- size_t line { 0 };
- size_t column { 0 };
-};
-
struct VariableReference {
StringView value;
Position start;