summaryrefslogtreecommitdiff
path: root/DevTools/HackStudio
diff options
context:
space:
mode:
author: Andreas Kling <awesomekling@gmail.com> 2019-10-25 19:52:44 +0200
committer: Andreas Kling <awesomekling@gmail.com> 2019-10-25 19:52:44 +0200
commit: 307cbf83c3834512ecf6d208cf06d177abfbfb56 (patch)
tree: 6ae63107f8feba2bc65923acc065144bca53596b /DevTools/HackStudio
parent: 51e655f9033d4c14119ab2264735545d3cd85a0a (diff)
download: serenity-307cbf83c3834512ecf6d208cf06d177abfbfb56.zip
HackStudio: Start building a C++ lexer to help with syntax highlighting
Diffstat (limited to 'DevTools/HackStudio')
-rw-r--r-- DevTools/HackStudio/CppLexer.cpp 179
-rw-r--r-- DevTools/HackStudio/CppLexer.h 61
-rw-r--r-- DevTools/HackStudio/Makefile 1
3 files changed, 241 insertions, 0 deletions
diff --git a/DevTools/HackStudio/CppLexer.cpp b/DevTools/HackStudio/CppLexer.cpp
new file mode 100644
index 0000000000..8d4c688249
--- /dev/null
+++ b/DevTools/HackStudio/CppLexer.cpp
@@ -0,0 +1,179 @@
+#include "CppLexer.h"
+#include <AK/LogStream.h>
+#include <ctype.h>
+
+CppLexer::CppLexer(const StringView& input) // Copies the given view into m_input; no lexing happens here.
+ : m_input(input)
+{
+}
+
+char CppLexer::peek(int offset) const // Non-consuming lookahead; yields 0 at or beyond the end of the input.
+{
+    const auto position = m_index + offset;
+    const bool past_end = position >= m_input.length();
+    return past_end ? '\0' : m_input[position];
+}
+
+char CppLexer::consume() // Returns the character at m_index and advances m_index by one.
+{
+ ASSERT(m_index < m_input.length()); // Consuming at or past the end of the input is a caller bug.
+ return m_input[m_index++];
+}
+
+static bool is_valid_first_character_of_identifier(char ch) // True for letters (per isalpha), '_' and '$'.
+{
+    return isalpha(static_cast<unsigned char>(ch)) || ch == '_' || ch == '$'; // Cast: a negative char is undefined for isalpha.
+}
+
+static bool is_valid_nonfirst_character_of_identifier(char ch) // True for anything a first character may be, plus digits.
+{
+    return is_valid_first_character_of_identifier(ch) || isdigit(static_cast<unsigned char>(ch)); // Cast: a negative char is undefined for isdigit.
+}
+
+static bool is_keyword(const StringView& string)
+{
+    // The recognized keyword set is currently just "int", "char" and "return".
+    const bool matches_keyword = string == "int" || string == "char" || string == "return";
+    return matches_keyword;
+}
+
+Vector<CppToken> CppLexer::lex() // Tokenizes m_input from m_index to its end; returns the tokens in source order.
+{
+    Vector<CppToken> tokens; // Each token's m_view points into m_input's buffer; no text is copied.
+    // The ctype classifiers below are given unsigned char values: passing a negative char is undefined.
+    auto emit_token = [&](auto type) { // One-character token at m_index; also steps past that character.
+        CppToken token;
+        token.m_type = type;
+        token.m_view = StringView(m_input.characters_without_null_termination() + m_index, 1);
+        tokens.append(token);
+        m_index++;
+    };
+    // begin_token() records a start offset; commit_token() emits the range [token_start_index, m_index).
+    int token_start_index = 0;
+    auto begin_token = [&] {
+        token_start_index = m_index;
+    };
+    auto commit_token = [&](auto type) {
+        CppToken token;
+        token.m_type = type;
+        token.m_view = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
+        tokens.append(token);
+    };
+    // Every pass through the loop recognizes one token and moves m_index forward.
+    while (m_index < m_input.length()) {
+        auto ch = peek();
+        if (isspace(static_cast<unsigned char>(ch))) {
+            begin_token();
+            while (isspace(static_cast<unsigned char>(peek())))
+                consume();
+            commit_token(CppToken::Type::Whitespace);
+            continue;
+        }
+        if (ch == '(') {
+            emit_token(CppToken::Type::LeftParen);
+            continue;
+        }
+        if (ch == ')') {
+            emit_token(CppToken::Type::RightParen);
+            continue;
+        }
+        if (ch == '{') {
+            emit_token(CppToken::Type::LeftCurly);
+            continue;
+        }
+        if (ch == '}') {
+            emit_token(CppToken::Type::RightCurly);
+            continue;
+        }
+        if (ch == '[') {
+            emit_token(CppToken::Type::LeftBracket);
+            continue;
+        }
+        if (ch == ']') {
+            emit_token(CppToken::Type::RightBracket);
+            continue;
+        }
+        if (ch == ',') {
+            emit_token(CppToken::Type::Comma);
+            continue;
+        }
+        if (ch == '*') {
+            emit_token(CppToken::Type::Asterisk);
+            continue;
+        }
+        if (ch == ';') {
+            emit_token(CppToken::Type::Semicolon);
+            continue;
+        }
+        if (ch == '#') { // The directive token runs up to, but does not include, the newline.
+            begin_token();
+            while (peek() && peek() != '\n')
+                consume();
+            commit_token(CppToken::Type::PreprocessorStatement);
+            continue;
+        }
+        if (ch == '/' && peek(1) == '/') { // Line comment: same extent rule as a directive.
+            begin_token();
+            while (peek() && peek() != '\n')
+                consume();
+            commit_token(CppToken::Type::Comment);
+            continue;
+        }
+        if (ch == '/' && peek(1) == '*') {
+            begin_token();
+            consume(); // '/'
+            consume(); // '*'
+            while (peek() && !(peek() == '*' && peek(1) == '/'))
+                consume();
+            // A zero peek() means no terminator was found; the token then covers what was consumed.
+            if (peek()) {
+                consume(); // '*'
+                consume(); // '/'
+            }
+            commit_token(CppToken::Type::Comment); // Spans the whole comment, delimiters included.
+            continue;
+        }
+        if (ch == '"') {
+            begin_token();
+            consume(); // Opening quote.
+            while (peek() && peek() != '"')
+                m_index += (peek() == '\\' && peek(1)) ? 2 : 1; // A backslash and the character it escapes are skipped together.
+            if (peek())
+                consume(); // Closing quote; absent when the literal is unterminated.
+            commit_token(CppToken::Type::DoubleQuotedString);
+            continue;
+        }
+        if (ch == '\'') {
+            begin_token();
+            consume(); // Opening quote.
+            while (peek() && peek() != '\'')
+                m_index += (peek() == '\\' && peek(1)) ? 2 : 1; // A backslash and the character it escapes are skipped together.
+            if (peek())
+                consume(); // Closing quote; absent when the literal is unterminated.
+            commit_token(CppToken::Type::SingleQuotedString);
+            continue;
+        }
+        if (isdigit(static_cast<unsigned char>(ch))) {
+            begin_token();
+            while (peek() && isdigit(static_cast<unsigned char>(peek()))) {
+                consume();
+            }
+            commit_token(CppToken::Type::Number);
+            continue;
+        }
+        if (is_valid_first_character_of_identifier(ch)) {
+            begin_token();
+            while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
+                consume();
+            auto token_view = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
+            if (is_keyword(token_view))
+                commit_token(CppToken::Type::Keyword);
+            else
+                commit_token(CppToken::Type::Identifier);
+            continue;
+        }
+        dbg() << "Unimplemented token character: " << ch;
+        emit_token(CppToken::Type::Invalid); // Keep lexing rather than asserting: unrecognized input must not abort the caller.
+    }
+    return tokens;
+}
diff --git a/DevTools/HackStudio/CppLexer.h b/DevTools/HackStudio/CppLexer.h
new file mode 100644
index 0000000000..80d8515361
--- /dev/null
+++ b/DevTools/HackStudio/CppLexer.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <AK/StringView.h>
+#include <AK/Vector.h>
+// Master list of token kinds; each user defines __TOKEN(x) before expanding this macro.
+#define FOR_EACH_TOKEN_TYPE \
+ __TOKEN(Invalid) \
+ __TOKEN(Whitespace) \
+ __TOKEN(PreprocessorStatement) \
+ __TOKEN(LeftParen) \
+ __TOKEN(RightParen) \
+ __TOKEN(LeftCurly) \
+ __TOKEN(RightCurly) \
+ __TOKEN(LeftBracket) \
+ __TOKEN(RightBracket) \
+ __TOKEN(Comma) \
+ __TOKEN(Asterisk) \
+ __TOKEN(Semicolon) \
+ __TOKEN(DoubleQuotedString) \
+ __TOKEN(SingleQuotedString) \
+ __TOKEN(Comment) \
+ __TOKEN(Number) \
+ __TOKEN(Keyword) \
+ __TOKEN(Identifier)
+
+struct CppToken { // A single lexed token: its type plus a view of the text it covers.
+ enum class Type {
+#define __TOKEN(x) x,
+ FOR_EACH_TOKEN_TYPE
+#undef __TOKEN
+ };
+ // Returns the name of m_type's enumerator as a string literal (e.g. "Identifier").
+ const char* to_string() const
+ {
+ switch (m_type) {
+#define __TOKEN(x) \
+ case Type::x: \
+ return #x;
+ FOR_EACH_TOKEN_TYPE
+#undef __TOKEN
+ }
+ ASSERT_NOT_REACHED(); // Only reached when m_type matches none of the enumerators.
+ }
+ // Both members are public; a default-constructed token has type Invalid.
+ Type m_type { Type::Invalid };
+ StringView m_view;
+};
+
+class CppLexer { // Splits the text of a StringView into a Vector of CppTokens.
+public:
+ CppLexer(const StringView&);
+ // Lexes from the current position to the end of the input and returns every token found.
+ Vector<CppToken> lex();
+
+private:
+ char peek(int offset = 0) const; // Character at m_index + offset, or 0 when that is at or past the end.
+ char consume(); // Returns the character at m_index and advances m_index by one.
+
+ StringView m_input; // The text being lexed.
+ int m_index { 0 }; // Offset of the next unread character in m_input.
+};
diff --git a/DevTools/HackStudio/Makefile b/DevTools/HackStudio/Makefile
index 728552f7c3..dc35d177eb 100644
--- a/DevTools/HackStudio/Makefile
+++ b/DevTools/HackStudio/Makefile
@@ -6,6 +6,7 @@ OBJS = \
TerminalWrapper.o \
FindInFilesWidget.o \
ProcessStateWidget.o \
+ CppLexer.o \
main.o
APP = HackStudio