summary refs log tree commit diff
path: root/Userland/Libraries/LibSQL/Lexer.h
diff options
context:
space:
mode:
author Timothy Flynn <trflynn89@pm.me> 2021-04-18 17:35:40 -0400
committer Andreas Kling <kling@serenityos.org> 2021-04-20 18:28:34 +0200
commit 90517da9ca6d3cb3b44a0c251a926905cb779744 (patch)
tree fea5fb94390aee6c3f6a56c56eac817a3ccbf133 /Userland/Libraries/LibSQL/Lexer.h
parent 55b7b8e93080aa7e9c52db505da32dfa00b4b4bc (diff)
download serenity-90517da9ca6d3cb3b44a0c251a926905cb779744.zip
LibSQL: Introduce a SQL library with a tokenizer
LibSQL aims to be a SQLite clone for SerenityOS. Step 1 is creating a tokenizer to lex SQL tokens. This lexer is heavily influenced by the LibJS lexer.
Diffstat (limited to 'Userland/Libraries/LibSQL/Lexer.h')
-rw-r--r-- Userland/Libraries/LibSQL/Lexer.h 71
1 files changed, 71 insertions, 0 deletions
diff --git a/Userland/Libraries/LibSQL/Lexer.h b/Userland/Libraries/LibSQL/Lexer.h
new file mode 100644
index 0000000000..502bc0668c
--- /dev/null
+++ b/Userland/Libraries/LibSQL/Lexer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include "Token.h"
+#include <AK/HashMap.h>
+#include <AK/String.h>
+#include <AK/StringView.h>
+
+namespace SQL {
+
+class Lexer { // SQL tokenizer: constructed over a source StringView and hands out Token values via next().
+public:
+ explicit Lexer(StringView source); // explicit: a StringView is never implicitly converted into a Lexer.
+
+ Token next(); // Non-const, so a call may change lexer state; presumably returns the next token of the source (body not in this header).
+
+private:
+ void consume(); // Non-const helper; presumably steps m_position / m_current_char forward - confirm in the implementation file.
+
+ bool consume_whitespace_and_comments(); // The consume_* helpers are non-const and return bool; presumably true when input was consumed - TODO confirm.
+ bool consume_numeric_literal();
+ bool consume_exponent();
+ bool consume_hexadecimal_number();
+
+ bool match(char a, char b) const; // const: inspects state only. Presumably tests the current and following characters against a and b - confirm.
+ bool is_identifier_start() const; // All is_* predicates are const and take no arguments, so they can only depend on the lexer's own state.
+ bool is_identifier_middle() const;
+ bool is_numeric_literal_start() const;
+ bool is_line_comment_start() const;
+ bool is_block_comment_start() const;
+ bool is_block_comment_end() const;
+ bool is_line_break() const;
+ bool is_eof() const;
+
+ static HashMap<String, TokenType> s_keywords; // Static (shared by every Lexer); only declared here, so the definitions must live outside this header.
+ static HashMap<char, TokenType> s_one_char_tokens; // Keyed by a single char.
+ static HashMap<String, TokenType> s_two_char_tokens; // Keyed by String rather than char.
+
+ StringView m_source; // Held as a StringView, not a String - presumably non-owning, so the input would need to outlive the Lexer (verify).
+ size_t m_line_number { 1 }; // Initialized to 1.
+ size_t m_line_column { 0 }; // Initialized to 0.
+ char m_current_char { 0 }; // Initialized to 0 (NUL) until the implementation assigns a character.
+ size_t m_position { 0 }; // Initialized to 0; presumably an offset into m_source - confirm.
+};
+
+}