diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-04-20 13:29:06 -0400 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-04-21 21:37:55 +0200 |
commit | 570aa57df37e05729f96bff4a674d74029a9905b (patch) | |
tree | 0bed680467b42e0ae42f78559e70cee142d5ed59 /Userland/Libraries/LibSQL | |
parent | 730fbfb31ed2ed226d9fdce4bee938efc20e1ef2 (diff) | |
download | serenity-570aa57df37e05729f96bff4a674d74029a9905b.zip |
LibSQL: Lex string and blob literals
Blob literals are the same as string literals, except that they are prefixed
with an 'x' or 'X'.
Diffstat (limited to 'Userland/Libraries/LibSQL')
-rw-r--r-- | Userland/Libraries/LibSQL/Lexer.cpp | 59 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/Lexer.h | 5 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/Token.h | 4 |
3 files changed, 61 insertions, 7 deletions
diff --git a/Userland/Libraries/LibSQL/Lexer.cpp b/Userland/Libraries/LibSQL/Lexer.cpp index 7802befe54..f0d9d1384b 100644 --- a/Userland/Libraries/LibSQL/Lexer.cpp +++ b/Userland/Libraries/LibSQL/Lexer.cpp @@ -75,6 +75,18 @@ Token Lexer::next() if (is_eof()) { token_type = found_invalid_comment ? TokenType::Invalid : TokenType::Eof; + } else if (is_numeric_literal_start()) { + token_type = TokenType::NumericLiteral; + if (!consume_numeric_literal()) + token_type = TokenType::Invalid; + } else if (is_string_literal_start()) { + token_type = TokenType::StringLiteral; + if (!consume_string_literal()) + token_type = TokenType::Invalid; + } else if (is_blob_literal_start()) { + token_type = TokenType::BlobLiteral; + if (!consume_blob_literal()) + token_type = TokenType::Invalid; } else if (is_identifier_start()) { do { consume(); @@ -85,10 +97,6 @@ Token Lexer::next() } else { token_type = TokenType::Identifier; } - } else if (is_numeric_literal_start()) { - token_type = TokenType::NumericLiteral; - if (!consume_numeric_literal()) - token_type = TokenType::Invalid; } else { bool found_two_char_token = false; if (m_position < m_source.length()) { @@ -190,7 +198,7 @@ bool Lexer::consume_whitespace_and_comments() bool Lexer::consume_numeric_literal() { - // https://www.sqlite.org/draft/syntax/numeric-literal.html + // https://sqlite.org/syntax/numeric-literal.html bool is_valid_numeric_literal = true; if (m_current_char == '0') { @@ -227,6 +235,29 @@ bool Lexer::consume_numeric_literal() return is_valid_numeric_literal; } +bool Lexer::consume_string_literal() +{ + // https://sqlite.org/lang_expr.html - See "3. Literal Values (Constants)" + bool is_valid_string_literal = true; + consume(); + + while (!is_eof() && !is_string_literal_end()) + consume(); + + if (is_eof()) + is_valid_string_literal = false; + consume(); + + return is_valid_string_literal; +} + +bool Lexer::consume_blob_literal() +{ + // https://sqlite.org/lang_expr.html - See "3. 
Literal Values (Constants)" + consume(); + return consume_string_literal(); +} + bool Lexer::consume_exponent() { consume(); @@ -259,8 +290,7 @@ bool Lexer::match(char a, char b) const if (m_position >= m_source.length()) return false; - return m_current_char == a - && m_source[m_position] == b; + return m_current_char == a && m_source[m_position] == b; } bool Lexer::is_identifier_start() const @@ -278,6 +308,21 @@ bool Lexer::is_numeric_literal_start() const return isdigit(m_current_char) || (m_current_char == '.' && m_position < m_source.length() && isdigit(m_source[m_position])); } +bool Lexer::is_string_literal_start() const +{ + return m_current_char == '\''; +} + +bool Lexer::is_string_literal_end() const +{ + return m_current_char == '\'' && !(m_position < m_source.length() && m_source[m_position] == '\''); +} + +bool Lexer::is_blob_literal_start() const +{ + return match('x', '\'') || match('X', '\''); +} + bool Lexer::is_line_comment_start() const { return match('-', '-'); diff --git a/Userland/Libraries/LibSQL/Lexer.h b/Userland/Libraries/LibSQL/Lexer.h index 502bc0668c..62509a9da1 100644 --- a/Userland/Libraries/LibSQL/Lexer.h +++ b/Userland/Libraries/LibSQL/Lexer.h @@ -44,6 +44,8 @@ private: bool consume_whitespace_and_comments(); bool consume_numeric_literal(); + bool consume_string_literal(); + bool consume_blob_literal(); bool consume_exponent(); bool consume_hexadecimal_number(); @@ -51,6 +53,9 @@ private: bool is_identifier_start() const; bool is_identifier_middle() const; bool is_numeric_literal_start() const; + bool is_string_literal_start() const; + bool is_string_literal_end() const; + bool is_blob_literal_start() const; bool is_line_comment_start() const; bool is_block_comment_start() const; bool is_block_comment_end() const; diff --git a/Userland/Libraries/LibSQL/Token.h b/Userland/Libraries/LibSQL/Token.h index a882b600b5..5acebab0bd 100644 --- a/Userland/Libraries/LibSQL/Token.h +++ b/Userland/Libraries/LibSQL/Token.h @@ -182,6 +182,8 @@ 
namespace SQL { __ENUMERATE_SQL_TOKEN("WITHOUT", Without, Keyword) \ __ENUMERATE_SQL_TOKEN("_identifier_", Identifier, Identifier) \ __ENUMERATE_SQL_TOKEN("_numeric_", NumericLiteral, Number) \ + __ENUMERATE_SQL_TOKEN("_string_", StringLiteral, String) \ + __ENUMERATE_SQL_TOKEN("_blob_", BlobLiteral, Blob) \ __ENUMERATE_SQL_TOKEN("_eof_", Eof, Invalid) \ __ENUMERATE_SQL_TOKEN("_invalid_", Invalid, Invalid) \ __ENUMERATE_SQL_TOKEN("&", Ampersand, Operator) \ @@ -221,6 +223,8 @@ enum class TokenCategory { Keyword, Identifier, Number, + String, + Blob, Operator, Punctuation, }; |