diff options
author | Andreas Kling <kling@serenityos.org> | 2021-01-12 12:17:30 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-01-12 12:17:46 +0100 |
commit | 13d7c09125f8eec703d0a43a9a87fc8aa08f7319 (patch) | |
tree | 70fd643c429cea5c1f9362c2674511d17a53f3b5 /Userland/Libraries/LibCpp/Lexer.h | |
parent | dc28c07fa526841e05e16161c74a6c23984f1dd5 (diff) | |
download | serenity-13d7c09125f8eec703d0a43a9a87fc8aa08f7319.zip |
Libraries: Move to Userland/Libraries/
Diffstat (limited to 'Userland/Libraries/LibCpp/Lexer.h')
-rw-r--r-- | Userland/Libraries/LibCpp/Lexer.h | 146 |
1 files changed, 146 insertions, 0 deletions
diff --git a/Userland/Libraries/LibCpp/Lexer.h b/Userland/Libraries/LibCpp/Lexer.h new file mode 100644 index 0000000000..351dee8aa5 --- /dev/null +++ b/Userland/Libraries/LibCpp/Lexer.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <AK/StringView.h> +#include <AK/Vector.h> + +namespace Cpp { + +#define FOR_EACH_TOKEN_TYPE \ + __TOKEN(Unknown) \ + __TOKEN(Whitespace) \ + __TOKEN(PreprocessorStatement) \ + __TOKEN(IncludeStatement) \ + __TOKEN(IncludePath) \ + __TOKEN(LeftParen) \ + __TOKEN(RightParen) \ + __TOKEN(LeftCurly) \ + __TOKEN(RightCurly) \ + __TOKEN(LeftBracket) \ + __TOKEN(RightBracket) \ + __TOKEN(Less) \ + __TOKEN(Greater) \ + __TOKEN(LessEquals) \ + __TOKEN(GreaterEquals) \ + __TOKEN(LessLess) \ + __TOKEN(GreaterGreater) \ + __TOKEN(LessLessEquals) \ + __TOKEN(GreaterGreaterEquals) \ + __TOKEN(LessGreater) \ + __TOKEN(Comma) \ + __TOKEN(Plus) \ + __TOKEN(PlusPlus) \ + __TOKEN(PlusEquals) \ + __TOKEN(Minus) \ + __TOKEN(MinusMinus) \ + __TOKEN(MinusEquals) \ + __TOKEN(Asterisk) \ + __TOKEN(AsteriskEquals) \ + __TOKEN(Slash) \ + __TOKEN(SlashEquals) \ + __TOKEN(Percent) \ + __TOKEN(PercentEquals) \ + __TOKEN(Caret) \ + __TOKEN(CaretEquals) \ + __TOKEN(ExclamationMark) \ + __TOKEN(ExclamationMarkEquals) \ + __TOKEN(Equals) \ + __TOKEN(EqualsEquals) \ + __TOKEN(And) \ + __TOKEN(AndAnd) \ + __TOKEN(AndEquals) \ + __TOKEN(Pipe) \ + __TOKEN(PipePipe) \ + __TOKEN(PipeEquals) \ + __TOKEN(Tilde) \ + __TOKEN(QuestionMark) \ + __TOKEN(Colon) \ + __TOKEN(ColonColon) \ + __TOKEN(ColonColonAsterisk) \ + __TOKEN(Semicolon) \ + __TOKEN(Dot) \ + __TOKEN(DotAsterisk) \ + __TOKEN(Arrow) \ + __TOKEN(ArrowAsterisk) \ + __TOKEN(DoubleQuotedString) \ + __TOKEN(SingleQuotedString) \ + __TOKEN(RawString) \ + __TOKEN(EscapeSequence) \ + __TOKEN(Comment) \ + __TOKEN(Integer) \ + __TOKEN(Float) \ + __TOKEN(Keyword) \ + __TOKEN(KnownType) \ + __TOKEN(Identifier) + +struct Position { + size_t line; + size_t column; +}; + +struct Token { + enum class Type { +#define __TOKEN(x) x, + FOR_EACH_TOKEN_TYPE +#undef __TOKEN + }; + + const char* to_string() const + { + switch (m_type) { +#define __TOKEN(x) \ + case Type::x: \ + return #x; + FOR_EACH_TOKEN_TYPE +#undef __TOKEN + } + ASSERT_NOT_REACHED(); + } + + Type m_type { Type::Unknown }; + Position m_start; + Position m_end; +}; + +class Lexer { +public: + Lexer(const StringView&); + + Vector<Token> lex(); + +private: + char peek(size_t offset = 0) const; + char consume(); + + StringView m_input; + size_t m_index { 0 }; + Position m_previous_position { 0, 0 }; + Position m_position { 0, 0 }; +}; + +} |