diff options
author | Stephan Unverwerth <s.unverwerth@gmx.de> | 2020-04-05 14:20:58 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-04-05 16:01:22 +0200 |
commit | 500f6d9e3a8f664264a34daa8425b36b327f8377 (patch) | |
tree | 83f9594caad9951b4b03a155f3e0dfa23107b415 | |
parent | b82a2239c6f32ff26d92ea4568b3751113d6054f (diff) | |
download | serenity-500f6d9e3a8f664264a34daa8425b36b327f8377.zip |
LibJS: Add numeric literal parsing for different bases and exponents
-rw-r--r-- | Libraries/LibJS/Lexer.cpp | 71 | ||||
-rw-r--r-- | Libraries/LibJS/Lexer.h | 2 | ||||
-rw-r--r-- | Libraries/LibJS/Tests/numeric-literals-basic.js | 20 | ||||
-rw-r--r-- | Libraries/LibJS/Token.cpp | 19 |
4 files changed, 107 insertions, 5 deletions
diff --git a/Libraries/LibJS/Lexer.cpp b/Libraries/LibJS/Lexer.cpp
index ddf2297306..1cf08bd29d 100644
--- a/Libraries/LibJS/Lexer.cpp
+++ b/Libraries/LibJS/Lexer.cpp
@@ -156,6 +156,16 @@ void Lexer::consume()
     m_current_char = m_source[m_position++];
 }
 
+void Lexer::consume_exponent()
+{
+    consume();
+    if (m_current_char == '-' || m_current_char == '+')
+        consume();
+    while (isdigit(m_current_char)) {
+        consume();
+    }
+}
+
 bool Lexer::is_eof() const
 {
     return m_current_char == EOF;
@@ -186,6 +196,11 @@ bool Lexer::is_block_comment_end() const
     return m_current_char == '*' && m_position < m_source.length() && m_source[m_position] == '/';
 }
 
+bool Lexer::is_numeric_literal_start() const
+{
+    return isdigit(m_current_char) || (m_current_char == '.' && m_position < m_source.length() && isdigit(m_source[m_position]));
+}
+
 void Lexer::syntax_error(const char* msg)
 {
     m_has_errors = true;
@@ -235,10 +250,59 @@ Token Lexer::next()
         } else {
             token_type = it->value;
         }
-    } else if (isdigit(m_current_char)) {
-        consume();
-        while (m_current_char == '.' || isdigit(m_current_char)) {
+    } else if (is_numeric_literal_start()) {
+        if (m_current_char == '0') {
             consume();
+            if (m_current_char == '.') {
+                // decimal
+                consume();
+                while (isdigit(m_current_char)) {
+                    consume();
+                }
+                if (m_current_char == 'e' || m_current_char == 'E') {
+                    consume_exponent();
+                }
+            } else if (m_current_char == 'e' || m_current_char == 'E') {
+                consume_exponent();
+            } else if (m_current_char == 'o' || m_current_char == 'O') {
+                // octal
+                consume();
+                while (m_current_char >= '0' && m_current_char <= '7') {
+                    consume();
+                }
+            } else if (m_current_char == 'b' || m_current_char == 'B') {
+                // binary
+                consume();
+                while (m_current_char == '0' || m_current_char == '1') {
+                    consume();
+                }
+            } else if (m_current_char == 'x' || m_current_char == 'X') {
+                // hexadecimal
+                consume();
+                while (isxdigit(m_current_char)) {
+                    consume();
+                }
+            } else if (isdigit(m_current_char)) {
+                // octal without 'O' prefix. Forbidden in 'strict mode'
+                // FIXME: We need to make sure this produces a syntax error when in strict mode
+                do {
+                    consume();
+                } while (isdigit(m_current_char));
+            }
+        } else {
+            // 1...9 or period
+            while (isdigit(m_current_char)) {
+                consume();
+            }
+            if (m_current_char == '.') {
+                consume();
+                while (isdigit(m_current_char)) {
+                    consume();
+                }
+            }
+            if (m_current_char == 'e' || m_current_char == 'E') {
+                consume_exponent();
+            }
         }
         token_type = TokenType::NumericLiteral;
     } else if (m_current_char == '"' || m_current_char == '\'') {
@@ -330,5 +394,4 @@ Token Lexer::next()
 
     return m_current_token;
 }
-
 }
diff --git a/Libraries/LibJS/Lexer.h b/Libraries/LibJS/Lexer.h
index 9bc286190e..eb1564b280 100644
--- a/Libraries/LibJS/Lexer.h
+++ b/Libraries/LibJS/Lexer.h
@@ -42,12 +42,14 @@ public:
 
 private:
     void consume();
+    void consume_exponent();
     bool is_eof() const;
     bool is_identifier_start() const;
     bool is_identifier_middle() const;
     bool is_line_comment_start() const;
     bool is_block_comment_start() const;
     bool is_block_comment_end() const;
+    bool is_numeric_literal_start() const;
 
     void syntax_error(const char*);
 
diff --git a/Libraries/LibJS/Tests/numeric-literals-basic.js b/Libraries/LibJS/Tests/numeric-literals-basic.js
new file mode 100644
index 0000000000..816d309aed
--- /dev/null
+++ b/Libraries/LibJS/Tests/numeric-literals-basic.js
@@ -0,0 +1,20 @@
+try {
+    assert(0xff === 255);
+    assert(0XFF === 255);
+    assert(0o10 === 8);
+    assert(0O10 === 8);
+    assert(0b10 === 2);
+    assert(0B10 === 2);
+    assert(1e3 === 1000);
+    assert(1e+3 === 1000);
+    assert(1e-3 === 0.001);
+    assert(.1 === 0.1);
+    assert(.1e1 === 1);
+    assert(0.1e1 === 1);
+    assert(.1e+1 === 1);
+    assert(0.1e+1 === 1);
+
+    console.log("PASS");
+} catch (e) {
+    console.log("FAIL: " + e);
+}
diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp
index 22d898b3ae..573e32c8f1 100644
--- a/Libraries/LibJS/Token.cpp
+++ b/Libraries/LibJS/Token.cpp
@@ -27,6 +27,7 @@
 #include "Token.h"
 #include <AK/Assertions.h>
 #include <AK/StringBuilder.h>
+#include <ctype.h>
 
 namespace JS {
 
@@ -52,7 +53,23 @@ const char* Token::name() const
 double Token::double_value() const
 {
     ASSERT(type() == TokenType::NumericLiteral);
-    return strtod(String(m_value).characters(), nullptr);
+    String value_string(m_value);
+    if (value_string[0] == '0' && value_string.length() >= 2) {
+        if (value_string[1] == 'x' || value_string[1] == 'X') {
+            // hexadecimal
+            return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 16));
+        } else if (value_string[1] == 'o' || value_string[1] == 'O') {
+            // octal
+            return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 8));
+        } else if (value_string[1] == 'b' || value_string[1] == 'B') {
+            // binary
+            return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 2));
+        } else if (isdigit(value_string[1])) {
+            // also octal, but syntax error in strict mode
+            return static_cast<double>(strtoul(value_string.characters() + 1, nullptr, 8));
+        }
+    }
+    return strtod(value_string.characters(), nullptr);
 }
 
 String Token::string_value() const