LibJS+LibCrypto: Allow '_' as a numeric literal separator :^)

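This patch adds support for the NumericLiteralSeparator production from the ECMAScript grammar: the lexer now accepts '_' between digits of a numeric literal, and the Number and BigInt value parsers skip it.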
author: Andreas Kling <kling@serenityos.org> 2021-06-26 16:30:05 +0200
committer: Andreas Kling <kling@serenityos.org> 2021-06-26 16:30:35 +0200
commit: 49018553d332280bc1f15ee102ad8235d66ce76d (patch)
tree: 2bf885800fcd37948ad4abdc5e6c16d9d7902cbd /Userland
parent: 527c639c1f60ed562bb274de3714d72ec74e5c9d (diff)
download: serenity-49018553d332280bc1f15ee102ad8235d66ce76d.zip
4 files changed, 61 insertions, 15 deletions
diff --git a/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp b/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp
index 47d19333ff..61dd2f0321 100644
--- a/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp
+++ b/Userland/Libraries/LibCrypto/BigInt/UnsignedBigInteger.cpp
@@ -72,6 +72,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base10(const String& str)
     UnsignedBigInteger ten { 10 };
 
     for (auto& c : str) {
+        if (c == '_')
+            continue;
         result = result.multiplied_by(ten).plus(parse_ascii_digit(c));
     }
     return result;
@@ -83,6 +85,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base2(const String& str)
     UnsignedBigInteger two { 2 };
 
     for (auto& c : str) {
+        if (c == '_')
+            continue;
         result = result.multiplied_by(two).plus(parse_ascii_digit(c));
     }
     return result;
@@ -94,6 +98,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base8(const String& str)
     UnsignedBigInteger eight { 8 };
 
     for (auto& c : str) {
+        if (c == '_')
+            continue;
         result = result.multiplied_by(eight).plus(parse_ascii_digit(c));
     }
     return result;
@@ -105,6 +111,8 @@ UnsignedBigInteger UnsignedBigInteger::from_base16(const String& str)
     UnsignedBigInteger sixteen { 16 };
 
     for (auto& c : str) {
+        if (c == '_')
+            continue;
         result = result.multiplied_by(sixteen).plus(parse_ascii_hex_digit(c));
     }
     return result;
diff --git a/Userland/Libraries/LibJS/Lexer.cpp b/Userland/Libraries/LibJS/Lexer.cpp
index 910d786483..5d39c5431e 100644
--- a/Userland/Libraries/LibJS/Lexer.cpp
+++ b/Userland/Libraries/LibJS/Lexer.cpp
@@ -193,6 +193,17 @@ void Lexer::consume()
     m_current_char = m_source[m_position++];
 }
 
+bool Lexer::consume_decimal_number()
+{
+    if (!is_ascii_digit(m_current_char))
+        return false;
+
+    while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit)) {
+        consume();
+    }
+    return true;
+}
+
 bool Lexer::consume_exponent()
 {
     consume();
@@ -202,21 +213,22 @@ bool Lexer::consume_exponent()
     if (!is_ascii_digit(m_current_char))
         return false;
 
-    while (is_ascii_digit(m_current_char)) {
-        consume();
-    }
-    return true;
+    return consume_decimal_number();
+}
+
+static constexpr bool is_octal_digit(char ch)
+{
+    return ch >= '0' && ch <= '7';
 }
 
 bool Lexer::consume_octal_number()
 {
     consume();
-    if (!(m_current_char >= '0' && m_current_char <= '7'))
+    if (!is_octal_digit(m_current_char))
         return false;
 
-    while (m_current_char >= '0' && m_current_char <= '7') {
+    while (is_octal_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_octal_digit))
         consume();
-    }
 
     return true;
 }
@@ -227,24 +239,38 @@ bool Lexer::consume_hexadecimal_number()
     if (!is_ascii_hex_digit(m_current_char))
         return false;
 
-    while (is_ascii_hex_digit(m_current_char))
+    while (is_ascii_hex_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_hex_digit))
         consume();
 
     return true;
 }
 
+static constexpr bool is_binary_digit(char ch)
+{
+    return ch == '0' || ch == '1';
+}
+
 bool Lexer::consume_binary_number()
 {
     consume();
-    if (!(m_current_char == '0' || m_current_char == '1'))
+    if (!is_binary_digit(m_current_char))
         return false;
 
-    while (m_current_char == '0' || m_current_char == '1')
+    while (is_binary_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_binary_digit))
         consume();
 
     return true;
 }
 
+template<typename Callback>
+bool Lexer::match_numeric_literal_separator_followed_by(Callback callback) const
+{
+    if (m_position >= m_source.length())
+        return false;
+    return m_current_char == '_'
+        && callback(m_source[m_position]);
+}
+
 bool Lexer::match(char a, char b) const
 {
     if (m_position >= m_source.length())
@@ -460,7 +486,7 @@ Token Lexer::next()
             if (m_current_char == '.') {
                 // decimal
                 consume();
-                while (is_ascii_digit(m_current_char))
+                while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit))
                     consume();
                 if (m_current_char == 'e' || m_current_char == 'E')
                     is_invalid_numeric_literal = !consume_exponent();
@@ -494,11 +520,11 @@ Token Lexer::next()
                 // octal without '0o' prefix. Forbidden in 'strict mode'
                 do {
                     consume();
-                } while (is_ascii_digit(m_current_char));
+                } while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit));
             }
         } else {
             // 1...9 or period
-            while (is_ascii_digit(m_current_char))
+            while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit))
                 consume();
             if (m_current_char == 'n') {
                 consume();
@@ -506,7 +532,7 @@ Token Lexer::next()
             } else {
                 if (m_current_char == '.') {
                     consume();
-                    while (is_ascii_digit(m_current_char))
+                    while (is_ascii_digit(m_current_char) || match_numeric_literal_separator_followed_by(is_ascii_digit))
                         consume();
                 }
                 if (m_current_char == 'e' || m_current_char == 'E')
diff --git a/Userland/Libraries/LibJS/Lexer.h b/Userland/Libraries/LibJS/Lexer.h
index 616014499a..a4d1613c42 100644
--- a/Userland/Libraries/LibJS/Lexer.h
+++ b/Userland/Libraries/LibJS/Lexer.h
@@ -29,6 +29,7 @@ private:
     bool consume_octal_number();
     bool consume_hexadecimal_number();
     bool consume_binary_number();
+    bool consume_decimal_number();
     bool is_eof() const;
     bool is_line_terminator() const;
     bool is_identifier_start() const;
@@ -40,6 +41,8 @@ private:
     bool match(char, char) const;
     bool match(char, char, char) const;
     bool match(char, char, char, char) const;
+    template<typename Callback>
+    bool match_numeric_literal_separator_followed_by(Callback) const;
     bool slash_means_division() const;
 
     StringView m_source;
diff --git a/Userland/Libraries/LibJS/Token.cpp b/Userland/Libraries/LibJS/Token.cpp
index 177fac6d61..6f2b0ebde0 100644
--- a/Userland/Libraries/LibJS/Token.cpp
+++ b/Userland/Libraries/LibJS/Token.cpp
@@ -53,7 +53,16 @@ TokenCategory Token::category() const
 double Token::double_value() const
 {
     VERIFY(type() == TokenType::NumericLiteral);
-    String value_string(m_value);
+
+    StringBuilder builder;
+
+    for (auto ch : m_value) {
+        if (ch == '_')
+            continue;
+        builder.append(ch);
+    }
+
+    String value_string = builder.to_string();
     if (value_string[0] == '0' && value_string.length() >= 2) {
         if (value_string[1] == 'x' || value_string[1] == 'X') {
             // hexadecimal
author	Andreas Kling <kling@serenityos.org>	2021-06-26 16:30:05 +0200
committer	Andreas Kling <kling@serenityos.org>	2021-06-26 16:30:35 +0200
commit	49018553d332280bc1f15ee102ad8235d66ce76d (patch)
tree	2bf885800fcd37948ad4abdc5e6c16d9d7902cbd /Userland
parent	527c639c1f60ed562bb274de3714d72ec74e5c9d (diff)
download	serenity-49018553d332280bc1f15ee102ad8235d66ce76d.zip