1 files changed, 382 insertions, 125 deletions
diff --git a/Libraries/LibC/stdlib.cpp b/Libraries/LibC/stdlib.cpp
index a823ffaf91..c5d84191cd 100644
--- a/Libraries/LibC/stdlib.cpp
+++ b/Libraries/LibC/stdlib.cpp
@@ -26,6 +26,7 @@
 
 #include <AK/Assertions.h>
 #include <AK/HashMap.h>
+#include <AK/Noncopyable.h>
 #include <AK/StdLibExtras.h>
 #include <AK/Types.h>
 #include <AK/Utf8View.h>
@@ -129,6 +130,130 @@ static inline T strtol_impl(const char* nptr, char** endptr, int base)
     return num;
 }
 
+static void strtons(const char* str, char** endptr)
+{
+    assert(endptr);
+    char* ptr = const_cast<char*>(str);
+    while (isspace(*ptr)) {
+        ptr += 1;
+    }
+    *endptr = ptr;
+}
+
+enum Sign {
+    Negative,
+    Positive,
+};
+
+static Sign strtosign(const char* str, char** endptr)
+{
+    assert(endptr);
+    if (*str == '+') {
+        *endptr = const_cast<char*>(str + 1);
+        return Sign::Positive;
+    } else if (*str == '-') {
+        *endptr = const_cast<char*>(str + 1);
+        return Sign::Negative;
+    } else {
+        *endptr = const_cast<char*>(str);
+        return Sign::Positive;
+    }
+}
+
+enum DigitConsumeDecision {
+    Consumed,
+    PosOverflow,
+    NegOverflow,
+    Invalid,
+};
+
+template<typename T, T min_value, T max_value>
+class NumParser {
+    AK_MAKE_NONMOVABLE(NumParser)
+
+public:
+    NumParser(Sign sign, int base)
+        : m_base(base)
+        , m_num(0)
+        , m_sign(sign)
+    {
+        m_cutoff = positive() ? (max_value / base) : (min_value / base);
+        m_max_digit_after_cutoff = positive() ? (max_value % base) : (min_value % base);
+    }
+
+    int parse_digit(char ch)
+    {
+        int digit;
+        if (isdigit(ch))
+            digit = ch - '0';
+        else if (islower(ch))
+            digit = ch - ('a' - 10);
+        else if (isupper(ch))
+            digit = ch - ('A' - 10);
+        else
+            return -1;
+
+        if (digit >= m_base)
+            return -1;
+
+        return digit;
+    }
+
+    DigitConsumeDecision consume(char ch)
+    {
+        int digit = parse_digit(ch);
+        if (digit == -1)
+            return DigitConsumeDecision::Invalid;
+
+        if (!can_append_digit(digit)) {
+            if (m_sign != Sign::Negative) {
+                return DigitConsumeDecision::PosOverflow;
+            } else {
+                return DigitConsumeDecision::NegOverflow;
+            }
+        }
+
+        m_num *= m_base;
+        m_num += positive() ? digit : -digit;
+
+        return DigitConsumeDecision::Consumed;
+    }
+
+    T number() const { return m_num; };
+
+private:
+    bool can_append_digit(int digit)
+    {
+        const bool is_below_cutoff = positive() ? (m_num < m_cutoff) : (m_num > m_cutoff);
+
+        if (is_below_cutoff) {
+            return true;
+        } else {
+            return m_num == m_cutoff && digit < m_max_digit_after_cutoff;
+        }
+    }
+
+    bool positive() const
+    {
+        return m_sign != Sign::Negative;
+    }
+
+    const T m_base;
+    T m_num;
+    T m_cutoff;
+    int m_max_digit_after_cutoff;
+    Sign m_sign;
+};
+
+typedef NumParser<int, INT_MIN, INT_MAX> IntParser;
+typedef NumParser<long long, LONG_LONG_MIN, LONG_LONG_MAX> LongLongParser;
+
+static bool is_either(char* str, int offset, char lower, char upper)
+{
+    char ch = *(str + offset);
+    return ch == lower || ch == upper;
+}
+
 __attribute__((warn_unused_result)) int __generate_unique_filename(char* pattern)
 {
     size_t length = strlen(pattern);
@@ -308,162 +433,294 @@ int putenv(char* new_var)
 
 double strtod(const char* str, char** endptr)
 {
-    size_t len = strlen(str);
-    size_t weight = 1;
-    int exp_val = 0;
-    double value = 0.0f;
-    double fraction = 0.0f;
-    bool has_sign = false;
-    bool is_negative = false;
-    bool is_fractional = false;
-    bool is_scientific = false;
+    // Parse spaces, sign, and base
+    char* parse_ptr = const_cast<char*>(str);
+    strtons(parse_ptr, &parse_ptr);
+    const Sign sign = strtosign(parse_ptr, &parse_ptr);
+
+    // Parse inf/nan, if applicable.
+    if (is_either(parse_ptr, 0, 'i', 'I')) {
+        if (is_either(parse_ptr, 1, 'n', 'N')) {
+            if (is_either(parse_ptr, 2, 'f', 'F')) {
+                parse_ptr += 3;
+                if (is_either(parse_ptr, 0, 'i', 'I')) {
+                    if (is_either(parse_ptr, 1, 'n', 'N')) {
+                        if (is_either(parse_ptr, 2, 'i', 'I')) {
+                            if (is_either(parse_ptr, 3, 't', 'T')) {
+                                if (is_either(parse_ptr, 4, 'y', 'Y')) {
+                                    parse_ptr += 5;
+                                }
+                            }
+                        }
+                    }
+                }
+                if (endptr)
+                    *endptr = parse_ptr;
+                // Don't set errno to ERANGE here:
+                // The caller may want to distinguish between "input is
+                // literal infinity" and "input is not literal infinity
+                // but did not fit into double".
+                if (sign != Sign::Negative) {
+                    return __builtin_huge_val();
+                } else {
+                    return -__builtin_huge_val();
+                }
+            }
+        }
+    }
+    if (is_either(parse_ptr, 0, 'n', 'N')) {
+        if (is_either(parse_ptr, 1, 'a', 'A')) {
+            if (is_either(parse_ptr, 2, 'n', 'N')) {
+                if (endptr)
+                    *endptr = parse_ptr + 3;
+                errno = ERANGE;
+                if (sign != Sign::Negative) {
+                    return __builtin_nan("");
+                } else {
+                    return -__builtin_nan("");
+                }
+            }
+        }
+    }
 
-    if (str[0] == '-') {
-        is_negative = true;
-        has_sign = true;
+    // Parse base
+    char exponent_lower;
+    char exponent_upper;
+    int base = 10;
+    if (*parse_ptr == '0') {
+        const char base_ch = *(parse_ptr + 1);
+        if (base_ch == 'x' || base_ch == 'X') {
+            base = 16;
+            parse_ptr += 2;
+        }
     }
-    if (str[0] == '+') {
-        has_sign = true;
+
+    if (base == 10) {
+        exponent_lower = 'e';
+        exponent_upper = 'E';
+    } else {
+        exponent_lower = 'p';
+        exponent_upper = 'P';
     }
-    size_t i;
-    for (i = has_sign; i < len; i++) {
 
-        // Looks like we're about to start working on the fractional part
-        if (str[i] == '.') {
-            is_fractional = true;
+    // Parse "digits", possibly keeping track of the exponent offset.
+    // We parse the most significant digits and the position in the
+    // base-`base` representation separately. This allows us to handle
+    // numbers like `0.0000000000000000000000000000000000001234` or
+    // `1234567890123456789012345678901234567890` with ease.
+    LongLongParser digits { sign, base };
+    bool digits_usable = false;
+    bool should_continue = true;
+    bool digits_overflow = false;
+    bool after_decimal = false;
+    int exponent = 0;
+    do {
+        if (!after_decimal && *parse_ptr == '.') {
+            after_decimal = true;
+            parse_ptr += 1;
             continue;
         }
 
-        if (str[i] == 'e' || str[i] == 'E') {
-            if (str[i + 1] == '-')
-                exp_val = -atoi(str + i + 2);
-            else if (str[i + 1] == '+')
-                exp_val = atoi(str + i + 2);
-            else
-                exp_val = atoi(str + i + 1);
+        bool is_a_digit;
+        if (digits_overflow) {
+            is_a_digit = digits.parse_digit(*parse_ptr) != -1;
+        } else {
+            DigitConsumeDecision decision = digits.consume(*parse_ptr);
+            switch (decision) {
+            case DigitConsumeDecision::Consumed:
+                is_a_digit = true;
+                // The very first actual digit must pass here:
+                digits_usable = true;
+                break;
+            case DigitConsumeDecision::PosOverflow:
+                // fallthrough
+            case DigitConsumeDecision::NegOverflow:
+                is_a_digit = true;
+                digits_overflow = true;
+                break;
+            case DigitConsumeDecision::Invalid:
+                is_a_digit = false;
+                break;
+            default:
+                ASSERT_NOT_REACHED();
+            }
+        }
 
-            is_scientific = true;
-            continue;
+        if (is_a_digit) {
+            exponent -= after_decimal ? 1 : 0;
+            exponent += digits_overflow ? 1 : 0;
         }
 
-        if (str[i] < '0' || str[i] > '9' || exp_val != 0)
-            continue;
+        should_continue = is_a_digit;
+        parse_ptr += should_continue;
+    } while (should_continue);
 
-        if (is_fractional) {
-            fraction *= 10;
-            fraction += str[i] - '0';
-            weight *= 10;
+    if (!digits_usable) {
+        // No actual number value available.
+        if (endptr)
+            *endptr = const_cast<char*>(str);
+        return 0.0;
+    }
+
+    // Parse exponent.
+    // We already know the next character is not a digit in the current base,
+    // nor a valid decimal point. Check whether it's an exponent sign.
+    if (*parse_ptr == exponent_lower || *parse_ptr == exponent_upper) {
+        // Need to keep the old parse_ptr around, in case of rollback.
+        char* old_parse_ptr = parse_ptr;
+        parse_ptr += 1;
+
+        // Can't use atol or strtol here: Must accept excessive exponents,
+        // even exponents >64 bits.
+        Sign exponent_sign = strtosign(parse_ptr, &parse_ptr);
+        IntParser exponent_parser { exponent_sign, base };
+        bool exponent_usable = false;
+        bool exponent_overflow = false;
+        should_continue = true;
+        do {
+            bool is_a_digit;
+            if (exponent_overflow) {
+                is_a_digit = exponent_parser.parse_digit(*parse_ptr) != -1;
+            } else {
+                DigitConsumeDecision decision = exponent_parser.consume(*parse_ptr);
+                switch (decision) {
+                case DigitConsumeDecision::Consumed:
+                    is_a_digit = true;
+                    // The very first actual digit must pass here:
+                    exponent_usable = true;
+                    break;
+                case DigitConsumeDecision::PosOverflow:
+                    // fallthrough
+                case DigitConsumeDecision::NegOverflow:
+                    is_a_digit = true;
+                    exponent_overflow = true;
+                    break;
+                case DigitConsumeDecision::Invalid:
+                    is_a_digit = false;
+                    break;
+                default:
+                    ASSERT_NOT_REACHED();
+                }
+            }
+
+            should_continue = is_a_digit;
+            parse_ptr += should_continue;
+        } while (should_continue);
+
+        if (!exponent_usable) {
+            parse_ptr = old_parse_ptr;
+        } else if (exponent_overflow) {
+            // Technically this is wrong. If someone gives us 5GB of digits,
+            // and then an exponent of -5_000_000_000, the resulting exponent
+            // should be around 0.
+            // However, I think it's safe to assume that we never have to deal
+            // with that many digits anyway.
+            if (sign != Sign::Negative) {
+                exponent = INT_MIN;
+            } else {
+                exponent = INT_MAX;
+            }
         } else {
-            value = value * 10;
-            value += str[i] - '0';
+            // Literal exponent is usable and fits in an int.
+            // However, `exponent + exponent_parser.number()` might overflow an int.
+            // This would result in the wrong sign of the exponent!
+            long long new_exponent = static_cast<long long>(exponent) + static_cast<long long>(exponent_parser.number());
+            if (new_exponent < INT_MIN) {
+                exponent = INT_MIN;
+            } else if (new_exponent > INT_MAX) {
+                exponent = INT_MAX;
+            } else {
+                exponent = static_cast<int>(new_exponent);
+            }
         }
     }
 
-    fraction /= weight;
-    value += fraction;
+    // Parsing finished. now we only have to compute the result.
+    if (endptr)
+        *endptr = const_cast<char*>(parse_ptr);
 
-    if (is_scientific) {
-        bool divide = exp_val < 0;
-        if (divide)
-            exp_val *= -1;
+    // If `digits` is zero, we don't even have to look at `exponent`.
+    if (digits.number() == 0) {
+        if (sign != Sign::Negative) {
+            return 0.0;
+        } else {
+            return -0.0;
+        }
+    }
 
-        for (int i = 0; i < exp_val; i++) {
-            if (divide)
-                value /= 10;
-            else
-                value *= 10;
+    // Deal with extreme exponents.
+    // The smallest normal is 2^-1022.
+    // The smallest denormal is 2^-1074.
+    // The largest number in `digits` is 2^63 - 1.
+    // Therefore, if "base^exponent" is smaller than 2^-(1074+63), the result is 0.0 anyway.
+    // This threshold is roughly 5.3566 * 10^-343.
+    // So if the resulting exponent is -344 or lower (closer to -inf),
+    // the result is 0.0 anyway.
+    // We only need to avoid false positives, so we can ignore base 16.
+    if (exponent <= -344) {
+        errno = ERANGE;
+        // Definitely can't be represented more precisely.
+        // I lied, sometimes the result is +0.0, and sometimes -0.0.
+        if (sign != Sign::Negative) {
+            return 0.0;
+        } else {
+            return -0.0;
         }
     }
-    //FIXME: Not entirely sure if this is correct, but seems to work.
-    if (endptr)
-        *endptr = const_cast<char*>(str + i);
-    return is_negative ? -value : value;
+    // The largest normal is 2^+1024-eps.
+    // The smallest number in `digits` is 1.
+    // Therefore, if "base^exponent" is 2^+1024, the result is INF anyway.
+    // This threshold is roughly 1.7977 * 10^-308.
+    // So if the resulting exponent is +309 or higher,
+    // the result is INF anyway.
+    // We only need to avoid false positives, so we can ignore base 16.
+    if (exponent >= 309) {
+        errno = ERANGE;
+        // Definitely can't be represented more precisely.
+        // I lied, sometimes the result is +INF, and sometimes -INF.
+        if (sign != Sign::Negative) {
+            return __builtin_huge_val();
+        } else {
+            return -__builtin_huge_val();
+        }
+    }
+
+    // TODO: If `exponent` is large, this could be made faster.
+    double value = digits.number();
+    if (exponent < 0) {
+        exponent = -exponent;
+        for (int i = 0; i < exponent; ++i) {
+            value /= base;
+        }
+        if (value == -0.0 || value == +0.0) {
+            errno = ERANGE;
+        }
+    } else if (exponent > 0) {
+        for (int i = 0; i < exponent; ++i) {
+            value *= base;
+        }
+        if (value == -__builtin_huge_val() || value == +__builtin_huge_val()) {
+            errno = ERANGE;
+        }
+    }
+
+    return value;
 }
 
 long double strtold(const char* str, char** endptr)
 {
-    (void)str;
-    (void)endptr;
-    dbgprintf("LibC: strtold: '%s'\n", str);
-    ASSERT_NOT_REACHED();
+    assert(sizeof(double) == sizeof(long double));
+    return strtod(str, endptr);
 }
 
 float strtof(const char* str, char** endptr)
 {
-    (void)str;
-    (void)endptr;
-    dbgprintf("LibC: strtof: '%s'\n", str);
-    ASSERT_NOT_REACHED();
+    return strtod(str, endptr);
 }
 
 double atof(const char* str)
 {
-    size_t len = strlen(str);
-    size_t weight = 1;
-    int exp_val = 0;
-    double value = 0.0f;
-    double fraction = 0.0f;
-    bool has_sign = false;
-    bool is_negative = false;
-    bool is_fractional = false;
-    bool is_scientific = false;
-
-    if (str[0] == '-') {
-        is_negative = true;
-        has_sign = true;
-    }
-    if (str[0] == '+') {
-        has_sign = true;
-    }
-
-    for (size_t i = has_sign; i < len; i++) {
-
-        // Looks like we're about to start working on the fractional part
-        if (str[i] == '.') {
-            is_fractional = true;
-            continue;
-        }
-
-        if (str[i] == 'e' || str[i] == 'E') {
-            if (str[i + 1] == '-' || str[i + 1] == '+')
-                exp_val = atoi(str + i + 2);
-            else
-                exp_val = atoi(str + i + 1);
-
-            is_scientific = true;
-            continue;
-        }
-
-        if (str[i] < '0' || str[i] > '9' || exp_val != 0)
-            continue;
-
-        if (is_fractional) {
-            fraction *= 10;
-            fraction += str[i] - '0';
-            weight *= 10;
-        } else {
-            value = value * 10;
-            value += str[i] - '0';
-        }
-    }
-
-    fraction /= weight;
-    value += fraction;
-
-    if (is_scientific) {
-        bool divide = exp_val < 0;
-        if (divide)
-            exp_val *= -1;
-
-        for (int i = 0; i < exp_val; i++) {
-            if (divide)
-                value /= 10;
-            else
-                value *= 10;
-        }
-    }
-
-    return is_negative ? -value : value;
+    return strtod(str, nullptr);
 }
 
 int atoi(const char* str)