diff options
-rw-r--r-- | Libraries/LibC/stdlib.cpp | 507 |
1 files changed, 382 insertions, 125 deletions
diff --git a/Libraries/LibC/stdlib.cpp b/Libraries/LibC/stdlib.cpp index a823ffaf91..c5d84191cd 100644 --- a/Libraries/LibC/stdlib.cpp +++ b/Libraries/LibC/stdlib.cpp @@ -26,6 +26,7 @@ #include <AK/Assertions.h> #include <AK/HashMap.h> +#include <AK/Noncopyable.h> #include <AK/StdLibExtras.h> #include <AK/Types.h> #include <AK/Utf8View.h> @@ -129,6 +130,130 @@ static inline T strtol_impl(const char* nptr, char** endptr, int base) return num; } +static void strtons(const char* str, char** endptr) +{ + assert(endptr); + char* ptr = const_cast<char*>(str); + while (isspace(*ptr)) { + ptr += 1; + } + *endptr = ptr; +} + +enum Sign { + Negative, + Positive, +}; + +static Sign strtosign(const char* str, char** endptr) +{ + assert(endptr); + if (*str == '+') { + *endptr = const_cast<char*>(str + 1); + return Sign::Positive; + } else if (*str == '-') { + *endptr = const_cast<char*>(str + 1); + return Sign::Negative; + } else { + *endptr = const_cast<char*>(str); + return Sign::Positive; + } +} + +enum DigitConsumeDecision { + Consumed, + PosOverflow, + NegOverflow, + Invalid, +}; + +template<typename T, T min_value, T max_value> +class NumParser { + AK_MAKE_NONMOVABLE(NumParser) + +public: + NumParser(Sign sign, int base) + : m_base(base) + , m_num(0) + , m_sign(sign) + { + m_cutoff = positive() ? (max_value / base) : (min_value / base); + m_max_digit_after_cutoff = positive() ? (max_value % base) : (min_value % base); + } + + int parse_digit(char ch) + { + int digit; + if (isdigit(ch)) + digit = ch - '0'; + else if (islower(ch)) + digit = ch - ('a' - 10); + else if (isupper(ch)) + digit = ch - ('A' - 10); + else + return -1; + + if (digit >= m_base) + return -1; + + return digit; + } + + DigitConsumeDecision consume(char ch) + { + int digit = parse_digit(ch); + if (digit == -1) + return DigitConsumeDecision::Invalid; + + if (!can_append_digit(digit)) { + if (m_sign != Sign::Negative) { + return DigitConsumeDecision::PosOverflow; + } else { + return DigitConsumeDecision::NegOverflow; + } + } + + m_num *= m_base; + m_num += positive() ? digit : -digit; + + return DigitConsumeDecision::Consumed; + } + + T number() const { return m_num; }; + +private: + bool can_append_digit(int digit) + { + const bool is_below_cutoff = positive() ? (m_num < m_cutoff) : (m_num > m_cutoff); + + if (is_below_cutoff) { + return true; + } else { + return m_num == m_cutoff && digit < m_max_digit_after_cutoff; + } + } + + bool positive() const + { + return m_sign != Sign::Negative; + } + + const T m_base; + T m_num; + T m_cutoff; + int m_max_digit_after_cutoff; + Sign m_sign; +}; + +typedef NumParser<int, INT_MIN, INT_MAX> IntParser; +typedef NumParser<long long, LONG_LONG_MIN, LONG_LONG_MAX> LongLongParser; + +static bool is_either(char* str, int offset, char lower, char upper) +{ + char ch = *(str + offset); + return ch == lower || ch == upper; +} + __attribute__((warn_unused_result)) int __generate_unique_filename(char* pattern) { size_t length = strlen(pattern); @@ -308,162 +433,294 @@ int putenv(char* new_var) double strtod(const char* str, char** endptr) { - size_t len = strlen(str); - size_t weight = 1; - int exp_val = 0; - double value = 0.0f; - double fraction = 0.0f; - bool has_sign = false; - bool is_negative = false; - bool is_fractional = false; - bool is_scientific = false; + // Parse spaces, sign, and base + char* parse_ptr = const_cast<char*>(str); + strtons(parse_ptr, &parse_ptr); + const Sign sign = strtosign(parse_ptr, &parse_ptr); + + // Parse inf/nan, if applicable. + if (is_either(parse_ptr, 0, 'i', 'I')) { + if (is_either(parse_ptr, 1, 'n', 'N')) { + if (is_either(parse_ptr, 2, 'f', 'F')) { + parse_ptr += 3; + if (is_either(parse_ptr, 0, 'i', 'I')) { + if (is_either(parse_ptr, 1, 'n', 'N')) { + if (is_either(parse_ptr, 2, 'i', 'I')) { + if (is_either(parse_ptr, 3, 't', 'T')) { + if (is_either(parse_ptr, 4, 'y', 'Y')) { + parse_ptr += 5; + } + } + } + } + } + if (endptr) + *endptr = parse_ptr; + // Don't set errno to ERANGE here: + // The caller may want to distinguish between "input is + // literal infinity" and "input is not literal infinity + // but did not fit into double". + if (sign != Sign::Negative) { + return __builtin_huge_val(); + } else { + return -__builtin_huge_val(); + } + } + } + } + if (is_either(parse_ptr, 0, 'n', 'N')) { + if (is_either(parse_ptr, 1, 'a', 'A')) { + if (is_either(parse_ptr, 2, 'n', 'N')) { + if (endptr) + *endptr = parse_ptr + 3; + errno = ERANGE; + if (sign != Sign::Negative) { + return __builtin_nan(""); + } else { + return -__builtin_nan(""); + } + } + } + } - if (str[0] == '-') { - is_negative = true; - has_sign = true; + // Parse base + char exponent_lower; + char exponent_upper; + int base = 10; + if (*parse_ptr == '0') { + const char base_ch = *(parse_ptr + 1); + if (base_ch == 'x' || base_ch == 'X') { + base = 16; + parse_ptr += 2; + } } - if (str[0] == '+') { - has_sign = true; + + if (base == 10) { + exponent_lower = 'e'; + exponent_upper = 'E'; + } else { + exponent_lower = 'p'; + exponent_upper = 'P'; } - size_t i; - for (i = has_sign; i < len; i++) { - // Looks like we're about to start working on the fractional part - if (str[i] == '.') { - is_fractional = true; + // Parse "digits", possibly keeping track of the exponent offset. + // We parse the most significant digits and the position in the + // base-`base` representation separately. This allows us to handle + // numbers like `0.0000000000000000000000000000000000001234` or + // `1234567890123456789012345678901234567890` with ease. + LongLongParser digits { sign, base }; + bool digits_usable = false; + bool should_continue = true; + bool digits_overflow = false; + bool after_decimal = false; + int exponent = 0; + do { + if (!after_decimal && *parse_ptr == '.') { + after_decimal = true; + parse_ptr += 1; continue; } - if (str[i] == 'e' || str[i] == 'E') { - if (str[i + 1] == '-') - exp_val = -atoi(str + i + 2); - else if (str[i + 1] == '+') - exp_val = atoi(str + i + 2); - else - exp_val = atoi(str + i + 1); + bool is_a_digit; + if (digits_overflow) { + is_a_digit = digits.parse_digit(*parse_ptr) != -1; + } else { + DigitConsumeDecision decision = digits.consume(*parse_ptr); + switch (decision) { + case DigitConsumeDecision::Consumed: + is_a_digit = true; + // The very first actual digit must pass here: + digits_usable = true; + break; + case DigitConsumeDecision::PosOverflow: + // fallthrough + case DigitConsumeDecision::NegOverflow: + is_a_digit = true; + digits_overflow = true; + break; + case DigitConsumeDecision::Invalid: + is_a_digit = false; + break; + default: + ASSERT_NOT_REACHED(); + } + } - is_scientific = true; - continue; + if (is_a_digit) { + exponent -= after_decimal ? 1 : 0; + exponent += digits_overflow ? 1 : 0; } - if (str[i] < '0' || str[i] > '9' || exp_val != 0) - continue; + should_continue = is_a_digit; + parse_ptr += should_continue; + } while (should_continue); - if (is_fractional) { - fraction *= 10; - fraction += str[i] - '0'; - weight *= 10; + if (!digits_usable) { + // No actual number value available. + if (endptr) + *endptr = const_cast<char*>(str); + return 0.0; + } + + // Parse exponent. + // We already know the next character is not a digit in the current base, + // nor a valid decimal point. Check whether it's an exponent sign. + if (*parse_ptr == exponent_lower || *parse_ptr == exponent_upper) { + // Need to keep the old parse_ptr around, in case of rollback. + char* old_parse_ptr = parse_ptr; + parse_ptr += 1; + + // Can't use atol or strtol here: Must accept excessive exponents, + // even exponents >64 bits. + Sign exponent_sign = strtosign(parse_ptr, &parse_ptr); + IntParser exponent_parser { exponent_sign, base }; + bool exponent_usable = false; + bool exponent_overflow = false; + should_continue = true; + do { + bool is_a_digit; + if (exponent_overflow) { + is_a_digit = exponent_parser.parse_digit(*parse_ptr) != -1; + } else { + DigitConsumeDecision decision = exponent_parser.consume(*parse_ptr); + switch (decision) { + case DigitConsumeDecision::Consumed: + is_a_digit = true; + // The very first actual digit must pass here: + exponent_usable = true; + break; + case DigitConsumeDecision::PosOverflow: + // fallthrough + case DigitConsumeDecision::NegOverflow: + is_a_digit = true; + exponent_overflow = true; + break; + case DigitConsumeDecision::Invalid: + is_a_digit = false; + break; + default: + ASSERT_NOT_REACHED(); + } + } + + should_continue = is_a_digit; + parse_ptr += should_continue; + } while (should_continue); + + if (!exponent_usable) { + parse_ptr = old_parse_ptr; + } else if (exponent_overflow) { + // Technically this is wrong. If someone gives us 5GB of digits, + // and then an exponent of -5_000_000_000, the resulting exponent + // should be around 0. + // However, I think it's safe to assume that we never have to deal + // with that many digits anyway. + if (sign != Sign::Negative) { + exponent = INT_MIN; + } else { + exponent = INT_MAX; + } } else { - value = value * 10; - value += str[i] - '0'; + // Literal exponent is usable and fits in an int. + // However, `exponent + exponent_parser.number()` might overflow an int. + // This would result in the wrong sign of the exponent! + long long new_exponent = static_cast<long long>(exponent) + static_cast<long long>(exponent_parser.number()); + if (new_exponent < INT_MIN) { + exponent = INT_MIN; + } else if (new_exponent > INT_MAX) { + exponent = INT_MAX; + } else { + exponent = static_cast<int>(new_exponent); + } } } - fraction /= weight; - value += fraction; + // Parsing finished. now we only have to compute the result. + if (endptr) + *endptr = const_cast<char*>(parse_ptr); - if (is_scientific) { - bool divide = exp_val < 0; - if (divide) - exp_val *= -1; + // If `digits` is zero, we don't even have to look at `exponent`. + if (digits.number() == 0) { + if (sign != Sign::Negative) { + return 0.0; + } else { + return -0.0; + } + } - for (int i = 0; i < exp_val; i++) { - if (divide) - value /= 10; - else - value *= 10; + // Deal with extreme exponents. + // The smallest normal is 2^-1022. + // The smallest denormal is 2^-1074. + // The largest number in `digits` is 2^63 - 1. + // Therefore, if "base^exponent" is smaller than 2^-(1074+63), the result is 0.0 anyway. + // This threshold is roughly 5.3566 * 10^-343. + // So if the resulting exponent is -344 or lower (closer to -inf), + // the result is 0.0 anyway. + // We only need to avoid false positives, so we can ignore base 16. + if (exponent <= -344) { + errno = ERANGE; + // Definitely can't be represented more precisely. + // I lied, sometimes the result is +0.0, and sometimes -0.0. + if (sign != Sign::Negative) { + return 0.0; + } else { + return -0.0; } } - //FIXME: Not entirely sure if this is correct, but seems to work. - if (endptr) - *endptr = const_cast<char*>(str + i); - return is_negative ? -value : value; + // The largest normal is 2^+1024-eps. + // The smallest number in `digits` is 1. + // Therefore, if "base^exponent" is 2^+1024, the result is INF anyway. + // This threshold is roughly 1.7977 * 10^-308. + // So if the resulting exponent is +309 or higher, + // the result is INF anyway. + // We only need to avoid false positives, so we can ignore base 16. + if (exponent >= 309) { + errno = ERANGE; + // Definitely can't be represented more precisely. + // I lied, sometimes the result is +INF, and sometimes -INF. + if (sign != Sign::Negative) { + return __builtin_huge_val(); + } else { + return -__builtin_huge_val(); + } + } + + // TODO: If `exponent` is large, this could be made faster. + double value = digits.number(); + if (exponent < 0) { + exponent = -exponent; + for (int i = 0; i < exponent; ++i) { + value /= base; + } + if (value == -0.0 || value == +0.0) { + errno = ERANGE; + } + } else if (exponent > 0) { + for (int i = 0; i < exponent; ++i) { + value *= base; + } + if (value == -__builtin_huge_val() || value == +__builtin_huge_val()) { + errno = ERANGE; + } + } + + return value; } long double strtold(const char* str, char** endptr) { - (void)str; - (void)endptr; - dbgprintf("LibC: strtold: '%s'\n", str); - ASSERT_NOT_REACHED(); + assert(sizeof(double) == sizeof(long double)); + return strtod(str, endptr); } float strtof(const char* str, char** endptr) { - (void)str; - (void)endptr; - dbgprintf("LibC: strtof: '%s'\n", str); - ASSERT_NOT_REACHED(); + return strtod(str, endptr); } double atof(const char* str) { - size_t len = strlen(str); - size_t weight = 1; - int exp_val = 0; - double value = 0.0f; - double fraction = 0.0f; - bool has_sign = false; - bool is_negative = false; - bool is_fractional = false; - bool is_scientific = false; - - if (str[0] == '-') { - is_negative = true; - has_sign = true; - } - if (str[0] == '+') { - has_sign = true; - } - - for (size_t i = has_sign; i < len; i++) { - - // Looks like we're about to start working on the fractional part - if (str[i] == '.') { - is_fractional = true; - continue; - } - - if (str[i] == 'e' || str[i] == 'E') { - if (str[i + 1] == '-' || str[i + 1] == '+') - exp_val = atoi(str + i + 2); - else - exp_val = atoi(str + i + 1); - - is_scientific = true; - continue; - } - - if (str[i] < '0' || str[i] > '9' || exp_val != 0) - continue; - - if (is_fractional) { - fraction *= 10; - fraction += str[i] - '0'; - weight *= 10; - } else { - value = value * 10; - value += str[i] - '0'; - } - } - - fraction /= weight; - value += fraction; - - if (is_scientific) { - bool divide = exp_val < 0; - if (divide) - exp_val *= -1; - - for (int i = 0; i < exp_val; i++) { - if (divide) - value /= 10; - else - value *= 10; - } - } - - return is_negative ? -value : value; + return strtod(str, nullptr); } int atoi(const char* str) |