/* * Copyright (c) 2021, the SerenityOS developers. * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include enum class LengthModifier { None, Default, Char, Short, Long, LongLong, IntMax, Size, PtrDiff, LongDouble, }; enum class ConversionSpecifier { Unspecified, Decimal, Integer, Octal, Unsigned, Hex, Floating, String, UseScanList, Character, Pointer, OutputNumberOfBytes, Invalid, }; enum class ReadKind { Normal, Octal, Hex, Infer, }; template struct ReadElementConcrete { bool operator()(GenericLexer&, va_list) { return false; } }; template struct ReadElementConcrete { bool operator()(GenericLexer& lexer, va_list* ap) { lexer.ignore_while(isspace); auto* ptr = ap ? va_arg(*ap, ApT*) : nullptr; long value = 0; char* endptr = nullptr; auto nptr = lexer.remaining().characters_without_null_termination(); if constexpr (kind == ReadKind::Normal) value = strtol(nptr, &endptr, 10); if constexpr (kind == ReadKind::Octal) value = strtol(nptr, &endptr, 8); if constexpr (kind == ReadKind::Hex) value = strtol(nptr, &endptr, 16); if constexpr (kind == ReadKind::Infer) value = strtol(nptr, &endptr, 0); if (!endptr) return false; if (endptr == nptr) return false; auto diff = endptr - nptr; VERIFY(diff > 0); lexer.ignore((size_t)diff); if (ptr) *ptr = value; return true; } }; template struct ReadElementConcrete { bool operator()(GenericLexer& lexer, va_list* ap) { static_assert(kind == ReadKind::Normal, "Can't read a non-normal character"); auto* ptr = ap ? va_arg(*ap, ApT*) : nullptr; if (lexer.is_eof()) return false; auto ch = lexer.consume(); if (ptr) *ptr = ch; return true; } }; template struct ReadElementConcrete { bool operator()(GenericLexer& lexer, va_list* ap) { lexer.ignore_while(isspace); auto* ptr = ap ? va_arg(*ap, ApT*) : nullptr; unsigned long value = 0; char* endptr = nullptr; auto nptr = lexer.remaining().characters_without_null_termination(); if constexpr (kind == ReadKind::Normal) value = strtoul(nptr, &endptr, 10); if constexpr (kind == ReadKind::Octal) value = strtoul(nptr, &endptr, 8); if constexpr (kind == ReadKind::Hex) value = strtoul(nptr, &endptr, 16); if constexpr (kind == ReadKind::Infer) value = strtoul(nptr, &endptr, 0); if (!endptr) return false; if (endptr == nptr) return false; auto diff = endptr - nptr; VERIFY(diff > 0); lexer.ignore((size_t)diff); if (ptr) *ptr = value; return true; } }; template struct ReadElementConcrete { bool operator()(GenericLexer& lexer, va_list* ap) { lexer.ignore_while(isspace); auto* ptr = ap ? va_arg(*ap, ApT*) : nullptr; long long value = 0; char* endptr = nullptr; auto nptr = lexer.remaining().characters_without_null_termination(); if constexpr (kind == ReadKind::Normal) value = strtoll(nptr, &endptr, 10); if constexpr (kind == ReadKind::Octal) value = strtoll(nptr, &endptr, 8); if constexpr (kind == ReadKind::Hex) value = strtoll(nptr, &endptr, 16); if constexpr (kind == ReadKind::Infer) value = strtoll(nptr, &endptr, 0); if (!endptr) return false; if (endptr == nptr) return false; auto diff = endptr - nptr; VERIFY(diff > 0); lexer.ignore((size_t)diff); if (ptr) *ptr = value; return true; } }; template struct ReadElementConcrete { bool operator()(GenericLexer& lexer, va_list* ap) { lexer.ignore_while(isspace); auto* ptr = ap ? va_arg(*ap, ApT*) : nullptr; unsigned long long value = 0; char* endptr = nullptr; auto nptr = lexer.remaining().characters_without_null_termination(); if constexpr (kind == ReadKind::Normal) value = strtoull(nptr, &endptr, 10); if constexpr (kind == ReadKind::Octal) value = strtoull(nptr, &endptr, 8); if constexpr (kind == ReadKind::Hex) value = strtoull(nptr, &endptr, 16); if constexpr (kind == ReadKind::Infer) value = strtoull(nptr, &endptr, 0); if (!endptr) return false; if (endptr == nptr) return false; auto diff = endptr - nptr; VERIFY(diff > 0); lexer.ignore((size_t)diff); if (ptr) *ptr = value; return true; } }; template struct ReadElementConcrete { bool operator()(GenericLexer& lexer, va_list* ap) { lexer.ignore_while(isspace); auto* ptr = ap ? va_arg(*ap, ApT*) : nullptr; double value = 0; char* endptr = nullptr; auto nptr = lexer.remaining().characters_without_null_termination(); if constexpr (kind == ReadKind::Normal) value = strtod(nptr, &endptr); else return false; if (!endptr) return false; if (endptr == nptr) return false; auto diff = endptr - nptr; VERIFY(diff > 0); lexer.ignore((size_t)diff); if (ptr) *ptr = value; return true; } }; template struct ReadElement { bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap) { switch (length_modifier) { default: case LengthModifier::None: VERIFY_NOT_REACHED(); case LengthModifier::Default: return ReadElementConcrete {}(input_lexer, ap); case LengthModifier::Char: return ReadElementConcrete {}(input_lexer, ap); case LengthModifier::Short: return ReadElementConcrete {}(input_lexer, ap); case LengthModifier::Long: if constexpr (IsSame) return ReadElementConcrete {}(input_lexer, ap); if constexpr (IsSame) return ReadElementConcrete {}(input_lexer, ap); if constexpr (IsSame) return ReadElementConcrete {}(input_lexer, ap); return false; case LengthModifier::LongLong: if constexpr (IsSame) return ReadElementConcrete {}(input_lexer, ap); if constexpr (IsSame) return ReadElementConcrete {}(input_lexer, ap); if constexpr (IsSame) return ReadElementConcrete {}(input_lexer, ap); return false; case LengthModifier::IntMax: return ReadElementConcrete {}(input_lexer, ap); case LengthModifier::Size: return ReadElementConcrete {}(input_lexer, ap); case LengthModifier::PtrDiff: return ReadElementConcrete {}(input_lexer, ap); case LengthModifier::LongDouble: return ReadElementConcrete {}(input_lexer, ap); } } }; template<> struct ReadElement { ReadElement(StringView scan_set = {}, bool invert = false) : scan_set(scan_set.is_null() ? " \t\n\f\r" : scan_set) , invert(scan_set.is_null() ? true : invert) , was_null(scan_set.is_null()) { } bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap) { // FIXME: Implement wide strings and such. if (length_modifier != LengthModifier::Default) return false; if (was_null) input_lexer.ignore_while(isspace); auto* ptr = ap ? va_arg(*ap, char*) : nullptr; auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); }); if (str.is_empty()) return false; memcpy(ptr, str.characters_without_null_termination(), str.length()); ptr[str.length()] = 0; return true; } private: bool matches(char c) const { return invert ^ scan_set.contains(c); } const StringView scan_set; bool invert { false }; bool was_null { false }; }; template<> struct ReadElement { bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap) { if (length_modifier != LengthModifier::Default) return false; input_lexer.ignore_while(isspace); auto* ptr = ap ? va_arg(*ap, void**) : nullptr; auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); }); if (count != 8) { fail:; for (size_t i = 0; i < count; ++i) input_lexer.retreat(); return false; } char buf[9] { 0 }; memcpy(buf, str.characters_without_null_termination(), 8); buf[8] = 0; char* endptr = nullptr; auto value = strtoull(buf, &endptr, 16); if (endptr != &buf[8]) goto fail; memcpy(ptr, &value, sizeof(value)); return true; } private: bool should_consume(char c) { if (count == 8) return false; if (!isxdigit(c)) return false; ++count; return true; } size_t count { 0 }; }; extern "C" int vsscanf(const char* input, const char* format, va_list ap) { GenericLexer format_lexer { format }; GenericLexer input_lexer { input }; int elements_matched = 0; va_list copy; __builtin_va_copy(copy, ap); while (!format_lexer.is_eof()) { format_lexer.ignore_while(isspace); if (!format_lexer.next_is('%')) { read_one_literal:; input_lexer.ignore_while(isspace); if (format_lexer.is_eof()) break; auto next_char = format_lexer.consume(); if (!input_lexer.consume_specific(next_char)) return elements_matched; continue; } if (format_lexer.next_is("%%")) { format_lexer.ignore(); goto read_one_literal; } format_lexer.ignore(); // '%' bool suppress_assignment = false; if (format_lexer.next_is('*')) { suppress_assignment = true; format_lexer.ignore(); } // Parse width specification [[maybe_unused]] int width_specifier = 0; if (format_lexer.next_is(isdigit)) { auto width_digits = format_lexer.consume_while([](char c) { return isdigit(c); }); width_specifier = width_digits.to_int().value(); // FIXME: Actually use width specifier } bool invert_scanlist = false; StringView scanlist; LengthModifier length_modifier { LengthModifier::None }; ConversionSpecifier conversion_specifier { ConversionSpecifier::Unspecified }; reread_lookahead:; auto format_lookahead = format_lexer.peek(); if (length_modifier == LengthModifier::None) { switch (format_lookahead) { case 'h': if (format_lexer.peek(1) == 'h') { format_lexer.consume(2); length_modifier = LengthModifier::Char; } else { format_lexer.consume(1); length_modifier = LengthModifier::Short; } break; case 'l': if (format_lexer.peek(1) == 'l') { format_lexer.consume(2); length_modifier = LengthModifier::LongLong; } else { format_lexer.consume(1); length_modifier = LengthModifier::Long; } break; case 'j': format_lexer.consume(); length_modifier = LengthModifier::IntMax; break; case 'z': format_lexer.consume(); length_modifier = LengthModifier::Size; break; case 't': format_lexer.consume(); length_modifier = LengthModifier::PtrDiff; break; case 'L': format_lexer.consume(); length_modifier = LengthModifier::LongDouble; break; default: length_modifier = LengthModifier::Default; break; } goto reread_lookahead; } if (conversion_specifier == ConversionSpecifier::Unspecified) { switch (format_lookahead) { case 'd': format_lexer.consume(); conversion_specifier = ConversionSpecifier::Decimal; break; case 'i': format_lexer.consume(); conversion_specifier = ConversionSpecifier::Integer; break; case 'o': format_lexer.consume(); conversion_specifier = ConversionSpecifier::Octal; break; case 'u': format_lexer.consume(); conversion_specifier = ConversionSpecifier::Unsigned; break; case 'x': format_lexer.consume(); conversion_specifier = ConversionSpecifier::Hex; break; case 'a': case 'e': case 'f': case 'g': format_lexer.consume(); conversion_specifier = ConversionSpecifier::Floating; break; case 's': format_lexer.consume(); conversion_specifier = ConversionSpecifier::String; break; case '[': format_lexer.consume(); scanlist = format_lexer.consume_until(']'); if (scanlist.starts_with('^')) { scanlist = scanlist.substring_view(1); invert_scanlist = true; } conversion_specifier = ConversionSpecifier::UseScanList; break; case 'c': format_lexer.consume(); conversion_specifier = ConversionSpecifier::Character; break; case 'p': format_lexer.consume(); conversion_specifier = ConversionSpecifier::Pointer; break; case 'n': format_lexer.consume(); conversion_specifier = ConversionSpecifier::OutputNumberOfBytes; break; case 'C': format_lexer.consume(); length_modifier = LengthModifier::Long; conversion_specifier = ConversionSpecifier::Character; break; case 'S': format_lexer.consume(); length_modifier = LengthModifier::Long; conversion_specifier = ConversionSpecifier::String; break; default: format_lexer.consume(); conversion_specifier = ConversionSpecifier::Invalid; break; } } auto* ap_or_null = !suppress_assignment ? (va_list*)© : nullptr; // Now try to read. switch (conversion_specifier) { case ConversionSpecifier::Invalid: case ConversionSpecifier::Unspecified: default: // "undefined behavior", let's be nice and crash. dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier); VERIFY_NOT_REACHED(); case ConversionSpecifier::Decimal: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::Integer: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::Octal: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::Unsigned: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::Hex: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::Floating: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::String: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::UseScanList: if (!ReadElement { scanlist, invert_scanlist }(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::Character: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::Pointer: if (!ReadElement {}(length_modifier, input_lexer, ap_or_null)) format_lexer.consume_all(); else ++elements_matched; break; case ConversionSpecifier::OutputNumberOfBytes: { input_lexer.ignore_while(isspace); if (!suppress_assignment) { auto* ptr = va_arg(copy, int*); *ptr = input_lexer.tell(); } break; } } } va_end(copy); return elements_matched; }