diff options
author | asynts <asynts@gmail.com> | 2020-10-02 15:21:30 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-10-02 20:48:19 +0200 |
commit | 6351a56d275714205be721ec192c0b98b4f917f3 (patch) | |
tree | ce2ed22f91ad7a0302029989c89a5994544ccf16 /AK | |
parent | ac5e08a541409bc47766be5196f5bec3140339dd (diff) | |
download | serenity-6351a56d275714205be721ec192c0b98b4f917f3.zip |
AK+Format: Do some housekeeping in the format implementation.
Diffstat (limited to 'AK')
-rw-r--r-- | AK/Format.cpp | 587 | ||||
-rw-r--r-- | AK/Format.h | 169 | ||||
-rw-r--r-- | AK/GenericLexer.cpp | 1 | ||||
-rw-r--r-- | AK/GenericLexer.h | 1 | ||||
-rw-r--r-- | AK/LogStream.h | 9 | ||||
-rw-r--r-- | AK/PrintfImplementation.h | 166 | ||||
-rw-r--r-- | AK/String.cpp | 4 | ||||
-rw-r--r-- | AK/String.h | 5 | ||||
-rw-r--r-- | AK/StringBuilder.h | 3 | ||||
-rw-r--r-- | AK/Tests/TestFormat.cpp | 18 | ||||
-rw-r--r-- | AK/Tests/TestPrintf.cpp | 124 |
11 files changed, 464 insertions, 623 deletions
diff --git a/AK/Format.cpp b/AK/Format.cpp index e9211cc570..5331b7621b 100644 --- a/AK/Format.cpp +++ b/AK/Format.cpp @@ -26,174 +26,97 @@ #include <AK/Format.h> #include <AK/GenericLexer.h> -#include <AK/PrintfImplementation.h> #include <AK/String.h> #include <AK/StringBuilder.h> #include <ctype.h> +namespace AK { + namespace { constexpr size_t use_next_index = NumericLimits<size_t>::max(); -struct FormatSpecifier { - StringView flags; - size_t index; -}; - -class FormatStringParser : public GenericLexer { -public: - explicit FormatStringParser(StringView input) - : GenericLexer(input) - { - } - - StringView consume_literal() - { - const auto begin = tell(); - - while (!is_eof()) { - if (consume_specific("{{")) - continue; - - if (consume_specific("}}")) - continue; - - if (next_is(is_any_of("{}"))) - return m_input.substring_view(begin, tell() - begin); - - consume(); - } - - return m_input.substring_view(begin); - } - - bool consume_number(size_t& value) - { - value = 0; +// The worst case is that we have the largest 64-bit value formatted as binary number, this would take +// 65 bytes. Choosing a larger power of two won't hurt and is a bit of mitigation against out-of-bounds accesses. 
+inline size_t convert_unsigned_to_string(u64 value, Array<u8, 128>& buffer, u8 base, bool upper_case) +{ + ASSERT(base >= 2 && base <= 16); - bool consumed_at_least_one = false; - while (next_is(isdigit)) { - value *= 10; - value += consume() - '0'; - consumed_at_least_one = true; - } + static constexpr const char* lowercase_lookup = "0123456789abcdef"; + static constexpr const char* uppercase_lookup = "0123456789ABCDEF"; - return consumed_at_least_one; + if (value == 0) { + buffer[0] = '0'; + return 1; } - bool consume_specifier(FormatSpecifier& specifier) - { - ASSERT(!next_is('}')); - - if (!consume_specific('{')) - return false; - - if (!consume_number(specifier.index)) - specifier.index = use_next_index; - - if (consume_specific(':')) { - const auto begin = tell(); - - size_t level = 1; - while (level > 0) { - ASSERT(!is_eof()); - - if (consume_specific('{')) { - ++level; - continue; - } - - if (consume_specific('}')) { - --level; - continue; - } - - consume(); - } - - specifier.flags = m_input.substring_view(begin, tell() - begin - 1); - } else { - if (!consume_specific('}')) - ASSERT_NOT_REACHED(); - - specifier.flags = ""; - } + size_t used = 0; + while (value > 0) { + if (upper_case) + buffer[used++] = uppercase_lookup[value % base]; + else + buffer[used++] = lowercase_lookup[value % base]; - return true; + value /= base; } - bool consume_replacement_field(size_t& index) - { - if (!consume_specific('{')) - return false; - - if (!consume_number(index)) - index = use_next_index; + for (size_t i = 0; i < used / 2; ++i) + swap(buffer[i], buffer[used - i - 1]); - if (!consume_specific('}')) - ASSERT_NOT_REACHED(); - - return true; - } -}; - -void write_escaped_literal(StringBuilder& builder, StringView literal) -{ - for (size_t idx = 0; idx < literal.length(); ++idx) { - builder.append(literal[idx]); - if (literal[idx] == '{' || literal[idx] == '}') - ++idx; - } + return used; } -void vformat_impl(StringBuilder& builder, FormatStringParser& parser, 
AK::FormatterContext& context) +void vformat_impl(TypeErasedFormatParams& params, FormatBuilder& builder, FormatParser& parser) { const auto literal = parser.consume_literal(); - write_escaped_literal(builder, literal); + builder.put_literal(literal); - FormatSpecifier specifier; + FormatParser::FormatSpecifier specifier; if (!parser.consume_specifier(specifier)) { ASSERT(parser.is_eof()); return; } if (specifier.index == use_next_index) - specifier.index = context.take_next_index(); - - ASSERT(specifier.index < context.parameter_count()); + specifier.index = params.take_next_index(); - context.set_flags(specifier.flags); + auto& parameter = params.parameters().at(specifier.index); - auto& parameter = context.parameter_at(specifier.index); - parameter.formatter(builder, parameter.value, context); + FormatParser argparser { specifier.flags }; + parameter.formatter(params, builder, argparser, parameter.value); - vformat_impl(builder, parser, context); + vformat_impl(params, builder, parser); } -size_t decode_value(size_t value, AK::FormatterContext& context) +} // namespace AK::{anonymous} + +size_t TypeErasedFormatParams::decode(size_t value, size_t default_value) { - if (value == AK::StandardFormatter::value_from_next_arg) - value = AK::StandardFormatter::value_from_arg + context.take_next_index(); + if (value == StandardFormatter::value_not_set) + return default_value; - if (value >= AK::StandardFormatter::value_from_arg) { - const auto parameter = context.parameter_at(value - AK::StandardFormatter::value_from_arg); + if (value == StandardFormatter::value_from_next_arg) + value = StandardFormatter::value_from_arg + take_next_index(); + + if (value >= StandardFormatter::value_from_arg) { + const auto parameter = parameters().at(value - StandardFormatter::value_from_arg); Optional<i64> svalue; - if (parameter.type == AK::TypeErasedParameter::Type::UInt8) + if (parameter.type == TypeErasedParameter::Type::UInt8) value = *reinterpret_cast<const u8*>(parameter.value); 
- else if (parameter.type == AK::TypeErasedParameter::Type::UInt16) + else if (parameter.type == TypeErasedParameter::Type::UInt16) value = *reinterpret_cast<const u16*>(parameter.value); - else if (parameter.type == AK::TypeErasedParameter::Type::UInt32) + else if (parameter.type == TypeErasedParameter::Type::UInt32) value = *reinterpret_cast<const u32*>(parameter.value); - else if (parameter.type == AK::TypeErasedParameter::Type::UInt64) + else if (parameter.type == TypeErasedParameter::Type::UInt64) value = *reinterpret_cast<const u64*>(parameter.value); - else if (parameter.type == AK::TypeErasedParameter::Type::Int8) + else if (parameter.type == TypeErasedParameter::Type::Int8) svalue = *reinterpret_cast<const i8*>(parameter.value); - else if (parameter.type == AK::TypeErasedParameter::Type::Int16) + else if (parameter.type == TypeErasedParameter::Type::Int16) svalue = *reinterpret_cast<const i16*>(parameter.value); - else if (parameter.type == AK::TypeErasedParameter::Type::Int32) + else if (parameter.type == TypeErasedParameter::Type::Int32) svalue = *reinterpret_cast<const i32*>(parameter.value); - else if (parameter.type == AK::TypeErasedParameter::Type::Int64) + else if (parameter.type == TypeErasedParameter::Type::Int64) svalue = *reinterpret_cast<const i64*>(parameter.value); else ASSERT_NOT_REACHED(); @@ -207,47 +130,274 @@ size_t decode_value(size_t value, AK::FormatterContext& context) return value; } -} // namespace +FormatParser::FormatParser(StringView input) + : GenericLexer(input) +{ +} +StringView FormatParser::consume_literal() +{ + const auto begin = tell(); -namespace AK { + while (!is_eof()) { + if (consume_specific("{{")) + continue; + + if (consume_specific("}}")) + continue; + + if (next_is(is_any_of("{}"))) + return m_input.substring_view(begin, tell() - begin); + + consume(); + } + + return m_input.substring_view(begin); +} +bool FormatParser::consume_number(size_t& value) +{ + value = 0; + + bool consumed_at_least_one = false; + while 
(next_is(isdigit)) { + value *= 10; + value += consume() - '0'; + consumed_at_least_one = true; + } + + return consumed_at_least_one; +} +bool FormatParser::consume_specifier(FormatSpecifier& specifier) +{ + ASSERT(!next_is('}')); + + if (!consume_specific('{')) + return false; + + if (!consume_number(specifier.index)) + specifier.index = use_next_index; + + if (consume_specific(':')) { + const auto begin = tell(); + + size_t level = 1; + while (level > 0) { + ASSERT(!is_eof()); + + if (consume_specific('{')) { + ++level; + continue; + } + + if (consume_specific('}')) { + --level; + continue; + } + + consume(); + } + + specifier.flags = m_input.substring_view(begin, tell() - begin - 1); + } else { + if (!consume_specific('}')) + ASSERT_NOT_REACHED(); + + specifier.flags = ""; + } + + return true; +} +bool FormatParser::consume_replacement_field(size_t& index) +{ + if (!consume_specific('{')) + return false; + + if (!consume_number(index)) + index = use_next_index; + + if (!consume_specific('}')) + ASSERT_NOT_REACHED(); + + return true; +} -void vformat(StringBuilder& builder, StringView fmtstr, Span<const TypeErasedParameter> parameters) +void FormatBuilder::put_padding(char fill, size_t amount) { - FormatStringParser parser { fmtstr }; - FormatterContext context { parameters }; - vformat_impl(builder, parser, context); + for (size_t i = 0; i < amount; ++i) + m_builder.append(fill); } -void vformat(const LogStream& stream, StringView fmtstr, Span<const TypeErasedParameter> parameters) +void FormatBuilder::put_literal(StringView value) +{ + for (size_t i = 0; i < value.length(); ++i) { + m_builder.append(value[i]); + if (value[i] == '{' || value[i] == '}') + ++i; + } +} +void FormatBuilder::put_string( + StringView value, + Align align, + size_t min_width, + size_t max_width, + char fill) +{ + const auto used_by_string = min(max_width, value.length()); + const auto used_by_padding = max(min_width, used_by_string) - used_by_string; + + if (used_by_string < 
value.length()) + value = value.substring_view(0, used_by_string); + + if (align == Align::Left || align == Align::Default) { + m_builder.append(value); + put_padding(fill, used_by_padding); + } else if (align == Align::Center) { + const auto used_by_left_padding = used_by_padding / 2; + const auto used_by_right_padding = ceil_div<size_t, size_t>(used_by_padding, 2); + + put_padding(fill, used_by_left_padding); + m_builder.append(value); + put_padding(fill, used_by_right_padding); + } else if (align == Align::Right) { + put_padding(fill, used_by_padding); + m_builder.append(value); + } +} +void FormatBuilder::put_u64( + u64 value, + u8 base, + bool prefix, + bool upper_case, + bool zero_pad, + Align align, + size_t min_width, + char fill, + SignMode sign_mode, + bool is_negative) +{ + Array<u8, 128> buffer; + + const auto used_by_digits = convert_unsigned_to_string(value, buffer, base, upper_case); + + auto used_by_prefix = sign_mode == SignMode::OnlyIfNeeded ? static_cast<size_t>(is_negative) : 1; + if (prefix) { + if (base == 8) + used_by_prefix += 1; + else if (base == 16) + used_by_prefix += 2; + else if (base == 2) + used_by_prefix += 2; + } + + const auto used_by_field = used_by_prefix + used_by_digits; + const auto used_by_padding = max(used_by_field, min_width) - used_by_field; + + const auto put_prefix = [&]() { + if (is_negative) + m_builder.append('-'); + else if (sign_mode == SignMode::Always) + m_builder.append('+'); + else if (sign_mode == SignMode::Reserved) + m_builder.append(' '); + + if (prefix) { + if (base == 2) { + if (upper_case) + m_builder.append("0B"); + else + m_builder.append("0b"); + } else if (base == 8) { + m_builder.append("0"); + } else if (base == 16) { + if (upper_case) + m_builder.append("0X"); + else + m_builder.append("0x"); + } + } + }; + const auto put_digits = [&]() { + for (size_t i = 0; i < used_by_digits; ++i) + m_builder.append(buffer[i]); + }; + + if (align == Align::Left) { + const auto used_by_right_padding = 
used_by_padding; + + put_prefix(); + put_digits(); + put_padding(fill, used_by_right_padding); + } else if (align == Align::Center) { + const auto used_by_left_padding = used_by_padding / 2; + const auto used_by_right_padding = ceil_div<size_t, size_t>(used_by_padding, 2); + + put_padding(fill, used_by_left_padding); + put_prefix(); + put_digits(); + put_padding(fill, used_by_right_padding); + } else if (align == Align::Right || align == Align::Default) { + const auto used_by_left_padding = used_by_padding; + + if (zero_pad) { + put_prefix(); + put_padding('0', used_by_left_padding); + put_digits(); + } else { + put_padding(fill, used_by_left_padding); + put_prefix(); + put_digits(); + } + } +} +void FormatBuilder::put_i64( + i64 value, + u8 base, + bool prefix, + bool upper_case, + bool zero_pad, + Align align, + size_t min_width, + char fill, + SignMode sign_mode) +{ + const auto is_negative = value < 0; + value = is_negative ? -value : value; + + put_u64(static_cast<size_t>(value), base, prefix, upper_case, zero_pad, align, min_width, fill, sign_mode, is_negative); +} + +void vformat(StringBuilder& builder, StringView fmtstr, TypeErasedFormatParams params) +{ + FormatBuilder fmtbuilder { builder }; + FormatParser parser { fmtstr }; + + vformat_impl(params, fmtbuilder, parser); +} +void vformat(const LogStream& stream, StringView fmtstr, TypeErasedFormatParams params) { StringBuilder builder; - FormatStringParser parser { fmtstr }; - FormatterContext context { parameters }; - vformat_impl(builder, parser, context); + vformat(builder, fmtstr, params); stream << builder.to_string(); } -void StandardFormatter::parse(FormatterContext& context) +void StandardFormatter::parse(TypeErasedFormatParams& params, FormatParser& parser) { - FormatStringParser parser { context.flags() }; - if (StringView { "<^>" }.contains(parser.peek(1))) { ASSERT(!parser.next_is(is_any_of("{}"))); m_fill = parser.consume(); } if (parser.consume_specific('<')) - m_align = Align::Left; + 
m_align = FormatBuilder::Align::Left; else if (parser.consume_specific('^')) - m_align = Align::Center; + m_align = FormatBuilder::Align::Center; else if (parser.consume_specific('>')) - m_align = Align::Right; + m_align = FormatBuilder::Align::Right; if (parser.consume_specific('-')) - m_sign = Sign::NegativeOnly; + m_sign_mode = FormatBuilder::SignMode::OnlyIfNeeded; else if (parser.consume_specific('+')) - m_sign = Sign::PositiveAndNegative; + m_sign_mode = FormatBuilder::SignMode::Always; else if (parser.consume_specific(' ')) - m_sign = Sign::ReserveSpace; + m_sign_mode = FormatBuilder::SignMode::Reserved; if (parser.consume_specific('#')) m_alternative_form = true; @@ -257,7 +407,7 @@ void StandardFormatter::parse(FormatterContext& context) if (size_t index = 0; parser.consume_replacement_field(index)) { if (index == use_next_index) - index = context.take_next_index(); + index = params.take_next_index(); m_width = value_from_arg + index; } else if (size_t width = 0; parser.consume_number(width)) { @@ -267,7 +417,7 @@ void StandardFormatter::parse(FormatterContext& context) if (parser.consume_specific('.')) { if (size_t index = 0; parser.consume_replacement_field(index)) { if (index == use_next_index) - index = context.take_next_index(); + index = params.take_next_index(); m_precision = value_from_arg + index; } else if (size_t precision = 0; parser.consume_number(precision)) { @@ -300,9 +450,9 @@ void StandardFormatter::parse(FormatterContext& context) ASSERT(parser.is_eof()); } -void Formatter<StringView>::format(StringBuilder& builder, StringView value, FormatterContext& context) +void Formatter<StringView>::format(TypeErasedFormatParams& params, FormatBuilder& builder, StringView value) { - if (m_sign != Sign::Default) + if (m_sign_mode != FormatBuilder::SignMode::Default) ASSERT_NOT_REACHED(); if (m_alternative_form) ASSERT_NOT_REACHED(); @@ -313,64 +463,32 @@ void Formatter<StringView>::format(StringBuilder& builder, StringView value, For if (m_width != 
value_not_set && m_precision != value_not_set) ASSERT_NOT_REACHED(); - if (m_align == Align::Default) - m_align = Align::Left; - - const auto width = decode_value(m_width, context); - const auto precision = decode_value(m_precision, context); - - const auto put_padding = [&](size_t amount, char fill) { - for (size_t i = 0; i < amount; ++i) - builder.append(fill); - }; - const auto put_bytes = [&](ReadonlyBytes bytes) { - for (size_t i = 0; i < bytes.size(); ++i) - builder.append(static_cast<char>(bytes[i])); - }; - - auto used_by_string = value.length(); - if (precision != value_not_set) - used_by_string = min(used_by_string, precision); - - const auto used_by_padding = width < used_by_string ? 0 : width - used_by_string; + const auto width = params.decode(m_width); + const auto precision = params.decode(m_precision, NumericLimits<size_t>::max()); - if (m_align == Align::Left) { - const auto used_by_right_padding = used_by_padding; + builder.put_string(value, m_align, width, precision, m_fill); +} - put_bytes(value.bytes().trim(used_by_string)); - put_padding(used_by_right_padding, m_fill); - return; - } - if (m_align == Align::Center) { - const auto used_by_left_padding = used_by_padding / 2; - const auto used_by_right_padding = ceil_div<size_t, size_t>(used_by_padding, 2); +template<typename T> +void Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type>::format(TypeErasedFormatParams& params, FormatBuilder& builder, T value) +{ + if (m_mode == Mode::Character) { + // FIXME: We just support ASCII for now, in the future maybe unicode? 
+ ASSERT(value >= 0 && value <= 127); - put_padding(used_by_left_padding, m_fill); - put_bytes(value.bytes().trim(used_by_string)); - put_padding(used_by_right_padding, m_fill); - return; - } - if (m_align == Align::Right) { - const auto used_by_left_padding = used_by_padding; + m_mode = Mode::String; - put_padding(used_by_left_padding, m_fill); - put_bytes(value.bytes().trim(used_by_string)); - return; + Formatter<StringView> formatter { *this }; + return formatter.format(params, builder, StringView { reinterpret_cast<const char*>(&value), 1 }); } - ASSERT_NOT_REACHED(); -} - -template<typename T> -void Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type>::format(StringBuilder& builder, T value, FormatterContext& context) -{ - if (m_precision != value_not_set) + if (m_precision != NumericLimits<size_t>::max()) ASSERT_NOT_REACHED(); if (m_mode == Mode::Pointer) { - if (m_sign != Sign::Default) + if (m_sign_mode != FormatBuilder::SignMode::Default) ASSERT_NOT_REACHED(); - if (m_align != Align::Default) + if (m_align != FormatBuilder::Align::Default) ASSERT_NOT_REACHED(); if (m_alternative_form) ASSERT_NOT_REACHED(); @@ -399,91 +517,26 @@ void Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type>::format(StringB } else if (m_mode == Mode::HexadecimalUppercase) { base = 16; upper_case = true; - } else if (m_mode == Mode::Character) { - // special case } else { ASSERT_NOT_REACHED(); } - const auto width = decode_value(m_width, context); - - const auto put_padding = [&](size_t amount, char fill) { - for (size_t i = 0; i < amount; ++i) - builder.append(fill); - }; - - if (m_mode == Mode::Character) { - // FIXME: We just support ASCII for now, in the future maybe unicode? - ASSERT(value >= 0 && value <= 127); - - const size_t used_by_value = 1; - const auto used_by_padding = width < used_by_value ? 
0 : width - used_by_value; - - if (m_align == Align::Left || m_align == Align::Default) { - const auto used_by_right_padding = used_by_padding; - - builder.append(static_cast<char>(value)); - put_padding(used_by_right_padding, m_fill); - return; - } - if (m_align == Align::Center) { - const auto used_by_left_padding = used_by_padding / 2; - const auto used_by_right_padding = ceil_div<size_t, size_t>(used_by_padding, 2); - - put_padding(used_by_left_padding, m_fill); - builder.append(static_cast<char>(value)); - put_padding(used_by_right_padding, m_fill); - return; - } - if (m_align == Align::Right) { - const auto used_by_left_padding = used_by_padding; - - put_padding(used_by_left_padding, m_fill); - builder.append(static_cast<char>(value)); - return; - } - - ASSERT_NOT_REACHED(); - } - - PrintfImplementation::Align align; - if (m_align == Align::Left) - align = PrintfImplementation::Align::Left; - else if (m_align == Align::Right) - align = PrintfImplementation::Align::Right; - else if (m_align == Align::Center) - align = PrintfImplementation::Align::Center; - else if (m_align == Align::Default) - align = PrintfImplementation::Align::Right; - else - ASSERT_NOT_REACHED(); - - PrintfImplementation::SignMode sign_mode; - if (m_sign == Sign::Default) - sign_mode = PrintfImplementation::SignMode::OnlyIfNeeded; - else if (m_sign == Sign::NegativeOnly) - sign_mode = PrintfImplementation::SignMode::OnlyIfNeeded; - else if (m_sign == Sign::PositiveAndNegative) - sign_mode = PrintfImplementation::SignMode::Always; - else if (m_sign == Sign::ReserveSpace) - sign_mode = PrintfImplementation::SignMode::Reserved; - else - ASSERT_NOT_REACHED(); + const auto width = params.decode(m_width); if (IsSame<typename MakeUnsigned<T>::Type, T>::value) - PrintfImplementation::convert_unsigned_to_string(value, builder, base, m_alternative_form, upper_case, m_zero_pad, align, width, m_fill, sign_mode); + builder.put_u64(value, base, m_alternative_form, upper_case, m_zero_pad, m_align, width, 
m_fill, m_sign_mode); else - PrintfImplementation::convert_signed_to_string(value, builder, base, m_alternative_form, upper_case, m_zero_pad, align, width, m_fill, sign_mode); + builder.put_i64(value, base, m_alternative_form, upper_case, m_zero_pad, m_align, width, m_fill, m_sign_mode); } -void Formatter<bool>::format(StringBuilder& builder, bool value, FormatterContext& context) +void Formatter<bool>::format(TypeErasedFormatParams& params, FormatBuilder& builder, bool value) { if (m_mode == Mode::Binary || m_mode == Mode::BinaryUppercase || m_mode == Mode::Decimal || m_mode == Mode::Octal || m_mode == Mode::Hexadecimal || m_mode == Mode::HexadecimalUppercase) { Formatter<u8> formatter { *this }; - return formatter.format(builder, static_cast<u8>(value), context); + return formatter.format(params, builder, static_cast<u8>(value)); } else { Formatter<StringView> formatter { *this }; - formatter.format(builder, value ? "true" : "false", context); + return formatter.format(params, builder, value ? 
"true" : "false"); } } diff --git a/AK/Format.h b/AK/Format.h index cf20ae942c..a8b479d6f9 100644 --- a/AK/Format.h +++ b/AK/Format.h @@ -27,6 +27,7 @@ #pragma once #include <AK/Array.h> +#include <AK/GenericLexer.h> #include <AK/StringView.h> // FIXME: I would really love to merge the format_value and make_type_erased_parameters functions, @@ -35,9 +36,15 @@ namespace AK { +class TypeErasedFormatParams; +class FormatParser; +class FormatBuilder; + template<typename T, typename = void> struct Formatter; +constexpr size_t max_format_arguments = 256; + struct TypeErasedParameter { enum class Type { UInt8, @@ -76,65 +83,131 @@ struct TypeErasedParameter { const void* value; Type type; - void (*formatter)(StringBuilder& builder, const void* value, class FormatterContext&); + void (*formatter)(TypeErasedFormatParams&, FormatBuilder&, FormatParser&, const void* value); }; -class FormatterContext { +class FormatParser : public GenericLexer { public: - FormatterContext(Span<const TypeErasedParameter> parameters) - : m_parameters(parameters) + struct FormatSpecifier { + StringView flags; + size_t index; + }; + + explicit FormatParser(StringView input); + + StringView consume_literal(); + bool consume_number(size_t& value); + bool consume_specifier(FormatSpecifier& specifier); + bool consume_replacement_field(size_t& index); +}; + +class FormatBuilder { +public: + enum class Align { + Default, + Left, + Center, + Right, + }; + enum class SignMode { + OnlyIfNeeded, + Always, + Reserved, + Default = OnlyIfNeeded, + }; + + explicit FormatBuilder(StringBuilder& builder) + : m_builder(builder) { } - const TypeErasedParameter& parameter_at(size_t index) const { return m_parameters.at(index); } - size_t parameter_count() const { return m_parameters.size(); } + void put_padding(char fill, size_t amount); + + void put_literal(StringView value); + + void put_string( + StringView value, + Align align = Align::Left, + size_t min_width = 0, + size_t max_width = 
NumericLimits<size_t>::max(), + char fill = ' '); + + void put_u64( + u64 value, + u8 base = 10, + bool prefix = false, + bool upper_case = false, + bool zero_pad = false, + Align align = Align::Right, + size_t min_width = 0, + char fill = ' ', + SignMode sign_mode = SignMode::OnlyIfNeeded, + bool is_negative = false); + + void put_i64( + i64 value, + u8 base = 10, + bool prefix = false, + bool upper_case = false, + bool zero_pad = false, + Align align = Align::Right, + size_t min_width = 0, + char fill = ' ', + SignMode sign_mode = SignMode::OnlyIfNeeded); + + const StringBuilder& builder() const { return m_builder; } + StringBuilder& builder() { return m_builder; } + +private: + StringBuilder& m_builder; +}; + +class TypeErasedFormatParams { +public: + explicit TypeErasedFormatParams(Span<const TypeErasedParameter> parameters) + : m_parameters(parameters) + { + } - StringView flags() const { return m_flags; } - void set_flags(StringView value) { m_flags = value; } + Span<const TypeErasedParameter> parameters() const { return m_parameters; } size_t take_next_index() { return m_next_index++; } + size_t decode(size_t value, size_t default_value = 0); + private: Span<const TypeErasedParameter> m_parameters; - StringView m_flags; size_t m_next_index { 0 }; }; -} // namespace AK - -namespace AK::Detail::Format { - template<typename T> -void format_value(StringBuilder& builder, const void* value, FormatterContext& context) +void __format_value(TypeErasedFormatParams& params, FormatBuilder& builder, FormatParser& parser, const void* value) { Formatter<T> formatter; - formatter.parse(context); - formatter.format(builder, *static_cast<const T*>(value), context); + formatter.parse(params, parser); + formatter.format(params, builder, *static_cast<const T*>(value)); } -} // namespace AK::Detail::Format +template<typename... 
Parameters> +class VariadicFormatParams : public TypeErasedFormatParams { +public: + static_assert(sizeof...(Parameters) <= max_format_arguments); -namespace AK { + explicit VariadicFormatParams(const Parameters&... parameters) + : TypeErasedFormatParams(m_data) + , m_data({ TypeErasedParameter { &parameters, TypeErasedParameter::get_type<Parameters>(), __format_value<Parameters> }... }) + { + } -constexpr size_t max_format_arguments = 256; +private: + Array<TypeErasedParameter, sizeof...(Parameters)> m_data; +}; // We use the same format for most types for consistency. This is taken directly from std::format. // Not all valid options do anything yet. // https://en.cppreference.com/w/cpp/utility/format/formatter#Standard_format_specification struct StandardFormatter { - enum class Align { - Default, - Left, - Right, - Center, - }; - enum class Sign { - NegativeOnly, - PositiveAndNegative, - ReserveSpace, - Default = NegativeOnly - }; enum class Mode { Default, Binary, @@ -148,12 +221,12 @@ struct StandardFormatter { Pointer, }; - static constexpr size_t value_not_set = 0; - static constexpr size_t value_from_next_arg = NumericLimits<size_t>::max(); - static constexpr size_t value_from_arg = NumericLimits<size_t>::max() - max_format_arguments - 1; + static constexpr size_t value_not_set = NumericLimits<size_t>::max(); + static constexpr size_t value_from_next_arg = NumericLimits<size_t>::max() - 1; + static constexpr size_t value_from_arg = NumericLimits<size_t>::max() - max_format_arguments - 2; - Align m_align = Align::Default; - Sign m_sign = Sign::NegativeOnly; + FormatBuilder::Align m_align = FormatBuilder::Align::Default; + FormatBuilder::SignMode m_sign_mode = FormatBuilder::SignMode::OnlyIfNeeded; Mode m_mode = Mode::Default; bool m_alternative_form = false; char m_fill = ' '; @@ -161,7 +234,7 @@ struct StandardFormatter { size_t m_width = value_not_set; size_t m_precision = value_not_set; - void parse(FormatterContext&); + void parse(TypeErasedFormatParams&, 
FormatParser&); }; template<> @@ -172,7 +245,7 @@ struct Formatter<StringView> : StandardFormatter { { } - void format(StringBuilder& builder, StringView value, FormatterContext&); + void format(TypeErasedFormatParams&, FormatBuilder&, StringView value); }; template<> struct Formatter<const char*> : Formatter<StringView> { @@ -195,32 +268,24 @@ struct Formatter<T, typename EnableIf<IsIntegral<T>::value>::Type> : StandardFor { } - void format(StringBuilder&, T value, FormatterContext&); + void format(TypeErasedFormatParams&, FormatBuilder&, T value); }; template<typename T> struct Formatter<T*> : StandardFormatter { - void format(StringBuilder& builder, T* value, FormatterContext& context) + void format(TypeErasedFormatParams& params, FormatBuilder& builder, T* value) { Formatter<FlatPtr> formatter { *this }; - formatter.format(builder, reinterpret_cast<FlatPtr>(value), context); + formatter.format(params, builder, reinterpret_cast<FlatPtr>(value)); } }; template<> struct Formatter<bool> : StandardFormatter { - void format(StringBuilder&, bool value, FormatterContext&); + void format(TypeErasedFormatParams&, FormatBuilder&, bool value); }; -template<typename... Parameters> -Array<TypeErasedParameter, sizeof...(Parameters)> make_type_erased_parameters(const Parameters&... parameters) -{ - static_assert(sizeof...(Parameters) <= max_format_arguments); - - return { TypeErasedParameter { &parameters, TypeErasedParameter::get_type<Parameters>(), Detail::Format::format_value<Parameters> }... 
}; -} - -void vformat(StringBuilder& builder, StringView fmtstr, Span<const TypeErasedParameter>); -void vformat(const LogStream& stream, StringView fmtstr, Span<const TypeErasedParameter>); +void vformat(StringBuilder& builder, StringView fmtstr, TypeErasedFormatParams); +void vformat(const LogStream& stream, StringView fmtstr, TypeErasedFormatParams); } // namespace AK diff --git a/AK/GenericLexer.cpp b/AK/GenericLexer.cpp index ff78aa9f9e..582ccf9a45 100644 --- a/AK/GenericLexer.cpp +++ b/AK/GenericLexer.cpp @@ -26,6 +26,7 @@ #include <AK/Assertions.h> #include <AK/GenericLexer.h> +#include <AK/String.h> #include <AK/StringBuilder.h> namespace AK { diff --git a/AK/GenericLexer.h b/AK/GenericLexer.h index bac642b828..0e0ffa9bc4 100644 --- a/AK/GenericLexer.h +++ b/AK/GenericLexer.h @@ -26,7 +26,6 @@ #pragma once -#include <AK/String.h> #include <AK/StringView.h> namespace AK { diff --git a/AK/LogStream.h b/AK/LogStream.h index 5cbafccadf..b68925465e 100644 --- a/AK/LogStream.h +++ b/AK/LogStream.h @@ -211,22 +211,19 @@ void dump_bytes(ReadonlyBytes); template<typename... Parameters> void outf(StringView fmtstr, const Parameters&... parameters) { - const auto type_erased_parameters = make_type_erased_parameters(parameters...); - vformat(out(), fmtstr, type_erased_parameters); + vformat(out(), fmtstr, VariadicFormatParams { parameters... }); } template<typename... Parameters> void warnf(StringView fmtstr, const Parameters&... parameters) { - const auto type_erased_parameters = make_type_erased_parameters(parameters...); - vformat(warn(), fmtstr, type_erased_parameters); + vformat(warn(), fmtstr, VariadicFormatParams { parameters... }); } #endif template<typename... Parameters> void dbgf(StringView fmtstr, const Parameters&... parameters) { - const auto type_erased_parameters = make_type_erased_parameters(parameters...); - vformat(dbg(), fmtstr, type_erased_parameters); + vformat(dbg(), fmtstr, VariadicFormatParams { parameters... 
}); } } diff --git a/AK/PrintfImplementation.h b/AK/PrintfImplementation.h index f7926968c2..fc4179feef 100644 --- a/AK/PrintfImplementation.h +++ b/AK/PrintfImplementation.h @@ -38,172 +38,6 @@ namespace PrintfImplementation { static constexpr const char* printf_hex_digits_lower = "0123456789abcdef"; static constexpr const char* printf_hex_digits_upper = "0123456789ABCDEF"; -enum class Align { - Left, - Center, - Right, -}; - -enum class SignMode { - OnlyIfNeeded, - Always, - Reserved -}; - -// The worst case is that we have the largest 64-bit value formatted as binary number, this would take -// 65 bytes. Choosing a larger power of two won't hurt and is a bit of mitigation against out-of-bounds accesses. -inline size_t convert_unsigned_to_string(u64 value, Array<u8, 128>& buffer, u8 base, bool upper_case) -{ - ASSERT(base >= 2 && base <= 16); - - static constexpr const char* lowercase_lookup = "0123456789abcdef"; - static constexpr const char* uppercase_lookup = "0123456789ABCDEF"; - - if (value == 0) { - buffer[0] = '0'; - return 1; - } - - size_t used = 0; - while (value > 0) { - if (upper_case) - buffer[used++] = uppercase_lookup[value % base]; - else - buffer[used++] = lowercase_lookup[value % base]; - - value /= base; - } - - // Reverse the list; I came up with this logic in like three seconds so it's probably wrong in some edge case. - for (size_t i = 0; i < used / 2; ++i) - swap(buffer[i], buffer[used - i - 1]); - - return used; -} - -inline size_t convert_unsigned_to_string( - u64 value, - StringBuilder& builder, - u8 base = 10, - bool prefix = false, - bool upper_case = false, - bool zero_pad = false, - Align align = Align::Right, - size_t width = 0, - char fill = ' ', - SignMode sign_mode = SignMode::OnlyIfNeeded, - bool is_negative = false) -{ - Array<u8, 128> buffer; - - const auto used_by_significant_digits = convert_unsigned_to_string(value, buffer, base, upper_case); - size_t used_by_prefix = sign_mode == SignMode::OnlyIfNeeded ? 
static_cast<size_t>(is_negative) : 1; - - if (prefix) { - if (base == 8) - used_by_prefix += 1; - else if (base == 16) - used_by_prefix += 2; - else if (base == 2) - used_by_prefix += 2; - } - - const auto put_prefix = [&]() { - if (is_negative) - builder.append('-'); - else if (sign_mode == SignMode::Always) - builder.append('+'); - else if (sign_mode == SignMode::Reserved) - builder.append(' '); - - if (prefix) { - if (base == 2) { - if (upper_case) - builder.append("0B"); - else - builder.append("0b"); - } else if (base == 8) { - builder.append("0"); - } else if (base == 16) { - if (upper_case) - builder.append("0X"); - else - builder.append("0x"); - } - } - }; - const auto put_padding = [&](size_t amount, char fill) { - for (size_t i = 0; i < amount; ++i) - builder.append(fill); - }; - const auto put_digits = [&]() { - builder.append(StringView { buffer.span().trim(used_by_significant_digits) }); - }; - - const auto used_by_field = used_by_significant_digits + used_by_prefix; - const auto used_by_padding = width < used_by_field ? 
0 : width - used_by_field; - - if (align == Align::Left) { - const auto used_by_right_padding = used_by_padding; - - put_prefix(); - put_digits(); - put_padding(used_by_right_padding, fill); - - return used_by_field + used_by_right_padding; - } - - if (align == Align::Center) { - const auto used_by_left_padding = used_by_padding / 2; - const auto used_by_right_padding = ceil_div<size_t, size_t>(used_by_padding, 2); - - put_padding(used_by_left_padding, fill); - put_prefix(); - put_digits(); - put_padding(used_by_right_padding, fill); - - return used_by_left_padding + used_by_field + used_by_right_padding; - } - - if (align == Align::Right) { - const auto used_by_left_padding = used_by_padding; - - if (zero_pad) { - put_prefix(); - put_padding(used_by_left_padding, '0'); - put_digits(); - } else { - put_padding(used_by_left_padding, fill); - put_prefix(); - put_digits(); - } - - return used_by_field + used_by_left_padding; - } - - ASSERT_NOT_REACHED(); -} - -inline size_t convert_signed_to_string( - i64 value, - StringBuilder& builder, - u8 base = 10, - bool common_prefix = false, - bool upper_case = false, - bool zero_pad = false, - Align align = Align::Right, - size_t width = 0, - char fill = ' ', - SignMode sign_mode = SignMode::OnlyIfNeeded) -{ - bool is_negative = value < 0; - - if (value < 0) - value = -value; - - return convert_unsigned_to_string(static_cast<size_t>(value), builder, base, common_prefix, upper_case, zero_pad, align, width, fill, sign_mode, is_negative); -} - #ifdef __serenity__ extern "C" size_t strlen(const char*); #else diff --git a/AK/String.cpp b/AK/String.cpp index ebb5c70dbc..177844fb61 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -441,10 +441,10 @@ InputStream& operator>>(InputStream& stream, String& string) } } -String String::vformatted(StringView fmtstr, Span<const TypeErasedParameter> parameters) +String String::vformatted(StringView fmtstr, TypeErasedFormatParams params) { StringBuilder builder; - vformat(builder, fmtstr, 
parameters); + vformat(builder, fmtstr, params); return builder.to_string(); } diff --git a/AK/String.h b/AK/String.h index e5b0d1ef0d..0d2cb3386a 100644 --- a/AK/String.h +++ b/AK/String.h @@ -239,13 +239,12 @@ public: static String format(const char*, ...); - static String vformatted(StringView fmtstr, Span<const TypeErasedParameter>); + static String vformatted(StringView fmtstr, TypeErasedFormatParams); template<typename... Parameters> static String formatted(StringView fmtstr, const Parameters&... parameters) { - const auto type_erased_parameters = make_type_erased_parameters(parameters...); - return vformatted(fmtstr, type_erased_parameters); + return vformatted(fmtstr, VariadicFormatParams { parameters... }); } template<typename T> diff --git a/AK/StringBuilder.h b/AK/StringBuilder.h index 54515a81bd..2ef784b29a 100644 --- a/AK/StringBuilder.h +++ b/AK/StringBuilder.h @@ -52,8 +52,7 @@ public: template<typename... Parameters> void appendff(StringView fmtstr, const Parameters&... parameters) { - const auto type_erased_parameters = make_type_erased_parameters(parameters...); - vformat(*this, fmtstr, type_erased_parameters); + vformat(*this, fmtstr, VariadicFormatParams { parameters... }); } String build() const; diff --git a/AK/Tests/TestFormat.cpp b/AK/Tests/TestFormat.cpp index 2441303e6d..fca71b7451 100644 --- a/AK/Tests/TestFormat.cpp +++ b/AK/Tests/TestFormat.cpp @@ -162,4 +162,22 @@ TEST_CASE(pointers) } } +// If the format implementation did absolutely nothing, all tests would pass. This +// is because when a test fails we only write "FAIL" to stdout using format. +// +// This is a bit scary, thus this test. At least this test should fail in this case. 
+TEST_CASE(ensure_that_format_works) +{ + + if (String::formatted("FAIL") != "FAIL") { + fprintf(stderr, "FAIL\n"); + exit(1); + } + + if (String::formatted("{} FAIL {}", 1, 2) != "1 FAIL 2") { + fprintf(stderr, "FAIL\n"); + exit(1); + } +} + TEST_MAIN(Format) diff --git a/AK/Tests/TestPrintf.cpp b/AK/Tests/TestPrintf.cpp deleted file mode 100644 index 84c455d744..0000000000 --- a/AK/Tests/TestPrintf.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2020, the SerenityOS developers. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <AK/TestSuite.h> - -#include <AK/PrintfImplementation.h> -#include <AK/StringBuilder.h> - -TEST_CASE(format_unsigned_with_internal_implementation) -{ - Array<u8, 128> buffer; - size_t used = 0; - - used = PrintfImplementation::convert_unsigned_to_string(12341234, buffer, 10, false); - EXPECT_EQ(StringView { buffer.span().trim(used) }, "12341234"); - - used = PrintfImplementation::convert_unsigned_to_string(12341234, buffer, 16, false); - EXPECT_EQ(StringView { buffer.span().trim(used) }, "bc4ff2"); - - used = PrintfImplementation::convert_unsigned_to_string(12341234, buffer, 16, true); - EXPECT_EQ(StringView { buffer.span().trim(used) }, "BC4FF2"); - - used = PrintfImplementation::convert_unsigned_to_string(0, buffer, 10, true); - EXPECT_EQ(StringView { buffer.span().trim(used) }, "0"); - - used = PrintfImplementation::convert_unsigned_to_string(NumericLimits<u64>::max(), buffer, 10, true); - EXPECT_EQ(StringView { buffer.span().trim(used) }, "18446744073709551615"); -} - -TEST_CASE(format_unsigned_just_pass_through) -{ - StringBuilder builder; - size_t used = 0; - - builder.clear(); - used = PrintfImplementation::convert_unsigned_to_string(12341234, builder); - EXPECT_EQ(used, 8u); - EXPECT_EQ(builder.to_string(), "12341234"); - - builder.clear(); - used = PrintfImplementation::convert_unsigned_to_string(12341234, builder, 16); - EXPECT_EQ(used, 6u); - EXPECT_EQ(builder.to_string(), "bc4ff2"); - - builder.clear(); - used = PrintfImplementation::convert_unsigned_to_string(12341234, builder, 16, false, true); - EXPECT_EQ(used, 6u); - EXPECT_EQ(builder.to_string(), "BC4FF2"); -} - -TEST_CASE(format_unsigned) -{ - StringBuilder builder; - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(42, builder, 10, false, false, true, PrintfImplementation::Align::Right, 4, '*', PrintfImplementation::SignMode::OnlyIfNeeded); - EXPECT_EQ(builder.to_string(), "0042"); - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(42, 
builder, 10, false, false, true, PrintfImplementation::Align::Left, 4, '*', PrintfImplementation::SignMode::OnlyIfNeeded); - EXPECT_EQ(builder.to_string(), "42**"); - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(42, builder, 10, false, false, true, PrintfImplementation::Align::Center, 4, '*', PrintfImplementation::SignMode::OnlyIfNeeded); - EXPECT_EQ(builder.to_string(), "*42*"); - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(42, builder, 10, false, false, true, PrintfImplementation::Align::Center, 9, '*', PrintfImplementation::SignMode::OnlyIfNeeded); - EXPECT_EQ(builder.to_string(), "***42****"); - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(42, builder, 10, false, false, true, PrintfImplementation::Align::Center, 9, '*', PrintfImplementation::SignMode::Reserved); - EXPECT_EQ(builder.to_string(), "*** 42***"); - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(42, builder, 10, false, false, true, PrintfImplementation::Align::Left, 4, '*', PrintfImplementation::SignMode::Always, true); - EXPECT_EQ(builder.to_string(), "-42*"); - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(42, builder, 10, false, false, true, PrintfImplementation::Align::Center, 4, '*', PrintfImplementation::SignMode::Reserved, true); - EXPECT_EQ(builder.to_string(), "-42*"); - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(42, builder, 10, false, false, true, PrintfImplementation::Align::Right, 4, '*', PrintfImplementation::SignMode::OnlyIfNeeded, true); - EXPECT_EQ(builder.to_string(), "-042"); - - builder.clear(); - PrintfImplementation::convert_unsigned_to_string(32, builder, 16, true, false, true, PrintfImplementation::Align::Right, 8, '*', PrintfImplementation::SignMode::OnlyIfNeeded, true); - EXPECT_EQ(builder.to_string(), "-0x00020"); -} - -TEST_CASE(format_signed) -{ - StringBuilder builder; - - builder.clear(); - 
PrintfImplementation::convert_signed_to_string(42, builder, 10, false, false, false, PrintfImplementation::Align::Right, 8, '/', PrintfImplementation::SignMode::OnlyIfNeeded); - EXPECT_EQ(builder.to_string(), "//////42"); -} - -TEST_MAIN(Printf) |