From 37ff2b9bd2399004df46246294c102509b0f0db9 Mon Sep 17 00:00:00 2001 From: Hendiadyoin1 Date: Thu, 14 Apr 2022 17:33:01 +0200 Subject: AK: Add a helper for quick hardware based rounding This uses the `fistp` and `cvts[sd]2si` instructions, respectively, to potentially round floating point values with just one instruction. This falls back to `llrint[fl]?` on aarch64 for now. --- AK/Math.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) (limited to 'AK') diff --git a/AK/Math.h b/AK/Math.h index 09debce7cd..a382e55299 100644 --- a/AK/Math.h +++ b/AK/Math.h @@ -265,7 +265,7 @@ constexpr T tan(T angle) CONSTEXPR_STATE(tan, angle); #if ARCH(I386) || ARCH(X86_64) - double ret, one; + T ret, one; asm( "fptan" : "=t"(one), "=u"(ret) @@ -536,6 +536,89 @@ using Hyperbolic::cosh; using Hyperbolic::sinh; using Hyperbolic::tanh; +template<Integral I, FloatingPoint P> +ALWAYS_INLINE I round_to(P value) +{ +#if ARCH(I386) || ARCH(X86_64) + // Note: fistps outputs into a signed integer location (i16, i32, i64), + // so let's be nice and tell the compiler that. 
+ Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret; + if constexpr (sizeof(I) == sizeof(i64)) { + asm("fistpll %0" + : "=m"(ret) + : "t"(value) + : "st"); + } else if constexpr (sizeof(I) == sizeof(i32)) { + asm("fistpl %0" + : "=m"(ret) + : "t"(value) + : "st"); + } else { + asm("fistps %0" + : "=m"(ret) + : "t"(value) + : "st"); + } + return static_cast<I>(ret); +#else + if constexpr (IsSame<P, long double>) + return static_cast<I>(llrintl(value)); + if constexpr (IsSame<P, double>) + return static_cast<I>(llrint(value)); + if constexpr (IsSame<P, float>) + return static_cast<I>(llrintf(value)); +#endif +} + +#ifdef __SSE__ +template<Integral I> +ALWAYS_INLINE I round_to(float value) +{ + if constexpr (sizeof(I) == sizeof(i64)) { + // Note: Outputting into 64-bit registers or memory locations requires the + // REX prefix, so we have to fall back to long doubles on i686 +# if ARCH(X86_64) + i64 ret; + asm("cvtss2si %1, %0" + : "=r"(ret) + : "xm"(value)); + return static_cast<I>(ret); +# else + return round_to<I, long double>(value); +# endif + } + i32 ret; + asm("cvtss2si %1, %0" + : "=r"(ret) + : "xm"(value)); + return static_cast<I>(ret); +} +#endif +#ifdef __SSE2__ +template<Integral I> +ALWAYS_INLINE I round_to(double value) +{ + if constexpr (sizeof(I) == sizeof(i64)) { + // Note: Outputting into 64-bit registers or memory locations requires the + // REX prefix, so we have to fall back to long doubles on i686 +# if ARCH(X86_64) + i64 ret; + asm("cvtsd2si %1, %0" + : "=r"(ret) + : "xm"(value)); + return static_cast<I>(ret); +# else + return round_to<I, long double>(value); +# endif + } + i32 ret; + asm("cvtsd2si %1, %0" + : "=r"(ret) + : "xm"(value)); + return static_cast<I>(ret); +} +#endif + template<FloatingPoint T> constexpr T pow(T x, T y) { @@ -563,5 +646,6 @@ constexpr T pow(T x, T y) } #undef CONSTEXPR_STATE - } + +using AK::round_to; -- cgit v1.2.3