diff options
author | Hendiadyoin1 <leon.a@serenityos.org> | 2022-04-14 17:33:01 +0200 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2022-05-07 20:25:39 +0200 |
commit | 37ff2b9bd2399004df46246294c102509b0f0db9 (patch) | |
tree | 4ad85ccb2d098b976d8558c0bae8183e7c0488bd /AK | |
parent | 71b175d4edcf9a6fb6b391b833640005158215df (diff) | |
download | serenity-37ff2b9bd2399004df46246294c102509b0f0db9.zip |
AK: Add a helper for quick hardware-based rounding
This uses the `fistp` and `cvts[sd]2si` instructions to potentially
round floating-point values with just one instruction.
This falls back to `llrint[fl]?` on aarch64 for now.
Diffstat (limited to 'AK')
-rw-r--r-- | AK/Math.h | 88 |
1 files changed, 86 insertions, 2 deletions
@@ -265,7 +265,7 @@ constexpr T tan(T angle)
     CONSTEXPR_STATE(tan, angle);
 
 #if ARCH(I386) || ARCH(X86_64)
-    double ret, one;
+    T ret, one;
     asm(
         "fptan"
         : "=t"(one), "=u"(ret)
@@ -536,6 +536,89 @@ using Hyperbolic::cosh;
 using Hyperbolic::sinh;
 using Hyperbolic::tanh;
 
+template<Integral I, FloatingPoint P>
+ALWAYS_INLINE I round_to(P value)
+{
+#if ARCH(I386) || ARCH(X86_64)
+    // Note: fistps outputs into a signed integer location (i16, i32, i64),
+    //       so let's be nice and tell the compiler that.
+    Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret;
+    if constexpr (sizeof(I) == sizeof(i64)) {
+        asm("fistpll %0"
+            : "=m"(ret)
+            : "t"(value)
+            : "st");
+    } else if constexpr (sizeof(I) == sizeof(i32)) {
+        asm("fistpl %0"
+            : "=m"(ret)
+            : "t"(value)
+            : "st");
+    } else {
+        asm("fistps %0"
+            : "=m"(ret)
+            : "t"(value)
+            : "st");
+    }
+    return static_cast<I>(ret);
+#else
+    if constexpr (IsSame<P, long double>)
+        return static_cast<I>(llrintl(value));
+    if constexpr (IsSame<P, double>)
+        return static_cast<I>(llrint(value));
+    if constexpr (IsSame<P, float>)
+        return static_cast<I>(llrintf(value));
+#endif
+}
+
+#ifdef __SSE__
+template<Integral I>
+ALWAYS_INLINE I round_to(float value)
+{
+    if constexpr (sizeof(I) == sizeof(i64)) {
+        // Note: Outputting into 64-bit registers or memory locations requires the
+        //       REX prefix, so we have to fall back to long doubles on i686
+#    if ARCH(X86_64)
+        i64 ret;
+        asm("cvtss2si %1, %0"
+            : "=r"(ret)
+            : "xm"(value));
+        return static_cast<I>(ret);
+#    else
+        return round_to<I, long double>(value);
+#    endif
+    }
+    i32 ret;
+    asm("cvtss2si %1, %0"
+        : "=r"(ret)
+        : "xm"(value));
+    return static_cast<I>(ret);
+}
+#endif
+#ifdef __SSE2__
+template<Integral I>
+ALWAYS_INLINE I round_to(double value)
+{
+    if constexpr (sizeof(I) == sizeof(i64)) {
+        // Note: Outputting into 64-bit registers or memory locations requires the
+        //       REX prefix, so we have to fall back to long doubles on i686
+#    if ARCH(X86_64)
+        i64 ret;
+        asm("cvtsd2si %1, %0"
+            : "=r"(ret)
+            : "xm"(value));
+        return static_cast<I>(ret);
+#    else
+        return round_to<I, long double>(value);
+#    endif
+    }
+    i32 ret;
+    asm("cvtsd2si %1, %0"
+        : "=r"(ret)
+        : "xm"(value));
+    return static_cast<I>(ret);
+}
+#endif
+
 template<FloatingPoint T>
 constexpr T pow(T x, T y)
 {
@@ -563,5 +646,6 @@ constexpr T pow(T x, T y)
 }
 
 #undef CONSTEXPR_STATE
-
 }
+
+using AK::round_to; |