summaryrefslogtreecommitdiff
path: root/AK
diff options
context:
space:
mode:
authorHendiadyoin1 <leon.a@serenityos.org>2022-04-14 17:33:01 +0200
committerLinus Groh <mail@linusgroh.de>2022-05-07 20:25:39 +0200
commit37ff2b9bd2399004df46246294c102509b0f0db9 (patch)
tree4ad85ccb2d098b976d8558c0bae8183e7c0488bd /AK
parent71b175d4edcf9a6fb6b391b833640005158215df (diff)
downloadserenity-37ff2b9bd2399004df46246294c102509b0f0db9.zip
AK: Add an helper for quick hardware based rounding
This uses the `fistp` and `cvts[sd]2si` respectively, to potentially round floating point values with just one instruction. This falls back to `llrint[fl]?` on aarch64 for now.
Diffstat (limited to 'AK')
-rw-r--r--AK/Math.h88
1 files changed, 86 insertions, 2 deletions
diff --git a/AK/Math.h b/AK/Math.h
index 09debce7cd..a382e55299 100644
--- a/AK/Math.h
+++ b/AK/Math.h
@@ -265,7 +265,7 @@ constexpr T tan(T angle)
CONSTEXPR_STATE(tan, angle);
#if ARCH(I386) || ARCH(X86_64)
- double ret, one;
+ T ret, one;
asm(
"fptan"
: "=t"(one), "=u"(ret)
@@ -536,6 +536,89 @@ using Hyperbolic::cosh;
using Hyperbolic::sinh;
using Hyperbolic::tanh;
+template<Integral I, FloatingPoint P>
+ALWAYS_INLINE I round_to(P value)
+{
+#if ARCH(I386) || ARCH(X86_64)
+ // Note: fistps outputs into a signed integer location (i16, i32, i64),
+ // so lets be nice and tell the compiler that.
+ Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret;
+ if constexpr (sizeof(I) == sizeof(i64)) {
+ asm("fistpll %0"
+ : "=m"(ret)
+ : "t"(value)
+ : "st");
+ } else if constexpr (sizeof(I) == sizeof(i32)) {
+ asm("fistpl %0"
+ : "=m"(ret)
+ : "t"(value)
+ : "st");
+ } else {
+ asm("fistps %0"
+ : "=m"(ret)
+ : "t"(value)
+ : "st");
+ }
+ return static_cast<I>(ret);
+#else
+ if constexpr (IsSame<P, long double>)
+ return static_cast<I>(llrintl(value));
+ if constexpr (IsSame<P, double>)
+ return static_cast<I>(llrint(value));
+ if constexpr (IsSame<P, float>)
+ return static_cast<I>(llrintf(value));
+#endif
+}
+
+#ifdef __SSE__
+template<Integral I>
+ALWAYS_INLINE I round_to(float value)
+{
+ if constexpr (sizeof(I) == sizeof(i64)) {
+ // Note: Outputting into 64-bit registers or memory locations requires the
+ // REX prefix, so we have to fall back to long doubles on i686
+# if ARCH(X86_64)
+ i64 ret;
+ asm("cvtss2si %1, %0"
+ : "=r"(ret)
+ : "xm"(value));
+ return static_cast<I>(ret);
+# else
+ return round_to<I, long double>(value);
+# endif
+ }
+ i32 ret;
+ asm("cvtss2si %1, %0"
+ : "=r"(ret)
+ : "xm"(value));
+ return static_cast<I>(ret);
+}
+#endif
+#ifdef __SSE2__
+template<Integral I>
+ALWAYS_INLINE I round_to(double value)
+{
+ if constexpr (sizeof(I) == sizeof(i64)) {
+ // Note: Outputting into 64-bit registers or memory locations requires the
+ // REX prefix, so we have to fall back to long doubles on i686
+# if ARCH(X86_64)
+ i64 ret;
+ asm("cvtsd2si %1, %0"
+ : "=r"(ret)
+ : "xm"(value));
+ return static_cast<I>(ret);
+# else
+ return round_to<I, long double>(value);
+# endif
+ }
+ i32 ret;
+ asm("cvtsd2si %1, %0"
+ : "=r"(ret)
+ : "xm"(value));
+ return static_cast<I>(ret);
+}
+#endif
+
template<FloatingPoint T>
constexpr T pow(T x, T y)
{
@@ -563,5 +646,6 @@ constexpr T pow(T x, T y)
}
#undef CONSTEXPR_STATE
-
}
+
+using AK::round_to;