summaryrefslogtreecommitdiff
path: root/AK/SIMDMath.h
diff options
context:
space:
mode:
author Stephan Unverwerth <s.unverwerth@serenityos.org> 2022-01-06 17:03:24 +0100
committer Ali Mohammad Pur <Ali.mpfard@gmail.com> 2022-01-09 16:21:13 +0330
commit 444a15bad34e2eb23486f049b23e8e186b2f11a9 (patch)
tree 9c5e1b3eb0b25f660d970eddd2d24fa86f489944 /AK/SIMDMath.h
parent 178a57bbf7cf3b0b70c3484f7324a659e8fcac33 (diff)
download serenity-444a15bad34e2eb23486f049b23e8e186b2f11a9.zip
AK: Add SIMDMath.h with vectorized version of math functions
Diffstat (limited to 'AK/SIMDMath.h')
-rw-r--r-- AK/SIMDMath.h 62
1 files changed, 62 insertions, 0 deletions
diff --git a/AK/SIMDMath.h b/AK/SIMDMath.h
new file mode 100644
index 0000000000..60bc8c6e80
--- /dev/null
+++ b/AK/SIMDMath.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/SIMD.h>
+#include <math.h>
+
+// Returning a vector on the i686 target triggers the "psabi" warning.
+// CI treats that warning as an error and would not run to completion, so it is demoted back to a plain warning here.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic warning "-Wpsabi"
+
+namespace AK::SIMD {
+
+// Functions ending in "_int_range" only accept arguments within the range [INT_MIN, INT_MAX].
+// Other inputs will produce unexpected results.
+
+ALWAYS_INLINE static f32x4 truncate_int_range(f32x4 v)
+{
+ return to_f32x4(to_i32x4(v)); // f32x4 -> i32x4 -> f32x4 round trip; presumably drops each lane's fraction (confirm in AK/SIMD.h)
+}
+
+ALWAYS_INLINE static f32x4 floor_int_range(f32x4 v)
+{
+    f32x4 truncated = truncate_int_range(v);
+    return v < truncated ? truncated - 1.0f : truncated; // truncation landed above v: step down by one
+}
+
+ALWAYS_INLINE static f32x4 ceil_int_range(f32x4 v)
+{
+    f32x4 truncated = truncate_int_range(v);
+    return v > truncated ? truncated + 1.0f : truncated; // truncation landed below v: step up by one
+}
+
+ALWAYS_INLINE static f32x4 frac_int_range(f32x4 v)
+{
+ return v - floor_int_range(v); // what is left of v after subtracting floor_int_range(v)
+}
+
+ALWAYS_INLINE static f32x4 clamp(f32x4 v, f32x4 min, f32x4 max)
+{
+ return v < min ? min : (v > max ? max : v); // below min -> min; otherwise above max -> max; otherwise v unchanged
+}
+
+// Evaluates the scalar expf() on each of the four elements of v and packs the results into a new f32x4.
+ALWAYS_INLINE static f32x4 exp(f32x4 v)
+{
+    // FIXME: This should be replaced with a vectorized algorithm instead of calling the scalar expf 4 times
+    float const lane0 = expf(v[0]);
+    float const lane1 = expf(v[1]);
+    float const lane2 = expf(v[2]);
+    float const lane3 = expf(v[3]);
+    return f32x4 { lane0, lane1, lane2, lane3 };
+}
+
+#pragma GCC diagnostic pop
+
+}