diff options
author | Stephan Unverwerth <s.unverwerth@serenityos.org> | 2022-01-06 17:03:24 +0100 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2022-01-09 16:21:13 +0330 |
commit | 444a15bad34e2eb23486f049b23e8e186b2f11a9 (patch) | |
tree | 9c5e1b3eb0b25f660d970eddd2d24fa86f489944 /AK/SIMDMath.h | |
parent | 178a57bbf7cf3b0b70c3484f7324a659e8fcac33 (diff) | |
download | serenity-444a15bad34e2eb23486f049b23e8e186b2f11a9.zip |
AK: Add SIMDMath.h with vectorized version of math functions
Diffstat (limited to 'AK/SIMDMath.h')
-rw-r--r-- | AK/SIMDMath.h | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/AK/SIMDMath.h b/AK/SIMDMath.h new file mode 100644 index 0000000000..60bc8c6e80 --- /dev/null +++ b/AK/SIMDMath.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/SIMD.h> +#include <math.h> + +// Returning a vector on i686 target generates warning "psabi". +// This prevents the CI, treating this as an error, from running to completion. +#pragma GCC diagnostic push +#pragma GCC diagnostic warning "-Wpsabi" + +namespace AK::SIMD { + +// Functions ending in "_int_range" only accept arguments within range [INT_MIN, INT_MAX]. +// Other inputs will generate unexpected results. + +ALWAYS_INLINE static f32x4 truncate_int_range(f32x4 v) +{ + return to_f32x4(to_i32x4(v)); +} + +ALWAYS_INLINE static f32x4 floor_int_range(f32x4 v) +{ + auto t = truncate_int_range(v); + return t > v ? t - 1.0f : t; +} + +ALWAYS_INLINE static f32x4 ceil_int_range(f32x4 v) +{ + auto t = truncate_int_range(v); + return t < v ? t + 1.0f : t; +} + +ALWAYS_INLINE static f32x4 frac_int_range(f32x4 v) +{ + return v - floor_int_range(v); +} + +ALWAYS_INLINE static f32x4 clamp(f32x4 v, f32x4 min, f32x4 max) +{ + return v < min ? min : (v > max ? max : v); +} + +ALWAYS_INLINE static f32x4 exp(f32x4 v) +{ + // FIXME: This should be replaced with a vectorized algorithm instead of calling the scalar expf 4 times + return f32x4 { + expf(v[0]), + expf(v[1]), + expf(v[2]), + expf(v[3]), + }; +} + +#pragma GCC diagnostic pop + +} |