summaryrefslogtreecommitdiff
path: root/AK/SIMDMath.h
blob: 4032ceabf1ef311d9fb8c7ed4b65fe08e46fccdd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
 * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#ifndef __SSE__
#    include <AK/Math.h>
#endif
#include <AK/SIMD.h>
#include <AK/SIMDExtras.h>
#include <math.h>

// Functions returning vectors or accepting vector arguments have different calling conventions
// depending on whether the target architecture supports SSE or not. GCC emits the "psabi" warning
// when compiling for non-SSE architectures. We disable this warning because these functions
// are static and should never be visible from outside the translation unit that includes this header.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpsabi"

namespace AK::SIMD {

// Functions ending in "_int_range" only accept arguments within range [INT_MIN, INT_MAX].
// Other inputs will generate unexpected results.

// Truncates v by round-tripping it through to_i32x4() and back through to_f32x4().
// Being an "_int_range" function, the input is expected to fit into a 32-bit signed integer.
ALWAYS_INLINE static f32x4 truncate_int_range(f32x4 v)
{
    auto as_integers = to_i32x4(v);
    return to_f32x4(as_integers);
}

// Rounds v down: starts from the truncated value and subtracts one wherever the
// truncated value ended up greater than the input.
ALWAYS_INLINE static f32x4 floor_int_range(f32x4 v)
{
    auto truncated = truncate_int_range(v);
    auto one_lower = truncated - 1.0f;
    auto truncated_is_above = truncated > v;
    return truncated_is_above ? one_lower : truncated;
}

// Rounds v up: starts from the truncated value and adds one wherever the
// truncated value ended up less than the input.
ALWAYS_INLINE static f32x4 ceil_int_range(f32x4 v)
{
    auto truncated = truncate_int_range(v);
    auto one_higher = truncated + 1.0f;
    auto truncated_is_below = truncated < v;
    return truncated_is_below ? one_higher : truncated;
}

// Returns the difference between v and floor_int_range(v).
ALWAYS_INLINE static f32x4 frac_int_range(f32x4 v)
{
    auto whole_part = floor_int_range(v);
    return v - whole_part;
}

// Clamps v against the vector bounds min and max.
// The lower bound is checked last, so wherever v < min the result is min.
ALWAYS_INLINE static f32x4 clamp(f32x4 v, f32x4 min, f32x4 max)
{
    // First cap against the upper bound...
    auto capped_above = v > max ? max : v;
    // ...then let the lower bound override wherever the input is below it.
    return v < min ? min : capped_above;
}

// Clamps v against the scalar bounds min and max.
// The lower bound is checked last, so wherever v < min the result is min.
ALWAYS_INLINE static f32x4 clamp(f32x4 v, float min, float max)
{
    // First cap against the upper bound...
    auto capped_above = v > max ? max : v;
    // ...then let the lower bound override wherever the input is below it.
    return v < min ? min : capped_above;
}

// Applies expf() to each of the four elements of v and packs the results into a new vector.
ALWAYS_INLINE static f32x4 exp(f32x4 v)
{
    // FIXME: This should be replaced with a vectorized algorithm instead of calling the scalar expf 4 times
    float first = expf(v[0]);
    float second = expf(v[1]);
    float third = expf(v[2]);
    float fourth = expf(v[3]);
    return f32x4 { first, second, third, fourth };
}

// Computes the square root of v.
// Without SSE, each of the four elements is passed to AK::sqrt individually;
// with SSE, the whole vector is handed to the __builtin_ia32_sqrtps builtin.
ALWAYS_INLINE static f32x4 sqrt(f32x4 v)
{
#ifndef __SSE__
    auto first = AK::sqrt(v[0]);
    auto second = AK::sqrt(v[1]);
    auto third = AK::sqrt(v[2]);
    auto fourth = AK::sqrt(v[3]);
    return f32x4 { first, second, third, fourth };
#else
    return __builtin_ia32_sqrtps(v);
#endif
}

// Computes the reciprocal square root of v.
// Without SSE, each element is computed as 1 / AK::sqrt(element);
// with SSE, the whole vector is handed to the __builtin_ia32_rsqrtps builtin.
// NOTE(review): Intel documents RSQRTPS as an approximation, so the two paths
// may not agree bit-for-bit - confirm callers tolerate the difference.
ALWAYS_INLINE static f32x4 rsqrt(f32x4 v)
{
#ifndef __SSE__
    auto first = 1.f / AK::sqrt(v[0]);
    auto second = 1.f / AK::sqrt(v[1]);
    auto third = 1.f / AK::sqrt(v[2]);
    auto fourth = 1.f / AK::sqrt(v[3]);
    return f32x4 { first, second, third, fourth };
#else
    return __builtin_ia32_rsqrtps(v);
#endif
}

}

#pragma GCC diagnostic pop