summaryrefslogtreecommitdiff
path: root/AK/StdLibExtras.cpp
blob: 31b3b9221cd9b8c50eb46c21f3dace44127c7380 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include <AK/StdLibExtras.h>
#include <AK/Assertions.h>
#include <AK/Types.h>
#include <AK/kstdio.h>

extern "C" {

// Bulk memory copy using the eight MMX registers, moving 64 bytes per
// loop iteration. Intended only for large copies where the setup and
// "emms" teardown cost is amortized — hence the size assertion below.
// Returns dest, like memcpy.
void* mmx_memcpy(void* dest, const void* src, size_t len)
{
    ASSERT(len >= 1024);

    auto* dest_ptr = (byte*)dest;
    auto* src_ptr = (const byte*)src;

    // Align the destination to an 8-byte boundary by copying up to 7
    // prologue bytes with "rep movsb". NOTE(review): the subtraction
    // cannot underflow because len >= 1024 is asserted above.
    if ((dword)dest_ptr & 7) {
        dword prologue = 8 - ((dword)dest_ptr & 7);
        len -= prologue;
        asm volatile(
            "rep movsb\n"
            : "=S"(src_ptr), "=D"(dest_ptr), "=c"(prologue)
            : "0"(src_ptr), "1"(dest_ptr), "2"(prologue)
            : "memory"
        );
    }
    // Main loop: stream 64 bytes per iteration through mm0..mm7.
    for (dword i = len / 64; i; --i) {
        asm volatile(
                    "movq (%0), %%mm0\n"
                    "movq 8(%0), %%mm1\n"
                    "movq 16(%0), %%mm2\n"
                    "movq 24(%0), %%mm3\n"
                    "movq 32(%0), %%mm4\n"
                    "movq 40(%0), %%mm5\n"
                    "movq 48(%0), %%mm6\n"
                    "movq 56(%0), %%mm7\n"
                    "movq %%mm0, (%1)\n"
                    "movq %%mm1, 8(%1)\n"
                    "movq %%mm2, 16(%1)\n"
                    "movq %%mm3, 24(%1)\n"
                    "movq %%mm4, 32(%1)\n"
                    "movq %%mm5, 40(%1)\n"
                    "movq %%mm6, 48(%1)\n"
                    "movq %%mm7, 56(%1)\n"
                    :: "r" (src_ptr), "r" (dest_ptr) : "memory");
        src_ptr += 64;
        dest_ptr += 64;
    }
    // Exit MMX state so the x87 FPU is usable again afterwards.
    asm volatile("emms":::"memory");
    // Whatever remains we'll have to memcpy.
    len %= 64;
    if (len)
        memcpy(dest_ptr, src_ptr, len);
    return dest;
}

// 64-by-32 unsigned division via the x86 "divl" instruction: returns the
// 32-bit quotient of n / d. NOTE(review): divl raises #DE (divide error)
// if the quotient does not fit in 32 bits or d is zero, so callers must
// guarantee n / d < 2^32 and d != 0.
static inline uint32_t divq(uint64_t n, uint32_t d)
{
    uint32_t n1 = n >> 32; // high half of dividend, placed in edx ("=d")
    uint32_t n0 = n;       // low half of dividend, placed in eax ("=a")
    uint32_t q;
    uint32_t r;
    asm volatile("divl %4" : "=d"(r), "=a"(q) : "0"(n1), "1"(n0), "rm"(d));
    return q;
}

// Full 64-by-64 unsigned division built from 64-by-32 hardware divides.
// Estimate-and-correct scheme in the style of Hacker's Delight's divlu.
static uint64_t unsigned_divide64(uint64_t n, uint64_t d)
{
    // Easy case: divisor fits in 32 bits. Split the dividend at the
    // 2^32 boundary and do schoolbook long division in base 2^32.
    if ((d >> 32) == 0)  {
        uint64_t b = 1ULL << 32;
        uint32_t n1 = n >> 32;
        uint32_t n0 = n;
        uint32_t d0 = d;
        // n1 % d0 < d0, so the inner quotient fits in 32 bits (divq-safe).
        return divq(b * (n1 % d0) + n0, d0) + b * (n1 / d0);
    }
    if (n < d)
        return 0;
    // Hard case: d >= 2^32, so the final quotient fits in 32 bits.
    uint32_t d1 = d >> 32u;
    int s = __builtin_clz(d1); // shift that normalizes d's top bit into bit 63
    // Quotient estimate from the normalized top 32 bits of d; may be one
    // too large, which the correction below compensates for.
    uint64_t q = divq(n >> 1, (d << s) >> 32) >> (31 - s);
    // If (q - 1) * d lands within d of n, q - 1 is the true quotient;
    // otherwise q is. (Relies on well-defined unsigned wraparound.)
    return n - (q - 1) * d < d ? q - 1 : q;
}

// Returns the remainder of the unsigned 64-bit division n / d.
//
// Fix: the return type was uint32_t, but when d exceeds 2^32 the
// remainder can also exceed 32 bits; truncating it corrupted the results
// of __umoddi3 and __udivmoddi4. Widening the return type is safe for
// all callers (they store the value into 64-bit slots).
static uint64_t unsigned_modulo64(uint64_t n, uint64_t d)
{
    return n - d * unsigned_divide64(n, d);
}

// Signed 64-bit division on top of unsigned_divide64: divide the
// magnitudes, then negate the quotient iff exactly one operand is
// negative. Negation via -(uint64_t) keeps INT64_MIN well-defined.
static int64_t signed_divide64(int64_t n, int64_t d)
{
    uint64_t numerator_magnitude = n < 0 ? -(uint64_t)n : (uint64_t)n;
    uint64_t divisor_magnitude = d < 0 ? -(uint64_t)d : (uint64_t)d;
    uint64_t quotient_magnitude = unsigned_divide64(numerator_magnitude, divisor_magnitude);
    if ((n < 0) != (d < 0))
        return -(int64_t)quotient_magnitude;
    return (int64_t)quotient_magnitude;
}

// Returns the remainder of the signed 64-bit division n / d
// (truncated division, so the result carries the sign of n).
//
// Fix: the return type was int32_t, but the remainder of a 64-bit
// division can exceed the 32-bit range when |d| is large, so the result
// was being truncated. Widening the return type is safe for all callers.
static int64_t signed_modulo64(int64_t n, int64_t d)
{
    return n - d * signed_divide64(n, d);
}

// libgcc runtime helper: signed 64-bit division (n / d).
int64_t __divdi3(int64_t n, int64_t d)
{
    return signed_divide64(n, d);
}

int64_t __moddi3(int64_t n, int64_t d)
{
    return signed_modulo64(n, d);
}

// libgcc runtime helper: unsigned 64-bit division (n / d).
uint64_t __udivdi3(uint64_t n, uint64_t d)
{
    uint64_t quotient = unsigned_divide64(n, d);
    return quotient;
}

uint64_t __umoddi3(uint64_t n, uint64_t d)
{
    return unsigned_modulo64(n, d);
}

// libgcc runtime helper: combined unsigned 64-bit divide-and-modulo.
// Stores the remainder through r and returns the quotient.
//
// Fixes two issues: (1) the expensive 64-bit division was performed
// twice (once directly, once inside unsigned_modulo64); (2) the
// remainder was truncated to 32 bits by unsigned_modulo64's return
// type. Dividing once and deriving the remainder resolves both.
uint64_t __udivmoddi4(uint64_t n, uint64_t d, uint64_t* r)
{
    uint64_t q = unsigned_divide64(n, d);
    *r = n - d * q;
    return q;
}

}