summaryrefslogtreecommitdiff
path: root/AK/Statistics.h
blob: 0c193d7ed4569d3d41b3c3b7a5c0d3de75dbfeab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/*
 * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <AK/Concepts.h>
#include <AK/Math.h>
#include <AK/QuickSelect.h>
#include <AK/QuickSort.h>
#include <AK/Vector.h>

namespace AK {

static constexpr int ODD_NAIVE_MEDIAN_CUTOFF = 200;
static constexpr int EVEN_NAIVE_MEDIAN_CUTOFF = 350;

template<Arithmetic T = float>
class Statistics {
public:
    Statistics() = default;
    ~Statistics() = default;

    void add(T const& value)
    {
        // FIXME: Check for an overflow
        m_sum += value;
        m_values.append(value);
    }

    T const sum() const { return m_sum; }

    // FIXME: Unclear Wording, average can mean a lot of different things
    // Median, Arithmetic Mean (which this is), Geometric Mean, Harmonic Mean etc
    float average() const
    {
        // Let's assume the average of an empty dataset is 0
        if (size() == 0)
            return 0;

        // TODO: sum might overflow so maybe do multiple partial sums and intermediate divisions here
        return (float)sum() / size();
    }

    T const min() const
    {
        // Lets Rather fail than read over the end of a collection
        VERIFY(size() != 0);

        T minimum = m_values[0];
        for (T number : values()) {
            if (number < minimum) {
                minimum = number;
            }
        }
        return minimum;
    }

    T const max() const
    {
        // Lets Rather fail than read over the end of a collection
        VERIFY(size() != 0);

        T maximum = m_values[0];
        for (T number : values()) {
            if (number > maximum) {
                maximum = number;
            }
        }
        return maximum;
    }

    T const median()
    {
        // Let's assume the Median of an empty dataset is 0
        if (size() == 0)
            return 0;

        // If the number of values is even, the median is the arithmetic mean of the two middle values
        if (size() <= EVEN_NAIVE_MEDIAN_CUTOFF && size() % 2 == 0) {
            quick_sort(m_values);
            return (m_values.at(size() / 2) + m_values.at(size() / 2 - 1)) / 2;
        } else if (size() <= ODD_NAIVE_MEDIAN_CUTOFF && size() % 2 == 1) {
            quick_sort(m_values);
            return m_values.at(m_values.size() / 2);
        } else if (size() % 2 == 0) {
            auto index = size() / 2;
            auto median1 = m_values.at(AK::quickselect_inplace(m_values, index));
            auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1));
            return (median1 + median2) / 2;
        }
        return m_values.at(AK::quickselect_inplace(m_values, size() / 2));
    }

    float standard_deviation() const { return sqrt(variance()); }
    float variance() const
    {
        float summation = 0;
        float avg = average();
        for (T number : values()) {
            float difference = (float)number - avg;
            summation += (difference * difference);
        }
        summation = summation / size();
        return summation;
    }

    Vector<T> const& values() const { return m_values; }
    size_t size() const { return m_values.size(); }

private:
    Vector<T> m_values;
    T m_sum {};
};

}