blob: 0c193d7ed4569d3d41b3c3b7a5c0d3de75dbfeab (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
/*
* Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Concepts.h>
#include <AK/Math.h>
#include <AK/QuickSelect.h>
#include <AK/QuickSort.h>
#include <AK/Vector.h>
namespace AK {
static constexpr int ODD_NAIVE_MEDIAN_CUTOFF = 200;
static constexpr int EVEN_NAIVE_MEDIAN_CUTOFF = 350;
template<Arithmetic T = float>
class Statistics {
public:
Statistics() = default;
~Statistics() = default;
void add(T const& value)
{
// FIXME: Check for an overflow
m_sum += value;
m_values.append(value);
}
T const sum() const { return m_sum; }
// FIXME: Unclear Wording, average can mean a lot of different things
// Median, Arithmetic Mean (which this is), Geometric Mean, Harmonic Mean etc
float average() const
{
// Let's assume the average of an empty dataset is 0
if (size() == 0)
return 0;
// TODO: sum might overflow so maybe do multiple partial sums and intermediate divisions here
return (float)sum() / size();
}
T const min() const
{
// Lets Rather fail than read over the end of a collection
VERIFY(size() != 0);
T minimum = m_values[0];
for (T number : values()) {
if (number < minimum) {
minimum = number;
}
}
return minimum;
}
T const max() const
{
// Lets Rather fail than read over the end of a collection
VERIFY(size() != 0);
T maximum = m_values[0];
for (T number : values()) {
if (number > maximum) {
maximum = number;
}
}
return maximum;
}
T const median()
{
// Let's assume the Median of an empty dataset is 0
if (size() == 0)
return 0;
// If the number of values is even, the median is the arithmetic mean of the two middle values
if (size() <= EVEN_NAIVE_MEDIAN_CUTOFF && size() % 2 == 0) {
quick_sort(m_values);
return (m_values.at(size() / 2) + m_values.at(size() / 2 - 1)) / 2;
} else if (size() <= ODD_NAIVE_MEDIAN_CUTOFF && size() % 2 == 1) {
quick_sort(m_values);
return m_values.at(m_values.size() / 2);
} else if (size() % 2 == 0) {
auto index = size() / 2;
auto median1 = m_values.at(AK::quickselect_inplace(m_values, index));
auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1));
return (median1 + median2) / 2;
}
return m_values.at(AK::quickselect_inplace(m_values, size() / 2));
}
float standard_deviation() const { return sqrt(variance()); }
float variance() const
{
float summation = 0;
float avg = average();
for (T number : values()) {
float difference = (float)number - avg;
summation += (difference * difference);
}
summation = summation / size();
return summation;
}
Vector<T> const& values() const { return m_values; }
size_t size() const { return m_values.size(); }
private:
Vector<T> m_values;
T m_sum {};
};
}
|