summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStaubfinger <staubfingercodinggames@gmail.com>2023-01-08 14:55:54 +0100
committerJelle Raaijmakers <jelle@gmta.nl>2023-02-03 19:04:15 +0100
commitda1023fcc59a5edc3e47b6f88763e1ded9477564 (patch)
tree89394422253ac1cf49f46985cf3f638a8b37099d
parent6b9344e86c8cf053cbef58aa57f04e738543a63d (diff)
downloadserenity-da1023fcc59a5edc3e47b6f88763e1ded9477564.zip
AK: Add thresholds to `quickselect_inline` and `Statistics::Median`
I did a bit of Profiling and made the quickselect and median algorithms use the best of option for the respective input size.
-rw-r--r--AK/QuickSelect.h13
-rw-r--r--AK/Statistics.h12
-rw-r--r--Tests/AK/CMakeLists.txt1
-rw-r--r--Tests/AK/TestStatistics.cpp70
4 files changed, 92 insertions, 4 deletions
diff --git a/AK/QuickSelect.h b/AK/QuickSelect.h
index 81a7973e10..bd11c11ded 100644
--- a/AK/QuickSelect.h
+++ b/AK/QuickSelect.h
@@ -11,6 +11,9 @@
#include <AK/StdLibExtras.h>
namespace AK {
+
+static constexpr int MEDIAN_OF_MEDIAN_CUTOFF = 4500;
+
// FIXME: Stole and adapted these two functions from `Userland/Demos/Tubes/Tubes.cpp` we really need something like this in `AK/Random.h`
static inline double random_double()
{
@@ -161,9 +164,13 @@ size_t quickselect_inplace(Collection& collection, size_t k, PivotFn pivot_fn)
template<typename Collection>
size_t quickselect_inplace(Collection& collection, size_t k)
{
- // By default, lets use middle_element to match `quicksort`
- return quickselect_inplace(
- collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::middle_element(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; });
+ if (collection.size() >= MEDIAN_OF_MEDIAN_CUTOFF)
+ return quickselect_inplace(
+ collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::median_of_medians(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; });
+
+ else
+ return quickselect_inplace(
+ collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::random_element(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; });
}
}
diff --git a/AK/Statistics.h b/AK/Statistics.h
index 93df7bd110..0c193d7ed4 100644
--- a/AK/Statistics.h
+++ b/AK/Statistics.h
@@ -9,10 +9,14 @@
#include <AK/Concepts.h>
#include <AK/Math.h>
#include <AK/QuickSelect.h>
+#include <AK/QuickSort.h>
#include <AK/Vector.h>
namespace AK {
+static constexpr int ODD_NAIVE_MEDIAN_CUTOFF = 200;
+static constexpr int EVEN_NAIVE_MEDIAN_CUTOFF = 350;
+
template<Arithmetic T = float>
class Statistics {
public:
@@ -75,7 +79,13 @@ public:
return 0;
// If the number of values is even, the median is the arithmetic mean of the two middle values
- if (size() % 2 == 0) {
+ if (size() <= EVEN_NAIVE_MEDIAN_CUTOFF && size() % 2 == 0) {
+ quick_sort(m_values);
+ return (m_values.at(size() / 2) + m_values.at(size() / 2 - 1)) / 2;
+ } else if (size() <= ODD_NAIVE_MEDIAN_CUTOFF && size() % 2 == 1) {
+ quick_sort(m_values);
+ return m_values.at(m_values.size() / 2);
+ } else if (size() % 2 == 0) {
auto index = size() / 2;
auto median1 = m_values.at(AK::quickselect_inplace(m_values, index));
auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1));
diff --git a/Tests/AK/CMakeLists.txt b/Tests/AK/CMakeLists.txt
index 408c4575a3..c59619642e 100644
--- a/Tests/AK/CMakeLists.txt
+++ b/Tests/AK/CMakeLists.txt
@@ -67,6 +67,7 @@ set(AK_TEST_SOURCES
TestSourceLocation.cpp
TestSpan.cpp
TestStack.cpp
+ TestStatistics.cpp
TestStdLibExtras.cpp
TestString.cpp
TestStringFloatingPointConversions.cpp
diff --git a/Tests/AK/TestStatistics.cpp b/Tests/AK/TestStatistics.cpp
new file mode 100644
index 0000000000..2caa61db8d
--- /dev/null
+++ b/Tests/AK/TestStatistics.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023, the SerenityOS developers.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Statistics.h>
+#include <LibTest/TestSuite.h>
+
+TEST_CASE(Statistics)
+{
+ // Setup Test Data
+ AK::Statistics<double> odd_number_elements;
+ AK::Statistics<double> even_number_elements;
+ AK::Statistics<double> odd_number_elements_large;
+ AK::Statistics<double> even_number_elements_large;
+
+ odd_number_elements.add(5.0);
+ odd_number_elements.add(4.0);
+ odd_number_elements.add(3.0);
+ odd_number_elements.add(2.0);
+ odd_number_elements.add(1.0);
+
+ even_number_elements.add(6.0);
+ even_number_elements.add(5.0);
+ even_number_elements.add(4.0);
+ even_number_elements.add(3.0);
+ even_number_elements.add(2.0);
+ even_number_elements.add(1.0);
+
+ for (int i = 201; i > 0; i--) {
+ odd_number_elements_large.add(i);
+ }
+
+ for (int i = 360; i > 0; i--) {
+ even_number_elements_large.add(i);
+ }
+
+ // Sum
+ EXPECT_APPROXIMATE(odd_number_elements.sum(), 15.0);
+ EXPECT_APPROXIMATE(even_number_elements.sum(), 21.0);
+
+ // Average
+ EXPECT_APPROXIMATE(odd_number_elements.average(), 3.0);
+ EXPECT_APPROXIMATE(even_number_elements.average(), 3.5);
+
+ // Min
+ EXPECT_APPROXIMATE(odd_number_elements.min(), 1.0);
+ EXPECT_APPROXIMATE(even_number_elements.min(), 1.0);
+
+ // Max
+ EXPECT_APPROXIMATE(odd_number_elements.max(), 5.0);
+ EXPECT_APPROXIMATE(even_number_elements.max(), 6.0);
+
+ // Median
+ EXPECT_APPROXIMATE(odd_number_elements.median(), 3.0);
+ EXPECT_APPROXIMATE(even_number_elements.median(), 3.5);
+ EXPECT_APPROXIMATE(odd_number_elements_large.median(), 101.0);
+ EXPECT_APPROXIMATE(even_number_elements_large.median(), 180.5);
+
+ // The expected values for standard deviation and variance were calculated by my school issued scientific calculator
+
+ // Standard Deviation
+ EXPECT_APPROXIMATE(odd_number_elements.standard_deviation(), 1.4142135623731);
+ EXPECT_APPROXIMATE(even_number_elements.standard_deviation(), 1.7078251276599);
+
+ // Variance
+ EXPECT_APPROXIMATE(odd_number_elements.variance(), 2.0);
+ EXPECT_APPROXIMATE(even_number_elements.variance(), 2.9166666666667);
+}