about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/tdigest/tdigest.h
diff options
context:
space:
mode:
author: ifsmirnov <ifsmirnov@yandex-team.com> 2022-10-27 15:26:53 +0300
committer: ifsmirnov <ifsmirnov@yandex-team.com> 2022-10-27 15:26:53 +0300
commit: 4d05deef52c4b096f042ad1ff65284e2c411cae1 (patch)
tree: ffefd52bfe3cf9c9fbeb142d1dae8eb1e105115e /library/cpp/tdigest/tdigest.h
parent: 2a0b57f3998e3db7905b20ecd6218e62e3206d3a (diff)
download: ydb-4d05deef52c4b096f042ad1ff65284e2c411cae1.tar.gz
Extract TDigest from YQL to library
Diffstat (limited to 'library/cpp/tdigest/tdigest.h')
-rw-r--r-- library/cpp/tdigest/tdigest.h 62
1 files changed, 62 insertions, 0 deletions
diff --git a/library/cpp/tdigest/tdigest.h b/library/cpp/tdigest/tdigest.h
new file mode 100644
index 00000000000..acec0a02645
--- /dev/null
+++ b/library/cpp/tdigest/tdigest.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include <util/generic/map.h>
+#include <util/generic/list.h>
+#include <util/generic/vector.h>
+
+class TDigest { // Percentile estimator built from weighted centroids; presumably a t-digest sketch (method bodies are not in this header).
+ struct TCentroid { // One (mean, weight) cluster; private to TDigest.
+ double Mean; // Weighted mean of the values folded into this centroid.
+ double Count; // Accumulated weight; stored as double, so it is not restricted to integers.
+
+ TCentroid() // Empty centroid: mean 0, weight 0.
+ : Mean(0)
+ , Count(0)
+ {
+ }
+ TCentroid(double x, double weight) // Centroid holding the single value x with the given weight.
+ : Mean(x)
+ , Count(weight)
+ {
+ }
+
+ bool operator<(const TCentroid& centroid) const { // Orders by Mean only; Count is ignored.
+ return Mean < centroid.Mean;
+ }
+
+ void Update(double x, double weight) { // Folds (x, weight) into the running weighted mean.
+ Count += weight; // Grow the weight first so the division below uses the new total.
+ Mean += weight * (x - Mean) / Count; // Equals (oldMean * oldCount + x * weight) / Count; divides by zero if Count is 0 here.
+ }
+ };
+
+ TVector<TCentroid> Centroids; // NOTE(review): presumably the compressed centroids; the role of each buffer is set in the .cpp - confirm.
+ TVector<TCentroid> Unmerged; // Presumably centroids added since the last Compress(); confirm in the .cpp.
+ TVector<TCentroid> Merged; // Presumably the output/scratch buffer used while merging; confirm in the .cpp.
+ typedef TVector<TCentroid>::iterator iter_t; // Iterator alias over the centroid vectors.
+ double N; // NOTE(review): presumably the total weight added so far; confirm.
+ double Delta; // Presumably set from the constructors' delta argument; confirm.
+ double K; // Presumably set from the constructors' k argument; confirm.
+
+ void Add(const TDigest& otherDigest); // Private (declared before any access specifier); defined outside this header.
+ void AddCentroid(const TCentroid& centroid); // Private; defined outside this header.
+ double GetThreshold(double q); // Private; NOTE(review): meaning and range of q are not visible here.
+
+ void MergeCentroid(TVector<TCentroid>& merged, double& sum, const TCentroid& centroid); // Takes merged and sum by non-const reference, so it may modify both.
+
+protected: // Members below are reachable from subclasses but not from outside code.
+ void Update(double x, double w = 1.0); // w defaults to 1.0 when omitted.
+
+public:
+ TDigest(double delta = 0.01, double k = 25); // Doubles as the default constructor (delta = 0.01, k = 25).
+ TDigest(double delta, double k, double firstValue); // As above plus firstValue; presumably seeds the digest with it - confirm.
+ TDigest(const TString& serializedDigest); // Not explicit, so a TString converts implicitly; presumably parses Serialize() output - confirm.
+ TDigest(const TDigest* digest1, const TDigest* digest2); // merge; NOTE(review): handling of null pointers is not visible here.
+ TString Serialize(); // Non-const, so it cannot be called on a const TDigest.
+ TDigest operator+(const TDigest& other); // Returns a new TDigest by value; non-const, so the left operand must be mutable.
+ TDigest& operator+=(const TDigest& other); // Returns a TDigest reference (presumably *this).
+ void AddValue(double value); // Defined outside this header; presumably adds value with unit weight - confirm.
+ void Compress(); // Defined outside this header.
+ void Clear(); // Defined outside this header.
+ double GetPercentile(double percentile); // Non-const; NOTE(review): expected scale of percentile (0..1 vs 0..100) is not visible here.
+};