blob: 1da20691080fb9c3574dbb73ec747801690826ad (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
#pragma once
#include <util/generic/map.h>
#include <util/generic/list.h>
#include <util/generic/vector.h>
// Digest over a stream of double values, stored as weighted centroids.
// NOTE(review): only declarations are visible in this header. Judging by the
// names this is a t-digest style quantile sketch; confirm every behavioural
// detail against the implementation file.
class TDigest {
    // A cluster of samples: the running mean and the total weight behind it.
    struct TCentroid {
        double Mean;
        double Count;

        // Empty centroid: zero mean, zero weight.
        TCentroid()
            : TCentroid(0, 0)
        {
        }

        // Centroid positioned at `x` and carrying `weight`.
        TCentroid(double x, double weight)
            : Mean(x)
            , Count(weight)
        {
        }

        // Orders centroids by mean only; the weight takes no part.
        bool operator<(const TCentroid& rhs) const {
            return Mean < rhs.Mean;
        }

        // Folds a sample `x` of the given `weight` into this centroid as an
        // incremental weighted-mean step. The weight is accumulated first so
        // that the division uses the new total.
        // There is no guard here against the new total being zero.
        void Update(double x, double weight) {
            Count += weight;
            Mean += (weight * (x - Mean)) / Count;
        }
    };

    // Centroid storage. NOTE(review): the split between the three vectors is
    // inferred from their names only (current set / pending additions /
    // scratch for merging) -- verify in the implementation.
    TVector<TCentroid> Centroids;
    TVector<TCentroid> Unmerged;
    TVector<TCentroid> Merged;

    using iter_t = TVector<TCentroid>::iterator;

    // Scalar state. NOTE(review): presumably N is the accumulated weight,
    // while Delta and K are the tuning parameters passed to the constructors.
    double N;
    double Delta;
    double K;

    // Internal helpers; defined outside this header.
    void Add(const TDigest& otherDigest);
    void AddCentroid(const TCentroid& centroid);
    double GetThreshold(double q);
    void MergeCentroid(TVector<TCentroid>& merged, double& sum, const TCentroid& centroid);

protected:
    // Weighted insertion of `x`; `w` defaults to 1.0.
    void Update(double x, double w = 1.0);

public:
    // NOTE(review): none of these constructors is `explicit`, so a lone double
    // or a TString converts implicitly to TDigest. Left unchanged so existing
    // callers keep compiling.
    TDigest(double delta = 0.01, double k = 25);
    TDigest(double delta, double k, double firstValue);
    TDigest(const TString& serializedDigest);
    TDigest(const TDigest* digest1, const TDigest* digest2); // merge

    // Serialized form; TDigest(const TString&) accepts a serialized digest.
    TString Serialize();

    // Combining digests.
    TDigest operator+(const TDigest& other);
    TDigest& operator+=(const TDigest& other);

    // Mutators and queries; semantics are defined in the implementation file.
    void AddValue(double value);
    void Compress();
    void Clear();
    double GetPercentile(double percentile);
};
|