// path: ydb/library/yql/udfs/common/stat/static/tdigest.h
// blob: 1da20691080fb9c3574dbb73ec747801690826ad
#pragma once

#include <util/generic/map.h>
#include <util/generic/list.h>
#include <util/generic/vector.h>

// Quantile-estimation sketch (presumably the t-digest algorithm, judging by the
// class name - confirm against the implementation file). This header only
// declares the interface; apart from TCentroid, every member function is
// defined out of line.
class TDigest {
    // A weighted point: the running mean of the values folded into it together
    // with their accumulated weight.
    struct TCentroid {
        double Mean;
        double Count;

        TCentroid()
            : Mean(0)
            , Count(0)
        {
        }
        TCentroid(double x, double weight)
            : Mean(x)
            , Count(weight)
        {
        }

        // Centroids compare by mean only; Count does not take part in the order.
        bool operator<(const TCentroid& centroid) const {
            return Mean < centroid.Mean;
        }

        // Folds the value x with the given weight into this centroid using the
        // incremental weighted-mean formula.
        void Update(double x, double weight) {
            Count += weight;
            // If the accumulated weight is exactly zero the division below would
            // produce NaN/inf and poison Mean (and, through it, operator<), so
            // the mean is left untouched in that case.
            if (Count == 0) {
                return;
            }
            Mean += weight * (x - Mean) / Count;
        }
    };

    // NOTE(review): the exact roles of the three buffers are not visible in this
    // header; the names suggest the compressed centroids plus pending/merged
    // scratch storage - confirm against the implementation.
    TVector<TCentroid> Centroids;
    TVector<TCentroid> Unmerged;
    TVector<TCentroid> Merged;
    using iter_t = TVector<TCentroid>::iterator;
    // NOTE(review): presumably the total accumulated weight - confirm.
    double N;
    // Tuning parameters supplied through the constructors (defaults 0.01 and 25).
    double Delta;
    double K;

    void Add(const TDigest& otherDigest);
    void AddCentroid(const TCentroid& centroid);
    double GetThreshold(double q);

    // Takes the destination vector and a running sum by reference, so both may
    // be modified by the call.
    void MergeCentroid(TVector<TCentroid>& merged, double& sum, const TCentroid& centroid);

protected:
    // Adds the value x with weight w (defaults to 1.0).
    void Update(double x, double w = 1.0);

public:
    TDigest(double delta = 0.01, double k = 25);
    TDigest(double delta, double k, double firstValue);
    // Builds a digest from a string, presumably one produced by Serialize().
    // NOTE(review): not marked explicit, so TString converts implicitly; kept
    // as is because callers may rely on it.
    TDigest(const TString& serializedDigest);
    TDigest(const TDigest* digest1, const TDigest* digest2); // merge
    // Non-const: NOTE(review) - may alter internal state before writing; confirm.
    TString Serialize();
    TDigest operator+(const TDigest& other);
    TDigest& operator+=(const TDigest& other);
    void AddValue(double value);
    void Compress();
    void Clear();
    // NOTE(review): the expected scale of `percentile` (0..1 vs 0..100) is not
    // visible here - check the implementation before calling.
    double GetPercentile(double percentile);
};