diff options
author | ifsmirnov <ifsmirnov@yandex-team.com> | 2022-12-26 17:41:10 +0300 |
---|---|---|
committer | ifsmirnov <ifsmirnov@yandex-team.com> | 2022-12-26 17:41:10 +0300 |
commit | 99cb30abce005e4f2073b737ca09b88da18c687f (patch) | |
tree | caca972cc53bf008442714985b16d017609f3fd2 /library/cpp/tdigest/tdigest.cpp | |
parent | 3f10cf68f6146c9a0aa13a36ea8d7e05bc8f3725 (diff) | |
download | ydb-99cb30abce005e4f2073b737ca09b88da18c687f.tar.gz |
Row digests for store compactor
Diffstat (limited to 'library/cpp/tdigest/tdigest.cpp')
-rw-r--r-- | library/cpp/tdigest/tdigest.cpp | 35 |
1 files changed, 33 insertions, 2 deletions
diff --git a/library/cpp/tdigest/tdigest.cpp b/library/cpp/tdigest/tdigest.cpp index 480425d2e2..3bd6d2d7e5 100644 --- a/library/cpp/tdigest/tdigest.cpp +++ b/library/cpp/tdigest/tdigest.cpp @@ -19,11 +19,11 @@ TDigest::TDigest(double delta, double k, double firstValue) AddValue(firstValue); } -TDigest::TDigest(const TString& serializedDigest) +TDigest::TDigest(TStringBuf serializedDigest) : N(0) { NTDigest::TDigest digest; - Y_VERIFY(digest.ParseFromString(serializedDigest)); + Y_VERIFY(digest.ParseFromArray(serializedDigest.data(), serializedDigest.size())); Delta = digest.GetDelta(); K = digest.GetK(); for (int i = 0; i < digest.centroids_size(); ++i) { @@ -157,6 +157,33 @@ double TDigest::GetPercentile(double percentile) { return Centroids.back().Mean; } +double TDigest::GetRank(double value) { + Compress(); + if (Centroids.empty()) { + return 0.0; + } + if (value < Centroids.front().Mean) { + return 0.0; + } + if (value == Centroids.front().Mean) { + return Centroids.front().Count * 0.5 / N; + } + double sum = 0.0; + double prev_x = 0.0; + double prev_mean = Centroids.front().Mean; + for (const auto& C : Centroids) { + double current_x = sum + C.Count * 0.5; + if (value <= C.Mean) { + double k = (value - prev_mean) / (C.Mean - prev_mean); + return (prev_x + k * (current_x - prev_x)) / N; + } + sum += C.Count; + prev_mean = C.Mean; + prev_x = current_x; + } + return 1.0; +} + TString TDigest::Serialize() { Compress(); NTDigest::TDigest digest; @@ -169,3 +196,7 @@ TString TDigest::Serialize() { } return digest.SerializeAsString(); } + +i64 TDigest::GetCount() const { + return std::llround(N); +} |