diff options
author | ifsmirnov <ifsmirnov@yandex-team.com> | 2022-12-26 17:41:10 +0300 |
---|---|---|
committer | ifsmirnov <ifsmirnov@yandex-team.com> | 2022-12-26 17:41:10 +0300 |
commit | 99cb30abce005e4f2073b737ca09b88da18c687f (patch) | |
tree | caca972cc53bf008442714985b16d017609f3fd2 /library | |
parent | 3f10cf68f6146c9a0aa13a36ea8d7e05bc8f3725 (diff) | |
download | ydb-99cb30abce005e4f2073b737ca09b88da18c687f.tar.gz |
Row digests for store compactor
Diffstat (limited to 'library')
-rw-r--r-- | library/cpp/tdigest/tdigest.cpp | 35 | ||||
-rw-r--r-- | library/cpp/tdigest/tdigest.h | 10 |
2 files changed, 40 insertions, 5 deletions
diff --git a/library/cpp/tdigest/tdigest.cpp b/library/cpp/tdigest/tdigest.cpp index 480425d2e2..3bd6d2d7e5 100644 --- a/library/cpp/tdigest/tdigest.cpp +++ b/library/cpp/tdigest/tdigest.cpp @@ -19,11 +19,11 @@ TDigest::TDigest(double delta, double k, double firstValue) AddValue(firstValue); } -TDigest::TDigest(const TString& serializedDigest) +TDigest::TDigest(TStringBuf serializedDigest) : N(0) { NTDigest::TDigest digest; - Y_VERIFY(digest.ParseFromString(serializedDigest)); + Y_VERIFY(digest.ParseFromArray(serializedDigest.data(), serializedDigest.size())); Delta = digest.GetDelta(); K = digest.GetK(); for (int i = 0; i < digest.centroids_size(); ++i) { @@ -157,6 +157,33 @@ double TDigest::GetPercentile(double percentile) { return Centroids.back().Mean; } +double TDigest::GetRank(double value) { + Compress(); + if (Centroids.empty()) { + return 0.0; + } + if (value < Centroids.front().Mean) { + return 0.0; + } + if (value == Centroids.front().Mean) { + return Centroids.front().Count * 0.5 / N; + } + double sum = 0.0; + double prev_x = 0.0; + double prev_mean = Centroids.front().Mean; + for (const auto& C : Centroids) { + double current_x = sum + C.Count * 0.5; + if (value <= C.Mean) { + double k = (value - prev_mean) / (C.Mean - prev_mean); + return (prev_x + k * (current_x - prev_x)) / N; + } + sum += C.Count; + prev_mean = C.Mean; + prev_x = current_x; + } + return 1.0; +} + TString TDigest::Serialize() { Compress(); NTDigest::TDigest digest; @@ -169,3 +196,7 @@ TString TDigest::Serialize() { } return digest.SerializeAsString(); } + +i64 TDigest::GetCount() const { + return std::llround(N); +} diff --git a/library/cpp/tdigest/tdigest.h b/library/cpp/tdigest/tdigest.h index acec0a0264..715620258c 100644 --- a/library/cpp/tdigest/tdigest.h +++ b/library/cpp/tdigest/tdigest.h @@ -1,7 +1,6 @@ #pragma once -#include <util/generic/map.h> -#include <util/generic/list.h> +#include <util/generic/string.h> #include <util/generic/vector.h> class TDigest { @@ -50,13 +49,18 @@ protected: public: TDigest(double delta = 0.01, double k = 25); TDigest(double delta, double k, double firstValue); - TDigest(const TString& serializedDigest); + TDigest(TStringBuf serializedDigest); TDigest(const TDigest* digest1, const TDigest* digest2); // merge + TString Serialize(); + TDigest operator+(const TDigest& other); TDigest& operator+=(const TDigest& other); + void AddValue(double value); void Compress(); void Clear(); double GetPercentile(double percentile); + double GetRank(double value); + i64 GetCount() const; }; |