about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/tdigest/tdigest.cpp
diff options
context:
space:
mode:
author: ifsmirnov <ifsmirnov@yandex-team.com> 2022-12-26 17:41:10 +0300
committer: ifsmirnov <ifsmirnov@yandex-team.com> 2022-12-26 17:41:10 +0300
commit: 99cb30abce005e4f2073b737ca09b88da18c687f (patch)
tree: caca972cc53bf008442714985b16d017609f3fd2 /library/cpp/tdigest/tdigest.cpp
parent: 3f10cf68f6146c9a0aa13a36ea8d7e05bc8f3725 (diff)
download: ydb-99cb30abce005e4f2073b737ca09b88da18c687f.tar.gz
Row digests for store compactor
Diffstat (limited to 'library/cpp/tdigest/tdigest.cpp')
-rw-r--r-- library/cpp/tdigest/tdigest.cpp 35
1 files changed, 33 insertions, 2 deletions
diff --git a/library/cpp/tdigest/tdigest.cpp b/library/cpp/tdigest/tdigest.cpp
index 480425d2e2..3bd6d2d7e5 100644
--- a/library/cpp/tdigest/tdigest.cpp
+++ b/library/cpp/tdigest/tdigest.cpp
@@ -19,11 +19,11 @@ TDigest::TDigest(double delta, double k, double firstValue)
AddValue(firstValue);
}
-TDigest::TDigest(const TString& serializedDigest)
+TDigest::TDigest(TStringBuf serializedDigest)
: N(0)
{
NTDigest::TDigest digest;
- Y_VERIFY(digest.ParseFromString(serializedDigest));
+ Y_VERIFY(digest.ParseFromArray(serializedDigest.data(), serializedDigest.size()));
Delta = digest.GetDelta();
K = digest.GetK();
for (int i = 0; i < digest.centroids_size(); ++i) {
@@ -157,6 +157,33 @@ double TDigest::GetPercentile(double percentile) {
return Centroids.back().Mean;
}
+double TDigest::GetRank(double value) { // Estimates the rank of `value` as a fraction of N (presumably the digest's total weight -- TODO confirm): 0.0 below the first centroid, 1.0 above the last.
+ Compress(); // Defined outside this view; presumably folds pending values into Centroids and leaves them ordered by Mean -- TODO confirm.
+ if (Centroids.empty()) {
+ return 0.0; // Nothing stored: report rank 0 rather than dividing by N.
+ }
+ if (value < Centroids.front().Mean) {
+ return 0.0; // Strictly below the first centroid's mean.
+ }
+ if (value == Centroids.front().Mean) {
+ return Centroids.front().Count * 0.5 / N; // Exactly on the first mean: half of that centroid's count counts as "below".
+ }
+ double sum = 0.0; // Total Count of the centroids already passed by the loop.
+ double prev_x = 0.0; // Cumulative-count midpoint of the previous centroid (0 before the first one).
+ double prev_mean = Centroids.front().Mean; // Mean of the previous centroid; seeded with the first centroid's own mean.
+ for (const auto& C : Centroids) {
+ double current_x = sum + C.Count * 0.5; // Midpoint of this centroid on the cumulative-count axis.
+ if (value <= C.Mean) { // Not taken on the first iteration for non-NaN input: the guards above already returned for value <= front().Mean, so C.Mean - prev_mean is not 0 here.
+ double k = (value - prev_mean) / (C.Mean - prev_mean); // Fraction of the way from the previous mean to this mean.
+ return (prev_x + k * (current_x - prev_x)) / N; // Linear interpolation between the two midpoints, scaled by N.
+ }
+ sum += C.Count;
+ prev_mean = C.Mean;
+ prev_x = current_x;
+ }
+ return 1.0; // value is greater than every centroid mean (a NaN value also ends up here, since every comparison above is false).
+}
+
TString TDigest::Serialize() {
Compress();
NTDigest::TDigest digest;
@@ -169,3 +196,7 @@ TString TDigest::Serialize() {
}
return digest.SerializeAsString();
}
+
+i64 TDigest::GetCount() const { // Returns N converted to an integer; N is presumably the accumulated weight of all added values -- TODO confirm.
+ return std::llround(N); // std::llround rounds to the nearest integer, with halfway cases going away from zero.
+}