about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp
diff options
context:
space:
mode:
author: aleksei-le <aleksei-le@yandex-team.com> 2023-07-13 11:23:36 +0300
committer: aleksei-le <aleksei-le@yandex-team.com> 2023-07-13 11:23:36 +0300
commit: dfdcf3308ce1eef73cc215b3bf2730292628ff63 (patch)
tree: cf80b3c5728b7601950e72f9c18abe18e0b889a2 /library/cpp
parent: f85ed558056c615312bc2d44a967e3487f3cc164 (diff)
download: ydb-dfdcf3308ce1eef73cc215b3bf2730292628ff63.tar.gz
metrics input: compress number hist buckets
Diffstat (limited to 'library/cpp')
-rw-r--r-- library/cpp/monlib/encode/json/json_decoder.cpp | 2
-rw-r--r-- library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp | 2
-rw-r--r-- library/cpp/monlib/metrics/histogram_snapshot.cpp | 35
-rw-r--r-- library/cpp/monlib/metrics/histogram_snapshot.h | 2
-rw-r--r-- library/cpp/monlib/metrics/histogram_snapshot_ut.cpp | 122
-rw-r--r-- library/cpp/monlib/metrics/ut/ya.make | 1
6 files changed, 155 insertions, 9 deletions
diff --git a/library/cpp/monlib/encode/json/json_decoder.cpp b/library/cpp/monlib/encode/json/json_decoder.cpp
index ad7e01b6af..3718990075 100644
--- a/library/cpp/monlib/encode/json/json_decoder.cpp
+++ b/library/cpp/monlib/encode/json/json_decoder.cpp
@@ -48,7 +48,7 @@ public:
Values_.push_back(InfValue_);
}
- auto snapshot = ExplicitHistogramSnapshot(Bounds_, Values_);
+ auto snapshot = ExplicitHistogramSnapshot(Bounds_, Values_, true);
Bounds_.clear();
Values_.clear();
diff --git a/library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp b/library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp
index 7e81357dbd..29441c88c3 100644
--- a/library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp
+++ b/library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp
@@ -135,7 +135,7 @@ namespace NMonitoring {
Time_ = TInstant::Zero();
PrevBucket_ = ZERO_BUCKET;
Labels_.Clear();
- auto snapshot = ExplicitHistogramSnapshot(Bounds_, Values_);
+ auto snapshot = ExplicitHistogramSnapshot(Bounds_, Values_, true);
Bounds_.clear();
Values_.clear();
diff --git a/library/cpp/monlib/metrics/histogram_snapshot.cpp b/library/cpp/monlib/metrics/histogram_snapshot.cpp
index 75b5811546..65bb262b98 100644
--- a/library/cpp/monlib/metrics/histogram_snapshot.cpp
+++ b/library/cpp/monlib/metrics/histogram_snapshot.cpp
@@ -7,16 +7,39 @@
namespace NMonitoring {
- IHistogramSnapshotPtr ExplicitHistogramSnapshot(TConstArrayRef<TBucketBound> bounds, TConstArrayRef<TBucketValue> values) {
+ IHistogramSnapshotPtr ExplicitHistogramSnapshot(TConstArrayRef<TBucketBound> bounds, TConstArrayRef<TBucketValue> values, bool shrinkBuckets) {
Y_ENSURE(bounds.size() == values.size(),
"mismatched sizes: bounds(" << bounds.size() <<
") != buckets(" << values.size() << ')');
- auto snapshot = TExplicitHistogramSnapshot::New(bounds.size());
-
- for (size_t i = 0; i != bounds.size(); ++i) {
- (*snapshot)[i].first = bounds[i];
- (*snapshot)[i].second = values[i];
+ size_t requiredSize = shrinkBuckets ? std::min(bounds.size(), static_cast<size_t>(HISTOGRAM_MAX_BUCKETS_COUNT)) : bounds.size();
+ auto snapshot = TExplicitHistogramSnapshot::New(requiredSize);
+ if (requiredSize < bounds.size()) {
+ auto remains = bounds.size() % requiredSize;
+ auto divided = bounds.size() / requiredSize;
+ size_t idx{bounds.size()};
+
+ for (size_t i = requiredSize; i > 0; --i) {
+ Y_ENSURE(idx > 0);
+ (*snapshot)[i - 1].first = bounds[idx - 1];
+ (*snapshot)[i - 1].second = 0;
+
+ auto repeat = divided;
+ if (remains > 0) {
+ ++repeat;
+ --remains;
+ }
+ for (; repeat > 0; --repeat) {
+ Y_ENSURE(idx > 0);
+ (*snapshot)[i - 1].second += values[idx - 1];
+ --idx;
+ }
+ }
+ } else {
+ for (size_t i = 0; i != bounds.size(); ++i) {
+ (*snapshot)[i].first = bounds[i];
+ (*snapshot)[i].second = values[i];
+ }
}
return snapshot;
diff --git a/library/cpp/monlib/metrics/histogram_snapshot.h b/library/cpp/monlib/metrics/histogram_snapshot.h
index d1a32c3f94..05cb174fa8 100644
--- a/library/cpp/monlib/metrics/histogram_snapshot.h
+++ b/library/cpp/monlib/metrics/histogram_snapshot.h
@@ -203,7 +203,7 @@ namespace NMonitoring {
static_assert(alignof(TExplicitHistogramSnapshot) == alignof(TBucket),
"mismatched alingments of THistogramSnapshot and TBucket");
- IHistogramSnapshotPtr ExplicitHistogramSnapshot(TConstArrayRef<TBucketBound> bounds, TConstArrayRef<TBucketValue> values);
+ IHistogramSnapshotPtr ExplicitHistogramSnapshot(TConstArrayRef<TBucketBound> bounds, TConstArrayRef<TBucketValue> values, bool shrinkBuckets = false);
} // namespace NMonitoring
diff --git a/library/cpp/monlib/metrics/histogram_snapshot_ut.cpp b/library/cpp/monlib/metrics/histogram_snapshot_ut.cpp
new file mode 100644
index 0000000000..7ef9b5a5b5
--- /dev/null
+++ b/library/cpp/monlib/metrics/histogram_snapshot_ut.cpp
@@ -0,0 +1,122 @@
+#include "histogram_snapshot.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <random>
+
+using namespace NMonitoring;
+
+Y_UNIT_TEST_SUITE(THistogramSnapshotTest) {
+ struct Buckets { // Test fixture: the bounds/values pair handed to ExplicitHistogramSnapshot.
+ TBucketBounds Bounds; // one bound per bucket; compared against snapshot->UpperBound() in the tests
+ TBucketValues Values; // one value per bucket; filled in lockstep with Bounds by every test here
+ };
+
+ Buckets MakeBuckets(uint32_t nBackets) { // Builds nBackets buckets; bucket i (0-based) gets bound i + 1 and value i + 1.
+ Buckets result;
+ for (uint32_t i = 0; i < nBackets; ++i) {
+ result.Bounds.push_back(i + 1); // bounds are 1, 2, ..., nBackets
+ result.Values.push_back(i + 1); // values sum to n * (n + 1) / 2, which the tests check against
+ }
+ return result;
+ }
+
+ Y_UNIT_TEST(SimpleTest) { // Shrinking caps the bucket count and keeps the total of all values.
+ for (uint32_t nBuckets = HISTOGRAM_MAX_BUCKETS_COUNT; nBuckets <= 3 * HISTOGRAM_MAX_BUCKETS_COUNT; ++nBuckets) { // sizes from the limit up to three times the limit
+ auto buckets = MakeBuckets(nBuckets);
+ auto snapshot = ExplicitHistogramSnapshot(buckets.Bounds, buckets.Values, true); // true = shrinkBuckets
+ UNIT_ASSERT_VALUES_EQUAL(snapshot->Count(), std::min(HISTOGRAM_MAX_BUCKETS_COUNT, nBuckets)); // nBuckets >= limit here, so this is the limit
+ uint64_t sumValues{0};
+ for (uint32_t i = 0; i < snapshot->Count(); ++i) {
+ sumValues += snapshot->Value(i);
+ }
+ UNIT_ASSERT_VALUES_EQUAL(sumValues, nBuckets * (nBuckets + 1) / 2); // merged buckets must still add up to 1 + 2 + ... + nBuckets
+ }
+
+ auto backets = MakeBuckets(HISTOGRAM_MAX_BUCKETS_COUNT + 10); // more buckets than the limit
+ UNIT_ASSERT_EXCEPTION(ExplicitHistogramSnapshot(backets.Bounds, backets.Values, false), yexception); // with shrinking off, an oversized input is expected to throw
+ }
+
+ Y_UNIT_TEST(InfSimpleTest) { // Same checks as SimpleTest, with the last bound set to the maximum TBucketBound.
+ for (uint32_t nBuckets = 1; nBuckets <= 3 * HISTOGRAM_MAX_BUCKETS_COUNT; ++nBuckets) { // starts at 1, so inputs below the limit are covered too
+ auto buckets = MakeBuckets(nBuckets);
+ buckets.Bounds.back() = Max<TBucketBound>(); // presumably stands in for the +inf bucket, as the test name suggests
+ auto snapshot = ExplicitHistogramSnapshot(buckets.Bounds, buckets.Values, true);
+
+ auto nBucketsReal = std::min(HISTOGRAM_MAX_BUCKETS_COUNT, nBuckets); // expected snapshot size: unchanged below the limit, capped above it
+ UNIT_ASSERT_VALUES_EQUAL(snapshot->Count(), nBucketsReal);
+ UNIT_ASSERT_DOUBLES_EQUAL(snapshot->UpperBound(nBucketsReal - 1), Max<TBucketBound>(), 1e-6); // the last output bucket must keep the maximum bound
+ uint64_t sumValues{0};
+ for (uint32_t i = 0; i < snapshot->Count(); ++i) {
+ sumValues += snapshot->Value(i);
+ }
+ UNIT_ASSERT_VALUES_EQUAL(sumValues, nBuckets * (nBuckets + 1) / 2); // the total of all values is preserved
+ }
+ }
+
+ Y_UNIT_TEST(BacketsTest) { // Checks which input buckets are merged into each output bucket after shrinking.
+ for (uint32_t nBuckets = HISTOGRAM_MAX_BUCKETS_COUNT; nBuckets <= 3 * HISTOGRAM_MAX_BUCKETS_COUNT; ++nBuckets) {
+ auto overlap = nBuckets % HISTOGRAM_MAX_BUCKETS_COUNT; // number of output buckets expected to absorb one extra input bucket
+ auto divided = nBuckets / HISTOGRAM_MAX_BUCKETS_COUNT; // base number of input buckets per output bucket
+ auto buckets = MakeBuckets(nBuckets);
+ auto snapshot = ExplicitHistogramSnapshot(buckets.Bounds, buckets.Values, true);
+ UNIT_ASSERT_DOUBLES_EQUAL(snapshot->UpperBound(HISTOGRAM_MAX_BUCKETS_COUNT - 1), nBuckets, 1e-6); // last output bound equals the last input bound
+
+ uint64_t sumBuckets{0}; // running total of input values consumed so far
+ uint64_t sumSnapshot{0}; // running total of output values seen so far
+ size_t idx{0}; // next input bucket not yet added to sumBuckets
+
+ for (uint32_t i = 0; i < HISTOGRAM_MAX_BUCKETS_COUNT; ++i) {
+ sumSnapshot += snapshot->Value(i);
+ auto delta = (i < HISTOGRAM_MAX_BUCKETS_COUNT - overlap) ? 0ull : (i - (HISTOGRAM_MAX_BUCKETS_COUNT - overlap) + 1); // extra inputs absorbed up to and including bucket i; only the last `overlap` buckets take one each
+ auto endIdx = divided * (i + 1) + delta; // one past the last input index expected in output bucket i
+ UNIT_ASSERT_VALUES_EQUAL(snapshot->UpperBound(i), endIdx); // MakeBuckets sets bound = index + 1, so the expected bound equals endIdx
+ while (idx < endIdx) {
+ sumBuckets += buckets.Values[idx];
+ ++idx;
+ }
+ UNIT_ASSERT_VALUES_EQUAL(sumBuckets, sumSnapshot); // cumulative sums must match at every output bucket boundary
+ }
+ }
+ }
+
+ Y_UNIT_TEST(CompareHistTest) { // A fine histogram, once shrunk, must equal the coarse histogram built from the same samples.
+ uint32_t K = 4; // fine buckets per coarse bucket
+ uint32_t N = K * HISTOGRAM_MAX_BUCKETS_COUNT; // fine bucket count; the coarse histogram has N / K buckets
+ Buckets bucketsBig;
+ Buckets bucketsSmall;
+ for (uint32_t i = 1; i <= N; ++i) {
+ if (i % K == 0) { // coarse bounds are K, 2K, ..., N
+ bucketsSmall.Bounds.push_back(i);
+ bucketsSmall.Values.push_back(0);
+ }
+ bucketsBig.Bounds.push_back(i); // fine bounds are 1, 2, ..., N
+ bucketsBig.Values.push_back(0);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(bucketsBig.Values.size(), N);
+ UNIT_ASSERT_VALUES_EQUAL(bucketsSmall.Values.size(), N / K);
+ UNIT_ASSERT_VALUES_EQUAL(bucketsBig.Bounds.back(), N);
+ UNIT_ASSERT_VALUES_EQUAL(bucketsSmall.Bounds.back(), N);
+
+ std::random_device rd; // NOTE(review): non-deterministic seed, so a failing run cannot be replayed; consider a fixed seed
+ std::mt19937 gen(rd());
+ std::uniform_int_distribution<> distrib(1, N); // samples are drawn from [1, N] inclusive
+
+ for (int i = 0; i < 1000; ++i) {
+ auto rndValue = distrib(gen);
+ ++bucketsBig.Values[rndValue - 1]; // fine bucket with bound rndValue
+ ++bucketsSmall.Values[(rndValue - 1) / K]; // coarse bucket covering the same sample
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(bucketsSmall.Bounds.back(), N);
+ UNIT_ASSERT_VALUES_EQUAL(bucketsBig.Bounds.back(), N);
+ auto snapshotBig = ExplicitHistogramSnapshot(bucketsBig.Bounds, bucketsBig.Values, true); // N inputs, shrink requested
+ auto snapshotSmall = ExplicitHistogramSnapshot(bucketsSmall.Bounds, bucketsSmall.Values, true); // N / K inputs, shrink requested
+ UNIT_ASSERT_VALUES_EQUAL(snapshotBig->Count(), snapshotSmall->Count());
+
+ for (uint32_t i = 0; i < snapshotSmall->Count(); ++i) {
+ UNIT_ASSERT_VALUES_EQUAL(snapshotSmall->Value(i), snapshotBig->Value(i)); // per-bucket values must match; bounds are not compared here
+ }
+ }
+}
diff --git a/library/cpp/monlib/metrics/ut/ya.make b/library/cpp/monlib/metrics/ut/ya.make
index b0f3440750..d30cc3db84 100644
--- a/library/cpp/monlib/metrics/ut/ya.make
+++ b/library/cpp/monlib/metrics/ut/ya.make
@@ -4,6 +4,7 @@ SRCS(
ewma_ut.cpp
fake_ut.cpp
histogram_collector_ut.cpp
+ histogram_snapshot_ut.cpp
labels_ut.cpp
log_histogram_collector_ut.cpp
metric_registry_ut.cpp