diff options
author | aleksei-le <aleksei-le@yandex-team.com> | 2023-07-13 11:23:36 +0300 |
---|---|---|
committer | aleksei-le <aleksei-le@yandex-team.com> | 2023-07-13 11:23:36 +0300 |
commit | dfdcf3308ce1eef73cc215b3bf2730292628ff63 (patch) | |
tree | cf80b3c5728b7601950e72f9c18abe18e0b889a2 /library/cpp | |
parent | f85ed558056c615312bc2d44a967e3487f3cc164 (diff) | |
download | ydb-dfdcf3308ce1eef73cc215b3bf2730292628ff63.tar.gz |
metrics input: compress number hist buckets
Diffstat (limited to 'library/cpp')
-rw-r--r-- | library/cpp/monlib/encode/json/json_decoder.cpp | 2 | ||||
-rw-r--r-- | library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp | 2 | ||||
-rw-r--r-- | library/cpp/monlib/metrics/histogram_snapshot.cpp | 35 | ||||
-rw-r--r-- | library/cpp/monlib/metrics/histogram_snapshot.h | 2 | ||||
-rw-r--r-- | library/cpp/monlib/metrics/histogram_snapshot_ut.cpp | 122 | ||||
-rw-r--r-- | library/cpp/monlib/metrics/ut/ya.make | 1 |
6 files changed, 155 insertions, 9 deletions
diff --git a/library/cpp/monlib/encode/json/json_decoder.cpp b/library/cpp/monlib/encode/json/json_decoder.cpp index ad7e01b6af..3718990075 100644 --- a/library/cpp/monlib/encode/json/json_decoder.cpp +++ b/library/cpp/monlib/encode/json/json_decoder.cpp @@ -48,7 +48,7 @@ public: Values_.push_back(InfValue_); } - auto snapshot = ExplicitHistogramSnapshot(Bounds_, Values_); + auto snapshot = ExplicitHistogramSnapshot(Bounds_, Values_, true); Bounds_.clear(); Values_.clear(); diff --git a/library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp b/library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp index 7e81357dbd..29441c88c3 100644 --- a/library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp +++ b/library/cpp/monlib/encode/prometheus/prometheus_decoder.cpp @@ -135,7 +135,7 @@ namespace NMonitoring { Time_ = TInstant::Zero(); PrevBucket_ = ZERO_BUCKET; Labels_.Clear(); - auto snapshot = ExplicitHistogramSnapshot(Bounds_, Values_); + auto snapshot = ExplicitHistogramSnapshot(Bounds_, Values_, true); Bounds_.clear(); Values_.clear(); diff --git a/library/cpp/monlib/metrics/histogram_snapshot.cpp b/library/cpp/monlib/metrics/histogram_snapshot.cpp index 75b5811546..65bb262b98 100644 --- a/library/cpp/monlib/metrics/histogram_snapshot.cpp +++ b/library/cpp/monlib/metrics/histogram_snapshot.cpp @@ -7,16 +7,39 @@ namespace NMonitoring { - IHistogramSnapshotPtr ExplicitHistogramSnapshot(TConstArrayRef<TBucketBound> bounds, TConstArrayRef<TBucketValue> values) { + IHistogramSnapshotPtr ExplicitHistogramSnapshot(TConstArrayRef<TBucketBound> bounds, TConstArrayRef<TBucketValue> values, bool shrinkBuckets) { Y_ENSURE(bounds.size() == values.size(), "mismatched sizes: bounds(" << bounds.size() << ") != buckets(" << values.size() << ')'); - auto snapshot = TExplicitHistogramSnapshot::New(bounds.size()); - - for (size_t i = 0; i != bounds.size(); ++i) { - (*snapshot)[i].first = bounds[i]; - (*snapshot)[i].second = values[i]; + size_t requiredSize = shrinkBuckets ? std::min(bounds.size(), static_cast<size_t>(HISTOGRAM_MAX_BUCKETS_COUNT)) : bounds.size(); + auto snapshot = TExplicitHistogramSnapshot::New(requiredSize); + if (requiredSize < bounds.size()) { + auto remains = bounds.size() % requiredSize; + auto divided = bounds.size() / requiredSize; + size_t idx{bounds.size()}; + + for (size_t i = requiredSize; i > 0; --i) { + Y_ENSURE(idx > 0); + (*snapshot)[i - 1].first = bounds[idx - 1]; + (*snapshot)[i - 1].second = 0; + + auto repeat = divided; + if (remains > 0) { + ++repeat; + --remains; + } + for (; repeat > 0; --repeat) { + Y_ENSURE(idx > 0); + (*snapshot)[i - 1].second += values[idx - 1]; + --idx; + } + } + } else { + for (size_t i = 0; i != bounds.size(); ++i) { + (*snapshot)[i].first = bounds[i]; + (*snapshot)[i].second = values[i]; + } } return snapshot; diff --git a/library/cpp/monlib/metrics/histogram_snapshot.h b/library/cpp/monlib/metrics/histogram_snapshot.h index d1a32c3f94..05cb174fa8 100644 --- a/library/cpp/monlib/metrics/histogram_snapshot.h +++ b/library/cpp/monlib/metrics/histogram_snapshot.h @@ -203,7 +203,7 @@ namespace NMonitoring { static_assert(alignof(TExplicitHistogramSnapshot) == alignof(TBucket), "mismatched alingments of THistogramSnapshot and TBucket"); - IHistogramSnapshotPtr ExplicitHistogramSnapshot(TConstArrayRef<TBucketBound> bounds, TConstArrayRef<TBucketValue> values); + IHistogramSnapshotPtr ExplicitHistogramSnapshot(TConstArrayRef<TBucketBound> bounds, TConstArrayRef<TBucketValue> values, bool shrinkBuckets = false); } // namespace NMonitoring diff --git a/library/cpp/monlib/metrics/histogram_snapshot_ut.cpp b/library/cpp/monlib/metrics/histogram_snapshot_ut.cpp new file mode 100644 index 0000000000..7ef9b5a5b5 --- /dev/null +++ b/library/cpp/monlib/metrics/histogram_snapshot_ut.cpp @@ -0,0 +1,122 @@ +#include "histogram_snapshot.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <random> + +using namespace NMonitoring; + +Y_UNIT_TEST_SUITE(THistogramSnapshotTest) { + struct Buckets { + TBucketBounds Bounds; + TBucketValues Values; + }; + + Buckets MakeBuckets(uint32_t nBackets) { + Buckets result; + for (uint32_t i = 0; i < nBackets; ++i) { + result.Bounds.push_back(i + 1); + result.Values.push_back(i + 1); + } + return result; + } + + Y_UNIT_TEST(SimpleTest) { + for (uint32_t nBuckets = HISTOGRAM_MAX_BUCKETS_COUNT; nBuckets <= 3 * HISTOGRAM_MAX_BUCKETS_COUNT; ++nBuckets) { + auto buckets = MakeBuckets(nBuckets); + auto snapshot = ExplicitHistogramSnapshot(buckets.Bounds, buckets.Values, true); + UNIT_ASSERT_VALUES_EQUAL(snapshot->Count(), std::min(HISTOGRAM_MAX_BUCKETS_COUNT, nBuckets)); + uint64_t sumValues{0}; + for (uint32_t i = 0; i < snapshot->Count(); ++i) { + sumValues += snapshot->Value(i); + } + UNIT_ASSERT_VALUES_EQUAL(sumValues, nBuckets * (nBuckets + 1) / 2); + } + + auto backets = MakeBuckets(HISTOGRAM_MAX_BUCKETS_COUNT + 10); + UNIT_ASSERT_EXCEPTION(ExplicitHistogramSnapshot(backets.Bounds, backets.Values, false), yexception); + } + + Y_UNIT_TEST(InfSimpleTest) { + for (uint32_t nBuckets = 1; nBuckets <= 3 * HISTOGRAM_MAX_BUCKETS_COUNT; ++nBuckets) { + auto buckets = MakeBuckets(nBuckets); + buckets.Bounds.back() = Max<TBucketBound>(); + auto snapshot = ExplicitHistogramSnapshot(buckets.Bounds, buckets.Values, true); + + auto nBucketsReal = std::min(HISTOGRAM_MAX_BUCKETS_COUNT, nBuckets); + UNIT_ASSERT_VALUES_EQUAL(snapshot->Count(), nBucketsReal); + UNIT_ASSERT_DOUBLES_EQUAL(snapshot->UpperBound(nBucketsReal - 1), Max<TBucketBound>(), 1e-6); + uint64_t sumValues{0}; + for (uint32_t i = 0; i < snapshot->Count(); ++i) { + sumValues += snapshot->Value(i); + } + UNIT_ASSERT_VALUES_EQUAL(sumValues, nBuckets * (nBuckets + 1) / 2); + } + } + + Y_UNIT_TEST(BacketsTest) { + for (uint32_t nBuckets = HISTOGRAM_MAX_BUCKETS_COUNT; nBuckets <= 3 * HISTOGRAM_MAX_BUCKETS_COUNT; ++nBuckets) { + auto overlap = nBuckets % HISTOGRAM_MAX_BUCKETS_COUNT; + auto divided = nBuckets / HISTOGRAM_MAX_BUCKETS_COUNT; + auto buckets = MakeBuckets(nBuckets); + auto snapshot = ExplicitHistogramSnapshot(buckets.Bounds, buckets.Values, true); + UNIT_ASSERT_DOUBLES_EQUAL(snapshot->UpperBound(HISTOGRAM_MAX_BUCKETS_COUNT - 1), nBuckets, 1e-6); + + uint64_t sumBuckets{0}; + uint64_t sumSnapshot{0}; + size_t idx{0}; + + for (uint32_t i = 0; i < HISTOGRAM_MAX_BUCKETS_COUNT; ++i) { + sumSnapshot += snapshot->Value(i); + auto delta = (i < HISTOGRAM_MAX_BUCKETS_COUNT - overlap) ? 0ull : (i - (HISTOGRAM_MAX_BUCKETS_COUNT - overlap) + 1); + auto endIdx = divided * (i + 1) + delta; + UNIT_ASSERT_VALUES_EQUAL(snapshot->UpperBound(i), endIdx); + while (idx < endIdx) { + sumBuckets += buckets.Values[idx]; + ++idx; + } + UNIT_ASSERT_VALUES_EQUAL(sumBuckets, sumSnapshot); + } + } + } + + Y_UNIT_TEST(CompareHistTest) { + uint32_t K = 4; + uint32_t N = K * HISTOGRAM_MAX_BUCKETS_COUNT; + Buckets bucketsBig; + Buckets bucketsSmall; + for (uint32_t i = 1; i <= N; ++i) { + if (i % K == 0) { + bucketsSmall.Bounds.push_back(i); + bucketsSmall.Values.push_back(0); + } + bucketsBig.Bounds.push_back(i); + bucketsBig.Values.push_back(0); + } + + UNIT_ASSERT_VALUES_EQUAL(bucketsBig.Values.size(), N); + UNIT_ASSERT_VALUES_EQUAL(bucketsSmall.Values.size(), N / K); + UNIT_ASSERT_VALUES_EQUAL(bucketsBig.Bounds.back(), N); + UNIT_ASSERT_VALUES_EQUAL(bucketsSmall.Bounds.back(), N); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distrib(1, N); + + for (int i = 0; i < 1000; ++i) { + auto rndValue = distrib(gen); + ++bucketsBig.Values[rndValue - 1]; + ++bucketsSmall.Values[(rndValue - 1) / K]; + } + + UNIT_ASSERT_VALUES_EQUAL(bucketsSmall.Bounds.back(), N); + UNIT_ASSERT_VALUES_EQUAL(bucketsBig.Bounds.back(), N); + auto snapshotBig = ExplicitHistogramSnapshot(bucketsBig.Bounds, bucketsBig.Values, true); + auto snapshotSmall = ExplicitHistogramSnapshot(bucketsSmall.Bounds, bucketsSmall.Values, true); + UNIT_ASSERT_VALUES_EQUAL(snapshotBig->Count(), snapshotSmall->Count()); + + for (uint32_t i = 0; i < snapshotSmall->Count(); ++i) { + UNIT_ASSERT_VALUES_EQUAL(snapshotSmall->Value(i), snapshotBig->Value(i)); + } + } +} diff --git a/library/cpp/monlib/metrics/ut/ya.make b/library/cpp/monlib/metrics/ut/ya.make index b0f3440750..d30cc3db84 100644 --- a/library/cpp/monlib/metrics/ut/ya.make +++ b/library/cpp/monlib/metrics/ut/ya.make @@ -4,6 +4,7 @@ SRCS( ewma_ut.cpp fake_ut.cpp histogram_collector_ut.cpp + histogram_snapshot_ut.cpp labels_ut.cpp log_histogram_collector_ut.cpp metric_registry_ut.cpp |