1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
#include <library/cpp/testing/unittest/registar.h>
#include "eq_width_histogram.h"
namespace NKikimr {
template <typename T>
bool EqualHistograms(const std::shared_ptr<TEqWidthHistogram> &left, const std::shared_ptr<TEqWidthHistogram> &right) {
// Not expecting any nullptr.
if (!left || !right) return false;
if (left->GetNumBuckets() != right->GetNumBuckets()) {
return false;
}
if (left->GetType() != right->GetType()) {
return false;
}
for (ui32 i = 0; i < left->GetNumBuckets(); ++i) {
const auto &leftBucket = left->GetBuckets()[i];
const auto &rightBucket = right->GetBuckets()[i];
if (leftBucket.Count != rightBucket.Count) {
return false;
}
if (!CmpEqual<T>(LoadFrom<T>(leftBucket.Start), LoadFrom<T>(rightBucket.Start))) {
return false;
}
}
return true;
}
// Creates a histogram with `numBuckets` buckets of the given `valueType` and
// initializes its buckets from a bucket range whose Start is `start` and whose
// End is `range`, both stored as T.
template <typename T>
std::shared_ptr<TEqWidthHistogram> CreateHistogram(ui32 numBuckets, T start, T range, EHistogramValueType valueType) {
    // Prepare the range descriptor that seeds the buckets.
    TEqWidthHistogram::TBucketRange seedRange;
    StoreTo<T>(seedRange.Start, start);
    StoreTo<T>(seedRange.End, range);

    std::shared_ptr<TEqWidthHistogram> result = std::make_shared<TEqWidthHistogram>(numBuckets, valueType);
    result->InitializeBuckets<T>(seedRange);
    return result;
}
template <typename T>
void PopulateHistogram(std::shared_ptr<TEqWidthHistogram> histogram, const std::pair<ui32, ui32> &range) {
for (ui32 i = range.first; i < range.second; ++i) {
histogram->AddElement<T>(i);
}
}
// Builds and fills a histogram, then checks the estimator against expected values.
//   numBuckets  - number of buckets to create (also asserted on the result).
//   range       - [first, second) integers inserted into the histogram.
//   bucketRange - {Start, End} pair forwarded to CreateHistogram.
//   less        - {probe value, expected EstimateLessOrEqual result}.
//   greater     - {probe value, expected EstimateGreaterOrEqual result}.
template <typename T>
void TestHistogramBasic(ui32 numBuckets, std::pair<ui32, ui32> range, std::pair<T, T> bucketRange,
                        EHistogramValueType valueType, std::pair<T, ui64> less, std::pair<T, ui64> greater) {
    const T bucketStart = bucketRange.first;
    const T bucketEnd = bucketRange.second;
    auto histogram = CreateHistogram<T>(numBuckets, bucketStart, bucketEnd, valueType);
    // The bucket count is verified before any element is inserted.
    UNIT_ASSERT_VALUES_EQUAL(histogram->GetNumBuckets(), numBuckets);

    PopulateHistogram<T>(histogram, range);

    TEqWidthHistogramEstimator estimator(histogram);
    UNIT_ASSERT_VALUES_EQUAL(estimator.EstimateLessOrEqual<T>(less.first), less.second);
    UNIT_ASSERT_VALUES_EQUAL(estimator.EstimateGreaterOrEqual<T>(greater.first), greater.second);
}
// Round-trip check: a populated histogram is serialized, rebuilt from the
// serialized bytes, and the rebuilt histogram must compare equal to the source
// according to EqualHistograms<T>.
template <typename T>
void TestHistogramSerialization(ui32 numBuckets, std::pair<ui32, ui32> range, std::pair<T, T> bucketRange,
                                EHistogramValueType valueType) {
    const T bucketStart = bucketRange.first;
    const T bucketEnd = bucketRange.second;
    auto histogram = CreateHistogram<T>(numBuckets, bucketStart, bucketEnd, valueType);
    UNIT_ASSERT(histogram);
    PopulateHistogram<T>(histogram, range);

    // Serialize() reports the produced byte count through its argument.
    ui64 binarySize = 0;
    auto binaryData = histogram->Serialize(binarySize);
    UNIT_ASSERT(binaryData && binarySize);

    // Copy the raw bytes into a string and reconstruct a histogram from that copy.
    TString serialized(binaryData.get(), binarySize);
    auto histogramFromString = std::make_shared<TEqWidthHistogram>(serialized.data(), serialized.size());
    UNIT_ASSERT(histogramFromString);
    UNIT_ASSERT(EqualHistograms<T>(histogram, histogramFromString));
}
// Checks histogram aggregation.
// Two histograms are created with identical parameters and filled with the same
// [range.first, range.second) values; the second one is then aggregated into the
// first `numCombine` times, and the per-bucket counts of the first histogram are
// compared against `resultCount`.
//   resultCount - expected count for each bucket; must provide at least one
//                 entry per bucket of the histogram.
template <typename T>
void TestHistogramAggregate(ui32 numBuckets, std::pair<ui32, ui32> range, std::pair<T, T> bucketRange,
                            EHistogramValueType valueType, ui32 numCombine, const TVector<ui64> &resultCount) {
    auto histogram = CreateHistogram<T>(numBuckets, bucketRange.first, bucketRange.second, valueType);
    UNIT_ASSERT(histogram);
    PopulateHistogram<T>(histogram, range);

    auto histogramToAdd = CreateHistogram<T>(numBuckets, bucketRange.first, bucketRange.second, valueType);
    // Validate the second histogram before it is populated and dereferenced.
    UNIT_ASSERT(histogramToAdd);
    PopulateHistogram<T>(histogramToAdd, range);

    for (ui32 i = 0; i < numCombine; ++i) {
        histogram->template Aggregate<T>(*histogramToAdd);
    }

    // Guard the indexing below: every bucket needs an expected value.
    const auto bucketCount = histogram->GetNumBuckets();
    UNIT_ASSERT(resultCount.size() >= bucketCount);
    for (ui32 i = 0; i < bucketCount; ++i) {
        // Copy the count into a local so the assertion macro works on a plain value.
        const auto actualCount = histogram->GetBuckets()[i].Count;
        UNIT_ASSERT_VALUES_EQUAL(actualCount, resultCount[i]);
    }
}
Y_UNIT_TEST_SUITE(EqWidthHistogram) {
    // Estimator checks for every supported value type. Each case uses 10 buckets,
    // inserts the integers 0..9 and passes {0, 2} as the bucket range.
    Y_UNIT_TEST(Basic) {
        constexpr ui32 bucketCount = 10;
        const std::pair<ui32, ui32> insertedValues{0, 10};

        TestHistogramBasic<ui32>(bucketCount, insertedValues, /*bucket range=*/{0, 2}, EHistogramValueType::Uint32,
                                 /*less-or-equal {value, result}=*/{9, 10},
                                 /*greater-or-equal {value, result}=*/{10, 0});
        TestHistogramBasic<ui64>(bucketCount, insertedValues, /*bucket range=*/{0, 2}, EHistogramValueType::Uint64,
                                 /*less-or-equal {value, result}=*/{9, 10},
                                 /*greater-or-equal {value, result}=*/{10, 0});
        TestHistogramBasic<i32>(bucketCount, insertedValues, /*bucket range=*/{0, 2}, EHistogramValueType::Int32,
                                /*less-or-equal {value, result}=*/{9, 10},
                                /*greater-or-equal {value, result}=*/{10, 0});
        TestHistogramBasic<i64>(bucketCount, insertedValues, /*bucket range=*/{0, 2}, EHistogramValueType::Int64,
                                /*less-or-equal {value, result}=*/{9, 10},
                                /*greater-or-equal {value, result}=*/{10, 0});
        TestHistogramBasic<double>(bucketCount, insertedValues, /*bucket range=*/{0.0, 2.0},
                                   EHistogramValueType::Double,
                                   /*less-or-equal {value, result}=*/{9.0, 10},
                                   /*greater-or-equal {value, result}=*/{10.0, 0});
    }

    // Serialize/deserialize round trip for every supported value type.
    Y_UNIT_TEST(Serialization) {
        constexpr ui32 bucketCount = 10;
        const std::pair<ui32, ui32> insertedValues{0, 10};

        TestHistogramSerialization<ui32>(bucketCount, insertedValues, /*bucket range=*/{0, 2},
                                         EHistogramValueType::Uint32);
        TestHistogramSerialization<ui64>(bucketCount, insertedValues, /*bucket range=*/{0, 2},
                                         EHistogramValueType::Uint64);
        TestHistogramSerialization<i32>(bucketCount, insertedValues, /*bucket range=*/{0, 2},
                                        EHistogramValueType::Int32);
        TestHistogramSerialization<i64>(bucketCount, insertedValues, /*bucket range=*/{0, 2},
                                        EHistogramValueType::Int64);
        TestHistogramSerialization<double>(bucketCount, insertedValues, /*bucket range=*/{0.0, 2.0},
                                           EHistogramValueType::Double);
    }

    // Aggregates an identically populated histogram into the first one 9 times.
    Y_UNIT_TEST(AggregateHistogram) {
        constexpr ui32 bucketCount = 10;
        constexpr ui32 combineRounds = 9;
        const std::pair<ui32, ui32> insertedValues{0, 10};
        // Expected per-bucket counts: 20 in each of the first five buckets, 0 in the rest.
        // NOTE(review): presumably 2 own elements + 9 x 2 aggregated per bucket - confirm
        // against the bucket layout produced by InitializeBuckets.
        const TVector<ui64> expectedCounts{20, 20, 20, 20, 20, 0, 0, 0, 0, 0};

        TestHistogramAggregate<ui32>(bucketCount, insertedValues, /*bucket range=*/{0, 2},
                                     EHistogramValueType::Uint32, combineRounds, expectedCounts);
    }
}
} // namespace NKikimr
|