aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/core/histogram/ut/eq_width_histogram_ut.cpp
blob: 9c1b1d969feed1e0e91a27d6282798bdafa56196 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include <library/cpp/testing/unittest/registar.h>

#include "eq_width_histogram.h"

namespace NKikimr {

template <typename T>
bool EqualHistograms(const std::shared_ptr<TEqWidthHistogram> &left, const std::shared_ptr<TEqWidthHistogram> &right) {
  // Not expecting any nullptr.
  if (!left || !right) return false;

  if (left->GetNumBuckets() != right->GetNumBuckets()) {
    return false;
  }
  if (left->GetType() != right->GetType()) {
    return false;
  }

  for (ui32 i = 0; i < left->GetNumBuckets(); ++i) {
    const auto &leftBucket = left->GetBuckets()[i];
    const auto &rightBucket = right->GetBuckets()[i];
    if (leftBucket.Count != rightBucket.Count) {
      return false;
    }
    if (!CmpEqual<T>(LoadFrom<T>(leftBucket.Start), LoadFrom<T>(rightBucket.Start))) {
      return false;
    }
  }

  return true;
}

// Builds a histogram with `numBuckets` buckets of the given value type and initializes its
// buckets from a bucket range whose Start is `start` and whose End is `range`.
// NOTE(review): the tests below pass {0, 2} here with 10 buckets, so the range presumably
// describes the first bucket rather than the whole domain - confirm against InitializeBuckets.
template <typename T>
std::shared_ptr<TEqWidthHistogram> CreateHistogram(ui32 numBuckets, T start, T range, EHistogramValueType valueType) {
  auto result = std::make_shared<TEqWidthHistogram>(numBuckets, valueType);

  // Encode both borders as T into the range descriptor.
  TEqWidthHistogram::TBucketRange borders;
  StoreTo<T>(borders.Start, start);
  StoreTo<T>(borders.End, range);

  result->InitializeBuckets<T>(borders);
  return result;
}

// Adds every integer of the half-open interval [range.first, range.second) to `histogram`
// through AddElement<T>.
// The histogram is taken by const reference, like in EqualHistograms: this helper only uses
// the pointee, so copying the shared_ptr would merely cost an atomic reference-count update.
// `histogram` must not be null; it is dereferenced unconditionally.
template <typename T>
void PopulateHistogram(const std::shared_ptr<TEqWidthHistogram> &histogram, const std::pair<ui32, ui32> &range) {
  for (ui32 i = range.first; i < range.second; ++i) {
    // `i` is a ui32; it is handed to AddElement<T> as-is (presumably converted to T there).
    histogram->AddElement<T>(i);
  }
}

// Smoke test for estimation over a freshly populated histogram.
//   numBuckets  - number of buckets to create (also asserted on the created histogram)
//   range       - half-open interval of integers fed into the histogram
//   bucketRange - {start, end} forwarded to CreateHistogram
//   valueType   - histogram value type tag matching T
//   less        - {value, expected result} for EstimateLessOrEqual
//   greater     - {value, expected result} for EstimateGreaterOrEqual
template <typename T>
void TestHistogramBasic(ui32 numBuckets, std::pair<ui32, ui32> range, std::pair<T, T> bucketRange,
                        EHistogramValueType valueType, std::pair<T, ui64> less, std::pair<T, ui64> greater) {
  const T bucketStart = bucketRange.first;
  const T bucketEnd = bucketRange.second;

  // Build and fill the histogram under test.
  auto histogram = CreateHistogram<T>(numBuckets, bucketStart, bucketEnd, valueType);
  UNIT_ASSERT_VALUES_EQUAL(histogram->GetNumBuckets(), numBuckets);
  PopulateHistogram<T>(histogram, range);

  // Both estimates are checked against the caller-supplied expectations.
  TEqWidthHistogramEstimator estimator(histogram);
  UNIT_ASSERT_VALUES_EQUAL(estimator.EstimateLessOrEqual<T>(less.first), less.second);
  UNIT_ASSERT_VALUES_EQUAL(estimator.EstimateGreaterOrEqual<T>(greater.first), greater.second);
}

// Round-trip test: a populated histogram is serialized, rebuilt from the serialized bytes,
// and the rebuilt histogram must compare equal to the original via EqualHistograms<T>.
//   numBuckets  - number of buckets to create
//   range       - half-open interval of integers fed into the histogram
//   bucketRange - {start, end} forwarded to CreateHistogram
//   valueType   - histogram value type tag matching T
template <typename T>
void TestHistogramSerialization(ui32 numBuckets, std::pair<ui32, ui32> range, std::pair<T, T> bucketRange,
                                EHistogramValueType valueType) {
  const T bucketStart = bucketRange.first;
  const T bucketEnd = bucketRange.second;

  auto histogram = CreateHistogram<T>(numBuckets, bucketStart, bucketEnd, valueType);
  UNIT_ASSERT(histogram);
  PopulateHistogram<T>(histogram, range);

  // Serialize() reports the payload length through its argument.
  ui64 binarySize = 0;
  auto binaryData = histogram->Serialize(binarySize);
  UNIT_ASSERT(binaryData && binarySize);

  // Copy the payload into a string and rebuild a histogram from that copy.
  TString hString(binaryData.get(), binarySize);
  auto histogramFromString = std::make_shared<TEqWidthHistogram>(hString.data(), hString.size());
  UNIT_ASSERT(histogramFromString);

  UNIT_ASSERT(EqualHistograms<T>(histogram, histogramFromString));
}

// Aggregation test: two identically configured and identically populated histograms are
// created, the second one is aggregated into the first `numCombine` times, and the resulting
// per-bucket counts are compared against `resultCount`.
//   numBuckets  - number of buckets in both histograms
//   range       - half-open interval of integers fed into each histogram
//   bucketRange - {start, end} forwarded to CreateHistogram
//   valueType   - histogram value type tag matching T
//   numCombine  - how many times the second histogram is aggregated into the first
//   resultCount - expected Count of every bucket after aggregation; must have one entry per bucket
template <typename T>
void TestHistogramAggregate(ui32 numBuckets, std::pair<ui32, ui32> range, std::pair<T, T> bucketRange,
                            EHistogramValueType valueType, ui32 numCombine, const TVector<ui64> &resultCount) {
  auto histogram = CreateHistogram<T>(numBuckets, bucketRange.first, bucketRange.second, valueType);
  UNIT_ASSERT(histogram);
  PopulateHistogram<T>(histogram, range);

  auto histogramToAdd = CreateHistogram<T>(numBuckets, bucketRange.first, bucketRange.second, valueType);
  // Check the histogram that was just created (the original re-checked `histogram` here).
  UNIT_ASSERT(histogramToAdd);
  PopulateHistogram<T>(histogramToAdd, range);

  for (ui32 i = 0; i < numCombine; ++i) {
    histogram->template Aggregate<T>(*histogramToAdd);
  }

  // Guard the indexing below: an expectation vector of the wrong length must fail the test
  // instead of being read out of bounds.
  UNIT_ASSERT_VALUES_EQUAL(histogram->GetNumBuckets(), resultCount.size());
  for (ui32 i = 0; i < histogram->GetNumBuckets(); ++i) {
    // VALUES_EQUAL prints both counts on mismatch, unlike a bare boolean assert.
    UNIT_ASSERT_VALUES_EQUAL(histogram->GetBuckets()[i].Count, resultCount[i]);
  }
}

// Test suite for the equal-width histogram: estimation, serialization round trip, aggregation.
// Every case uses 10 buckets, feeds the integers 0..9 and passes {0, 2} as the bucket range.
// The values range parameter is std::pair<ui32, ui32> for every T, so it is always written
// with integer literals (the double cases used to pass floating-point literals here).
Y_UNIT_TEST_SUITE(EqWidthHistogram) {
  // For each supported value type: all ten values are expected to be <= 9, none >= 10.
  Y_UNIT_TEST(Basic) {
    TestHistogramBasic<ui32>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2}, EHistogramValueType::Uint32,
                             /*{value, result}=*/{9, 10},
                             /*{value, result}=*/{10, 0});
    TestHistogramBasic<ui64>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2}, EHistogramValueType::Uint64,
                             /*{value, result}=*/{9, 10},
                             /*{value, result}=*/{10, 0});
    TestHistogramBasic<i32>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2}, EHistogramValueType::Int32,
                            /*{value, result}=*/{9, 10},
                            /*{value, result}=*/{10, 0});
    TestHistogramBasic<i64>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2}, EHistogramValueType::Int64,
                            /*{value, result}=*/{9, 10},
                            /*{value, result}=*/{10, 0});
    TestHistogramBasic<double>(10, /*values range=*/{0, 10}, /*bucket range=*/{0.0, 2.0},
                               EHistogramValueType::Double,
                               /*{value, result}=*/{9.0, 10},
                               /*{value, result}=*/{10.0, 0});
  }

  // For each supported value type: serialize, deserialize, compare with the original.
  Y_UNIT_TEST(Serialization) {
    TestHistogramSerialization<ui32>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2},
                                     EHistogramValueType::Uint32);
    TestHistogramSerialization<ui64>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2},
                                     EHistogramValueType::Uint64);
    TestHistogramSerialization<i32>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2}, EHistogramValueType::Int32);
    TestHistogramSerialization<i64>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2}, EHistogramValueType::Int64);
    TestHistogramSerialization<double>(10, /*values range=*/{0, 10}, /*bucket range=*/{0.0, 2.0},
                                       EHistogramValueType::Double);
  }

  // One histogram plus nine aggregations of an identical one: the expectation is 20 elements
  // in each of the first five buckets and none in the remaining five.
  Y_UNIT_TEST(AggregateHistogram) {
    TVector<ui64> resultCount{20, 20, 20, 20, 20, 0, 0, 0, 0, 0};
    TestHistogramAggregate<ui32>(10, /*values range=*/{0, 10}, /*bucket range=*/{0, 2}, EHistogramValueType::Uint32, 9,
                                 resultCount);
  }
}
}  // namespace NKikimr