aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/core/histogram/eq_width_histogram.cpp
blob: 3c5a452fdbd1a942826d889f304f0493d9e8d836 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include "eq_width_histogram.h"

namespace NKikimr {

TEqWidthHistogram::TEqWidthHistogram(ui32 numBuckets, EHistogramValueType valueType)
    : ValueType(valueType), Buckets(numBuckets) {
  // Exptected at least one bucket for histogram.
  Y_ASSERT(numBuckets >= 1);
}

TEqWidthHistogram::TEqWidthHistogram(const char *str, ui64 size) {
  Y_ASSERT(str && size);
  const ui32 numBuckets = *reinterpret_cast<const ui32 *>(str);
  Y_ABORT_UNLESS(GetBinarySize(numBuckets) == size);
  ui32 offset = sizeof(ui32);
  ValueType = *reinterpret_cast<const EHistogramValueType *>(str + offset);
  offset += sizeof(EHistogramValueType);
  Buckets = TVector<TBucket>(numBuckets);
  for (ui32 i = 0; i < numBuckets; ++i) {
    std::memcpy(&Buckets[i], reinterpret_cast<const char *>(str + offset), sizeof(TBucket));
    offset += sizeof(TBucket);
  }
}

ui64 TEqWidthHistogram::GetBinarySize(ui32 nBuckets) const {
  return sizeof(ui32) + sizeof(EHistogramValueType) + sizeof(TBucket) * nBuckets;
}

// Binary layout:
// [4 byte: number of buckets][1 byte: value type]
// [sizeof(Bucket)[0]... sizeof(Bucket)[n]].
std::unique_ptr<char> TEqWidthHistogram::Serialize(ui64 &binarySize) const {
  binarySize = GetBinarySize(GetNumBuckets());
  std::unique_ptr<char> binaryData(new char[binarySize]);
  ui32 offset = 0;
  const ui32 numBuckets = GetNumBuckets();
  // 4 byte - number of buckets.
  std::memcpy(binaryData.get(), &numBuckets, sizeof(ui32));
  offset += sizeof(ui32);
  // 1 byte - values type.
  std::memcpy(binaryData.get() + offset, &ValueType, sizeof(EHistogramValueType));
  offset += sizeof(EHistogramValueType);
  // Buckets.
  for (ui32 i = 0; i < numBuckets; ++i) {
    std::memcpy(binaryData.get() + offset, &Buckets[i], sizeof(TBucket));
    offset += sizeof(TBucket);
  }
  return binaryData;
}

TEqWidthHistogramEstimator::TEqWidthHistogramEstimator(std::shared_ptr<TEqWidthHistogram> histogram)
    : Histogram(histogram) {
  const auto numBuckets = Histogram->GetNumBuckets();
  PrefixSum = TVector<ui64>(numBuckets);
  SuffixSum = TVector<ui64>(numBuckets);
  CreatePrefixSum(numBuckets);
  CreateSuffixSum(numBuckets);
}

void TEqWidthHistogramEstimator::CreatePrefixSum(ui32 numBuckets) {
  PrefixSum[0] = Histogram->GetNumElementsInBucket(0);
  for (ui32 i = 1; i < numBuckets; ++i) {
    PrefixSum[i] = PrefixSum[i - 1] + Histogram->GetNumElementsInBucket(i);
  }
}

void TEqWidthHistogramEstimator::CreateSuffixSum(ui32 numBuckets) {
  SuffixSum[numBuckets - 1] = Histogram->GetNumElementsInBucket(numBuckets - 1);
  for (i32 i = static_cast<i32>(numBuckets) - 2; i >= 0; --i) {
    SuffixSum[i] = SuffixSum[i + 1] + Histogram->GetNumElementsInBucket(i);
  }
}
}  // namespace NKikimr