summaryrefslogtreecommitdiffstats
path: root/yql/essentials/core/histogram/eq_width_histogram.cpp
diff options
context:
space:
mode:
authordeniskhalikov <[email protected]>2025-03-31 15:42:14 +0300
committerdeniskhalikov <[email protected]>2025-03-31 15:58:39 +0300
commit06c199b566aca570faf8661b96ea873a229f923a (patch)
tree63cb078ebb2702770eb89056fba85fa2b6a9a7db /yql/essentials/core/histogram/eq_width_histogram.cpp
parent3e9cb82822c4878edee4ef5852b16116424db357 (diff)
Add equal width histogram.
This patch add equal width histogram for use in assessing predicate selectivity for CBO. commit_hash:8d2d3a7f89fb9b2261af1b6ba201e8d2a8b098b7
Diffstat (limited to 'yql/essentials/core/histogram/eq_width_histogram.cpp')
-rw-r--r--yql/essentials/core/histogram/eq_width_histogram.cpp73
1 files changed, 73 insertions, 0 deletions
diff --git a/yql/essentials/core/histogram/eq_width_histogram.cpp b/yql/essentials/core/histogram/eq_width_histogram.cpp
new file mode 100644
index 00000000000..3c5a452fdbd
--- /dev/null
+++ b/yql/essentials/core/histogram/eq_width_histogram.cpp
@@ -0,0 +1,73 @@
+#include "eq_width_histogram.h"
+
+namespace NKikimr {
+
+TEqWidthHistogram::TEqWidthHistogram(ui32 numBuckets, EHistogramValueType valueType)
+ : ValueType(valueType), Buckets(numBuckets) {
+ // Exptected at least one bucket for histogram.
+ Y_ASSERT(numBuckets >= 1);
+}
+
+TEqWidthHistogram::TEqWidthHistogram(const char *str, ui64 size) {
+ Y_ASSERT(str && size);
+ const ui32 numBuckets = *reinterpret_cast<const ui32 *>(str);
+ Y_ABORT_UNLESS(GetBinarySize(numBuckets) == size);
+ ui32 offset = sizeof(ui32);
+ ValueType = *reinterpret_cast<const EHistogramValueType *>(str + offset);
+ offset += sizeof(EHistogramValueType);
+ Buckets = TVector<TBucket>(numBuckets);
+ for (ui32 i = 0; i < numBuckets; ++i) {
+ std::memcpy(&Buckets[i], reinterpret_cast<const char *>(str + offset), sizeof(TBucket));
+ offset += sizeof(TBucket);
+ }
+}
+
+ui64 TEqWidthHistogram::GetBinarySize(ui32 nBuckets) const {
+ return sizeof(ui32) + sizeof(EHistogramValueType) + sizeof(TBucket) * nBuckets;
+}
+
+// Binary layout:
+// [4 byte: number of buckets][1 byte: value type]
+// [sizeof(Bucket)[0]... sizeof(Bucket)[n]].
+std::unique_ptr<char> TEqWidthHistogram::Serialize(ui64 &binarySize) const {
+ binarySize = GetBinarySize(GetNumBuckets());
+ std::unique_ptr<char> binaryData(new char[binarySize]);
+ ui32 offset = 0;
+ const ui32 numBuckets = GetNumBuckets();
+ // 4 byte - number of buckets.
+ std::memcpy(binaryData.get(), &numBuckets, sizeof(ui32));
+ offset += sizeof(ui32);
+ // 1 byte - values type.
+ std::memcpy(binaryData.get() + offset, &ValueType, sizeof(EHistogramValueType));
+ offset += sizeof(EHistogramValueType);
+ // Buckets.
+ for (ui32 i = 0; i < numBuckets; ++i) {
+ std::memcpy(binaryData.get() + offset, &Buckets[i], sizeof(TBucket));
+ offset += sizeof(TBucket);
+ }
+ return binaryData;
+}
+
+TEqWidthHistogramEstimator::TEqWidthHistogramEstimator(std::shared_ptr<TEqWidthHistogram> histogram)
+ : Histogram(histogram) {
+ const auto numBuckets = Histogram->GetNumBuckets();
+ PrefixSum = TVector<ui64>(numBuckets);
+ SuffixSum = TVector<ui64>(numBuckets);
+ CreatePrefixSum(numBuckets);
+ CreateSuffixSum(numBuckets);
+}
+
+void TEqWidthHistogramEstimator::CreatePrefixSum(ui32 numBuckets) {
+ PrefixSum[0] = Histogram->GetNumElementsInBucket(0);
+ for (ui32 i = 1; i < numBuckets; ++i) {
+ PrefixSum[i] = PrefixSum[i - 1] + Histogram->GetNumElementsInBucket(i);
+ }
+}
+
+void TEqWidthHistogramEstimator::CreateSuffixSum(ui32 numBuckets) {
+ SuffixSum[numBuckets - 1] = Histogram->GetNumElementsInBucket(numBuckets - 1);
+ for (i32 i = static_cast<i32>(numBuckets) - 2; i >= 0; --i) {
+ SuffixSum[i] = SuffixSum[i + 1] + Histogram->GetNumElementsInBucket(i);
+ }
+}
+} // namespace NKikimr