summaryrefslogtreecommitdiffstats
path: root/yql/essentials/core/histogram
diff options
context:
space:
mode:
authorvvvv <[email protected]>2025-06-18 11:37:13 +0300
committervvvv <[email protected]>2025-06-18 13:38:30 +0300
commitfafaf82fef03d47443d3563fb5ea36af8fa71e64 (patch)
tree517c43562240391278d797c4793eb9cee8b6986f /yql/essentials/core/histogram
parent06674e69d9005bafa2ac27df970398fd1c389fdf (diff)
YQL-20086 core
commit_hash:af5d81d51befa5cee331fbed69e7e5db2014a260
Diffstat (limited to 'yql/essentials/core/histogram')
-rw-r--r--yql/essentials/core/histogram/eq_width_histogram.cpp28
-rw-r--r--yql/essentials/core/histogram/eq_width_histogram.h66
2 files changed, 47 insertions, 47 deletions
diff --git a/yql/essentials/core/histogram/eq_width_histogram.cpp b/yql/essentials/core/histogram/eq_width_histogram.cpp
index 3c5a452fdbd..b83fc1638c6 100644
--- a/yql/essentials/core/histogram/eq_width_histogram.cpp
+++ b/yql/essentials/core/histogram/eq_width_histogram.cpp
@@ -3,7 +3,7 @@
namespace NKikimr {
TEqWidthHistogram::TEqWidthHistogram(ui32 numBuckets, EHistogramValueType valueType)
- : ValueType(valueType), Buckets(numBuckets) {
+ : ValueType_(valueType), Buckets_(numBuckets) {
// Exptected at least one bucket for histogram.
Y_ASSERT(numBuckets >= 1);
}
@@ -13,11 +13,11 @@ TEqWidthHistogram::TEqWidthHistogram(const char *str, ui64 size) {
const ui32 numBuckets = *reinterpret_cast<const ui32 *>(str);
Y_ABORT_UNLESS(GetBinarySize(numBuckets) == size);
ui32 offset = sizeof(ui32);
- ValueType = *reinterpret_cast<const EHistogramValueType *>(str + offset);
+ ValueType_ = *reinterpret_cast<const EHistogramValueType *>(str + offset);
offset += sizeof(EHistogramValueType);
- Buckets = TVector<TBucket>(numBuckets);
+ Buckets_ = TVector<TBucket>(numBuckets);
for (ui32 i = 0; i < numBuckets; ++i) {
- std::memcpy(&Buckets[i], reinterpret_cast<const char *>(str + offset), sizeof(TBucket));
+ std::memcpy(&Buckets_[i], reinterpret_cast<const char *>(str + offset), sizeof(TBucket));
offset += sizeof(TBucket);
}
}
@@ -38,36 +38,36 @@ std::unique_ptr<char> TEqWidthHistogram::Serialize(ui64 &binarySize) const {
std::memcpy(binaryData.get(), &numBuckets, sizeof(ui32));
offset += sizeof(ui32);
// 1 byte - values type.
- std::memcpy(binaryData.get() + offset, &ValueType, sizeof(EHistogramValueType));
+ std::memcpy(binaryData.get() + offset, &ValueType_, sizeof(EHistogramValueType));
offset += sizeof(EHistogramValueType);
// Buckets.
for (ui32 i = 0; i < numBuckets; ++i) {
- std::memcpy(binaryData.get() + offset, &Buckets[i], sizeof(TBucket));
+ std::memcpy(binaryData.get() + offset, &Buckets_[i], sizeof(TBucket));
offset += sizeof(TBucket);
}
return binaryData;
}
TEqWidthHistogramEstimator::TEqWidthHistogramEstimator(std::shared_ptr<TEqWidthHistogram> histogram)
- : Histogram(histogram) {
- const auto numBuckets = Histogram->GetNumBuckets();
- PrefixSum = TVector<ui64>(numBuckets);
- SuffixSum = TVector<ui64>(numBuckets);
+ : Histogram_(histogram) {
+ const auto numBuckets = Histogram_->GetNumBuckets();
+ PrefixSum_ = TVector<ui64>(numBuckets);
+ SuffixSum_ = TVector<ui64>(numBuckets);
CreatePrefixSum(numBuckets);
CreateSuffixSum(numBuckets);
}
void TEqWidthHistogramEstimator::CreatePrefixSum(ui32 numBuckets) {
- PrefixSum[0] = Histogram->GetNumElementsInBucket(0);
+ PrefixSum_[0] = Histogram_->GetNumElementsInBucket(0);
for (ui32 i = 1; i < numBuckets; ++i) {
- PrefixSum[i] = PrefixSum[i - 1] + Histogram->GetNumElementsInBucket(i);
+ PrefixSum_[i] = PrefixSum_[i - 1] + Histogram_->GetNumElementsInBucket(i);
}
}
void TEqWidthHistogramEstimator::CreateSuffixSum(ui32 numBuckets) {
- SuffixSum[numBuckets - 1] = Histogram->GetNumElementsInBucket(numBuckets - 1);
+ SuffixSum_[numBuckets - 1] = Histogram_->GetNumElementsInBucket(numBuckets - 1);
for (i32 i = static_cast<i32>(numBuckets) - 2; i >= 0; --i) {
- SuffixSum[i] = SuffixSum[i + 1] + Histogram->GetNumElementsInBucket(i);
+ SuffixSum_[i] = SuffixSum_[i + 1] + Histogram_->GetNumElementsInBucket(i);
}
}
} // namespace NKikimr
diff --git a/yql/essentials/core/histogram/eq_width_histogram.h b/yql/essentials/core/histogram/eq_width_histogram.h
index 97c660af76b..261f2aaafc3 100644
--- a/yql/essentials/core/histogram/eq_width_histogram.h
+++ b/yql/essentials/core/histogram/eq_width_histogram.h
@@ -68,11 +68,11 @@ class TEqWidthHistogram {
void AddElement(T val) {
const auto index = FindBucketIndex(val);
// The given `index` in range [0, numBuckets - 1].
- const T bucketValue = LoadFrom<T>(Buckets[index].Start);
+ const T bucketValue = LoadFrom<T>(Buckets_[index].Start);
if (!index || ((CmpEqual<T>(bucketValue, val) || CmpLess<T>(bucketValue, val)))) {
- Buckets[index].Count++;
+ Buckets_[index].Count++;
} else {
- Buckets[index - 1].Count++;
+ Buckets_[index - 1].Count++;
}
}
@@ -85,7 +85,7 @@ class TEqWidthHistogram {
ui32 end = GetNumBuckets() - 1;
while (start < end) {
auto it = start + (end - start) / 2;
- if (CmpLess<T>(LoadFrom<T>(Buckets[it].Start), val)) {
+ if (CmpLess<T>(LoadFrom<T>(Buckets_[it].Start), val)) {
start = it + 1;
} else {
end = it;
@@ -95,15 +95,15 @@ class TEqWidthHistogram {
}
// Returns a number of buckets in a histogram.
- ui32 GetNumBuckets() const { return Buckets.size(); }
+ ui32 GetNumBuckets() const { return Buckets_.size(); }
template <typename T>
ui32 GetBucketWidth() const {
Y_ASSERT(GetNumBuckets());
if (GetNumBuckets() == 1) {
- return std::max(static_cast<ui32>(LoadFrom<T>(Buckets.front().Start)), 1U);
+ return std::max(static_cast<ui32>(LoadFrom<T>(Buckets_.front().Start)), 1U);
} else {
- return std::max(static_cast<ui32>(LoadFrom<T>(Buckets[1].Start) - LoadFrom<T>(Buckets[0].Start)), 1U);
+ return std::max(static_cast<ui32>(LoadFrom<T>(Buckets_[1].Start) - LoadFrom<T>(Buckets_[0].Start)), 1U);
}
}
@@ -113,46 +113,46 @@ class TEqWidthHistogram {
}
// Returns histogram type.
- EHistogramValueType GetType() const { return ValueType; }
+ EHistogramValueType GetType() const { return ValueType_; }
// Returns a number of elements in a bucket by the given `index`.
- ui64 GetNumElementsInBucket(ui32 index) const { return Buckets[index].Count; }
+ ui64 GetNumElementsInBucket(ui32 index) const { return Buckets_[index].Count; }
// Initializes buckets with a given `range`.
template <typename T>
void InitializeBuckets(const TBucketRange &range) {
Y_ASSERT(CmpLess<T>(LoadFrom<T>(range.Start), LoadFrom<T>(range.End)));
T rangeLen = LoadFrom<T>(range.End) - LoadFrom<T>(range.Start);
- std::memcpy(Buckets[0].Start, range.Start, sizeof(range.Start));
+ std::memcpy(Buckets_[0].Start, range.Start, sizeof(range.Start));
for (ui32 i = 1; i < GetNumBuckets(); ++i) {
- const T prevStart = LoadFrom<T>(Buckets[i - 1].Start);
- StoreTo<T>(Buckets[i].Start, prevStart + rangeLen);
+ const T prevStart = LoadFrom<T>(Buckets_[i - 1].Start);
+ StoreTo<T>(Buckets_[i].Start, prevStart + rangeLen);
}
}
// Seriailizes to a binary representation
std::unique_ptr<char> Serialize(ui64 &binSize) const;
// Returns buckets.
- const TVector<TBucket> &GetBuckets() const { return Buckets; }
+ const TVector<TBucket> &GetBuckets() const { return Buckets_; }
template <typename T>
void Aggregate(const TEqWidthHistogram &other) {
- if ((this->ValueType != other.GetType()) || (!BucketsEqual<T>(other))) {
+ if ((this->ValueType_ != other.GetType()) || (!BucketsEqual<T>(other))) {
// Should we fail?
return;
}
- for (ui32 i = 0; i < Buckets.size(); ++i) {
- Buckets[i].Count += other.GetBuckets()[i].Count;
+ for (ui32 i = 0; i < Buckets_.size(); ++i) {
+ Buckets_[i].Count += other.GetBuckets()[i].Count;
}
}
private:
template <typename T>
bool BucketsEqual(const TEqWidthHistogram &other) {
- if (Buckets.size() != other.GetNumBuckets()) {
+ if (Buckets_.size() != other.GetNumBuckets()) {
return false;
}
- for (ui32 i = 0; i < Buckets.size(); ++i) {
- if (!CmpEqual<T>(LoadFrom<T>(Buckets[i].Start), LoadFrom<T>(GetBuckets()[i].Start))) {
+ for (ui32 i = 0; i < Buckets_.size(); ++i) {
+ if (!CmpEqual<T>(LoadFrom<T>(Buckets_[i].Start), LoadFrom<T>(GetBuckets()[i].Start))) {
return false;
}
}
@@ -161,8 +161,8 @@ class TEqWidthHistogram {
// Returns binary size of the histogram.
ui64 GetBinarySize(ui32 nBuckets) const;
- EHistogramValueType ValueType;
- TVector<TBucket> Buckets;
+ EHistogramValueType ValueType_;
+ TVector<TBucket> Buckets_;
};
// This class represents a machinery to estimate a value in a histogram.
@@ -173,45 +173,45 @@ class TEqWidthHistogramEstimator {
// Methods to estimate values.
template <typename T>
ui64 EstimateLessOrEqual(T val) const {
- return EstimateOrEqual<T>(val, PrefixSum);
+ return EstimateOrEqual<T>(val, PrefixSum_);
}
template <typename T>
ui64 EstimateGreaterOrEqual(T val) const {
- return EstimateOrEqual<T>(val, SuffixSum);
+ return EstimateOrEqual<T>(val, SuffixSum_);
}
template <typename T>
ui64 EstimateLess(T val) const {
- return EstimateNotEqual<T>(val, PrefixSum);
+ return EstimateNotEqual<T>(val, PrefixSum_);
}
template <typename T>
ui64 EstimateGreater(T val) const {
- return EstimateNotEqual<T>(val, SuffixSum);
+ return EstimateNotEqual<T>(val, SuffixSum_);
}
template <typename T>
ui64 EstimateEqual(T val) const {
- const auto index = Histogram->FindBucketIndex(val);
+ const auto index = Histogram_->FindBucketIndex(val);
// Assuming uniform distribution.
- return std::max(1U, static_cast<ui32>(Histogram->GetNumElementsInBucket(index) / Histogram->template GetBucketWidth<T>()));
+ return std::max(1U, static_cast<ui32>(Histogram_->GetNumElementsInBucket(index) / Histogram_->template GetBucketWidth<T>()));
}
// Returns the total number elements in histogram.
// Could be used to adjust scale.
- ui64 GetNumElements() const { return PrefixSum.back(); }
+ ui64 GetNumElements() const { return PrefixSum_.back(); }
private:
template <typename T>
ui64 EstimateOrEqual(T val, const TVector<ui64> &sumArray) const {
- const auto index = Histogram->FindBucketIndex(val);
+ const auto index = Histogram_->FindBucketIndex(val);
return sumArray[index];
}
template <typename T>
ui64 EstimateNotEqual(T val, const TVector<ui64> &sumArray) const {
- const auto index = Histogram->FindBucketIndex(val);
+ const auto index = Histogram_->FindBucketIndex(val);
// Take the previous backet if it's not the first one.
if (!index) {
return sumArray[index];
@@ -221,8 +221,8 @@ class TEqWidthHistogramEstimator {
void CreatePrefixSum(ui32 numBuckets);
void CreateSuffixSum(ui32 numBuckets);
- std::shared_ptr<TEqWidthHistogram> Histogram;
- TVector<ui64> PrefixSum;
- TVector<ui64> SuffixSum;
+ std::shared_ptr<TEqWidthHistogram> Histogram_;
+ TVector<ui64> PrefixSum_;
+ TVector<ui64> SuffixSum_;
};
} // namespace NKikimr