diff options
| author | vvvv <[email protected]> | 2025-06-18 11:37:13 +0300 |
|---|---|---|
| committer | vvvv <[email protected]> | 2025-06-18 13:38:30 +0300 |
| commit | fafaf82fef03d47443d3563fb5ea36af8fa71e64 (patch) | |
| tree | 517c43562240391278d797c4793eb9cee8b6986f /yql/essentials/core/histogram | |
| parent | 06674e69d9005bafa2ac27df970398fd1c389fdf (diff) | |
YQL-20086 core
commit_hash:af5d81d51befa5cee331fbed69e7e5db2014a260
Diffstat (limited to 'yql/essentials/core/histogram')
| -rw-r--r-- | yql/essentials/core/histogram/eq_width_histogram.cpp | 28 | ||||
| -rw-r--r-- | yql/essentials/core/histogram/eq_width_histogram.h | 66 |
2 files changed, 47 insertions, 47 deletions
diff --git a/yql/essentials/core/histogram/eq_width_histogram.cpp b/yql/essentials/core/histogram/eq_width_histogram.cpp index 3c5a452fdbd..b83fc1638c6 100644 --- a/yql/essentials/core/histogram/eq_width_histogram.cpp +++ b/yql/essentials/core/histogram/eq_width_histogram.cpp @@ -3,7 +3,7 @@ namespace NKikimr { TEqWidthHistogram::TEqWidthHistogram(ui32 numBuckets, EHistogramValueType valueType) - : ValueType(valueType), Buckets(numBuckets) { + : ValueType_(valueType), Buckets_(numBuckets) { // Exptected at least one bucket for histogram. Y_ASSERT(numBuckets >= 1); } @@ -13,11 +13,11 @@ TEqWidthHistogram::TEqWidthHistogram(const char *str, ui64 size) { const ui32 numBuckets = *reinterpret_cast<const ui32 *>(str); Y_ABORT_UNLESS(GetBinarySize(numBuckets) == size); ui32 offset = sizeof(ui32); - ValueType = *reinterpret_cast<const EHistogramValueType *>(str + offset); + ValueType_ = *reinterpret_cast<const EHistogramValueType *>(str + offset); offset += sizeof(EHistogramValueType); - Buckets = TVector<TBucket>(numBuckets); + Buckets_ = TVector<TBucket>(numBuckets); for (ui32 i = 0; i < numBuckets; ++i) { - std::memcpy(&Buckets[i], reinterpret_cast<const char *>(str + offset), sizeof(TBucket)); + std::memcpy(&Buckets_[i], reinterpret_cast<const char *>(str + offset), sizeof(TBucket)); offset += sizeof(TBucket); } } @@ -38,36 +38,36 @@ std::unique_ptr<char> TEqWidthHistogram::Serialize(ui64 &binarySize) const { std::memcpy(binaryData.get(), &numBuckets, sizeof(ui32)); offset += sizeof(ui32); // 1 byte - values type. - std::memcpy(binaryData.get() + offset, &ValueType, sizeof(EHistogramValueType)); + std::memcpy(binaryData.get() + offset, &ValueType_, sizeof(EHistogramValueType)); offset += sizeof(EHistogramValueType); // Buckets. for (ui32 i = 0; i < numBuckets; ++i) { - std::memcpy(binaryData.get() + offset, &Buckets[i], sizeof(TBucket)); + std::memcpy(binaryData.get() + offset, &Buckets_[i], sizeof(TBucket)); offset += sizeof(TBucket); } return binaryData; } TEqWidthHistogramEstimator::TEqWidthHistogramEstimator(std::shared_ptr<TEqWidthHistogram> histogram) - : Histogram(histogram) { - const auto numBuckets = Histogram->GetNumBuckets(); - PrefixSum = TVector<ui64>(numBuckets); - SuffixSum = TVector<ui64>(numBuckets); + : Histogram_(histogram) { + const auto numBuckets = Histogram_->GetNumBuckets(); + PrefixSum_ = TVector<ui64>(numBuckets); + SuffixSum_ = TVector<ui64>(numBuckets); CreatePrefixSum(numBuckets); CreateSuffixSum(numBuckets); } void TEqWidthHistogramEstimator::CreatePrefixSum(ui32 numBuckets) { - PrefixSum[0] = Histogram->GetNumElementsInBucket(0); + PrefixSum_[0] = Histogram_->GetNumElementsInBucket(0); for (ui32 i = 1; i < numBuckets; ++i) { - PrefixSum[i] = PrefixSum[i - 1] + Histogram->GetNumElementsInBucket(i); + PrefixSum_[i] = PrefixSum_[i - 1] + Histogram_->GetNumElementsInBucket(i); } } void TEqWidthHistogramEstimator::CreateSuffixSum(ui32 numBuckets) { - SuffixSum[numBuckets - 1] = Histogram->GetNumElementsInBucket(numBuckets - 1); + SuffixSum_[numBuckets - 1] = Histogram_->GetNumElementsInBucket(numBuckets - 1); for (i32 i = static_cast<i32>(numBuckets) - 2; i >= 0; --i) { - SuffixSum[i] = SuffixSum[i + 1] + Histogram->GetNumElementsInBucket(i); + SuffixSum_[i] = SuffixSum_[i + 1] + Histogram_->GetNumElementsInBucket(i); } } } // namespace NKikimr diff --git a/yql/essentials/core/histogram/eq_width_histogram.h b/yql/essentials/core/histogram/eq_width_histogram.h index 97c660af76b..261f2aaafc3 100644 --- a/yql/essentials/core/histogram/eq_width_histogram.h +++ b/yql/essentials/core/histogram/eq_width_histogram.h @@ -68,11 +68,11 @@ class TEqWidthHistogram { void AddElement(T val) { const auto index = FindBucketIndex(val); // The given `index` in range [0, numBuckets - 1]. - const T bucketValue = LoadFrom<T>(Buckets[index].Start); + const T bucketValue = LoadFrom<T>(Buckets_[index].Start); if (!index || ((CmpEqual<T>(bucketValue, val) || CmpLess<T>(bucketValue, val)))) { - Buckets[index].Count++; + Buckets_[index].Count++; } else { - Buckets[index - 1].Count++; + Buckets_[index - 1].Count++; } } @@ -85,7 +85,7 @@ class TEqWidthHistogram { ui32 end = GetNumBuckets() - 1; while (start < end) { auto it = start + (end - start) / 2; - if (CmpLess<T>(LoadFrom<T>(Buckets[it].Start), val)) { + if (CmpLess<T>(LoadFrom<T>(Buckets_[it].Start), val)) { start = it + 1; } else { end = it; @@ -95,15 +95,15 @@ class TEqWidthHistogram { } // Returns a number of buckets in a histogram. - ui32 GetNumBuckets() const { return Buckets.size(); } + ui32 GetNumBuckets() const { return Buckets_.size(); } template <typename T> ui32 GetBucketWidth() const { Y_ASSERT(GetNumBuckets()); if (GetNumBuckets() == 1) { - return std::max(static_cast<ui32>(LoadFrom<T>(Buckets.front().Start)), 1U); + return std::max(static_cast<ui32>(LoadFrom<T>(Buckets_.front().Start)), 1U); } else { - return std::max(static_cast<ui32>(LoadFrom<T>(Buckets[1].Start) - LoadFrom<T>(Buckets[0].Start)), 1U); + return std::max(static_cast<ui32>(LoadFrom<T>(Buckets_[1].Start) - LoadFrom<T>(Buckets_[0].Start)), 1U); } } @@ -113,46 +113,46 @@ class TEqWidthHistogram { } // Returns histogram type. - EHistogramValueType GetType() const { return ValueType; } + EHistogramValueType GetType() const { return ValueType_; } // Returns a number of elements in a bucket by the given `index`. - ui64 GetNumElementsInBucket(ui32 index) const { return Buckets[index].Count; } + ui64 GetNumElementsInBucket(ui32 index) const { return Buckets_[index].Count; } // Initializes buckets with a given `range`. template <typename T> void InitializeBuckets(const TBucketRange &range) { Y_ASSERT(CmpLess<T>(LoadFrom<T>(range.Start), LoadFrom<T>(range.End))); T rangeLen = LoadFrom<T>(range.End) - LoadFrom<T>(range.Start); - std::memcpy(Buckets[0].Start, range.Start, sizeof(range.Start)); + std::memcpy(Buckets_[0].Start, range.Start, sizeof(range.Start)); for (ui32 i = 1; i < GetNumBuckets(); ++i) { - const T prevStart = LoadFrom<T>(Buckets[i - 1].Start); - StoreTo<T>(Buckets[i].Start, prevStart + rangeLen); + const T prevStart = LoadFrom<T>(Buckets_[i - 1].Start); + StoreTo<T>(Buckets_[i].Start, prevStart + rangeLen); } } // Seriailizes to a binary representation std::unique_ptr<char> Serialize(ui64 &binSize) const; // Returns buckets. - const TVector<TBucket> &GetBuckets() const { return Buckets; } + const TVector<TBucket> &GetBuckets() const { return Buckets_; } template <typename T> void Aggregate(const TEqWidthHistogram &other) { - if ((this->ValueType != other.GetType()) || (!BucketsEqual<T>(other))) { + if ((this->ValueType_ != other.GetType()) || (!BucketsEqual<T>(other))) { // Should we fail? return; } - for (ui32 i = 0; i < Buckets.size(); ++i) { - Buckets[i].Count += other.GetBuckets()[i].Count; + for (ui32 i = 0; i < Buckets_.size(); ++i) { + Buckets_[i].Count += other.GetBuckets()[i].Count; } } private: template <typename T> bool BucketsEqual(const TEqWidthHistogram &other) { - if (Buckets.size() != other.GetNumBuckets()) { + if (Buckets_.size() != other.GetNumBuckets()) { return false; } - for (ui32 i = 0; i < Buckets.size(); ++i) { - if (!CmpEqual<T>(LoadFrom<T>(Buckets[i].Start), LoadFrom<T>(GetBuckets()[i].Start))) { + for (ui32 i = 0; i < Buckets_.size(); ++i) { + if (!CmpEqual<T>(LoadFrom<T>(Buckets_[i].Start), LoadFrom<T>(GetBuckets()[i].Start))) { return false; } } @@ -161,8 +161,8 @@ class TEqWidthHistogram { // Returns binary size of the histogram. ui64 GetBinarySize(ui32 nBuckets) const; - EHistogramValueType ValueType; - TVector<TBucket> Buckets; + EHistogramValueType ValueType_; + TVector<TBucket> Buckets_; }; // This class represents a machinery to estimate a value in a histogram. @@ -173,45 +173,45 @@ class TEqWidthHistogramEstimator { // Methods to estimate values. template <typename T> ui64 EstimateLessOrEqual(T val) const { - return EstimateOrEqual<T>(val, PrefixSum); + return EstimateOrEqual<T>(val, PrefixSum_); } template <typename T> ui64 EstimateGreaterOrEqual(T val) const { - return EstimateOrEqual<T>(val, SuffixSum); + return EstimateOrEqual<T>(val, SuffixSum_); } template <typename T> ui64 EstimateLess(T val) const { - return EstimateNotEqual<T>(val, PrefixSum); + return EstimateNotEqual<T>(val, PrefixSum_); } template <typename T> ui64 EstimateGreater(T val) const { - return EstimateNotEqual<T>(val, SuffixSum); + return EstimateNotEqual<T>(val, SuffixSum_); } template <typename T> ui64 EstimateEqual(T val) const { - const auto index = Histogram->FindBucketIndex(val); + const auto index = Histogram_->FindBucketIndex(val); // Assuming uniform distribution. - return std::max(1U, static_cast<ui32>(Histogram->GetNumElementsInBucket(index) / Histogram->template GetBucketWidth<T>())); + return std::max(1U, static_cast<ui32>(Histogram_->GetNumElementsInBucket(index) / Histogram_->template GetBucketWidth<T>())); } // Returns the total number elements in histogram. // Could be used to adjust scale. - ui64 GetNumElements() const { return PrefixSum.back(); } + ui64 GetNumElements() const { return PrefixSum_.back(); } private: template <typename T> ui64 EstimateOrEqual(T val, const TVector<ui64> &sumArray) const { - const auto index = Histogram->FindBucketIndex(val); + const auto index = Histogram_->FindBucketIndex(val); return sumArray[index]; } template <typename T> ui64 EstimateNotEqual(T val, const TVector<ui64> &sumArray) const { - const auto index = Histogram->FindBucketIndex(val); + const auto index = Histogram_->FindBucketIndex(val); // Take the previous backet if it's not the first one. if (!index) { return sumArray[index]; @@ -221,8 +221,8 @@ class TEqWidthHistogramEstimator { void CreatePrefixSum(ui32 numBuckets); void CreateSuffixSum(ui32 numBuckets); - std::shared_ptr<TEqWidthHistogram> Histogram; - TVector<ui64> PrefixSum; - TVector<ui64> SuffixSum; + std::shared_ptr<TEqWidthHistogram> Histogram_; + TVector<ui64> PrefixSum_; + TVector<ui64> SuffixSum_; }; } // namespace NKikimr |
