diff options
author | zosimov <zosimov@yandex-team.ru> | 2022-02-10 16:50:32 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:32 +0300 |
commit | a1d67d6a31f789aa011250c3edce5751c0cadad2 (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/histogram/adaptive/adaptive_histogram.cpp | |
parent | a8f009e06d613c9567eb4c0f461dbed5e0d8092b (diff) | |
download | ydb-a1d67d6a31f789aa011250c3edce5751c0cadad2.tar.gz |
Restoring authorship annotation for <zosimov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/histogram/adaptive/adaptive_histogram.cpp')
-rw-r--r-- | library/cpp/histogram/adaptive/adaptive_histogram.cpp | 850 |
1 files changed, 425 insertions, 425 deletions
diff --git a/library/cpp/histogram/adaptive/adaptive_histogram.cpp b/library/cpp/histogram/adaptive/adaptive_histogram.cpp index e73b45c2c6..cbfc494021 100644 --- a/library/cpp/histogram/adaptive/adaptive_histogram.cpp +++ b/library/cpp/histogram/adaptive/adaptive_histogram.cpp @@ -1,72 +1,72 @@ -#include "adaptive_histogram.h" - +#include "adaptive_histogram.h" + #include <util/generic/algorithm.h> -#include <util/generic/yexception.h> -#include <util/generic/ymath.h> -#include <util/string/printf.h> - +#include <util/generic/yexception.h> +#include <util/generic/ymath.h> +#include <util/string/printf.h> + #include <util/system/backtrace.h> -namespace NKiwiAggr { +namespace NKiwiAggr { TAdaptiveHistogram::TAdaptiveHistogram(size_t intervals, ui64 id, TQualityFunction qualityFunc) - : Id(id) - , Sum(0.0) - , Intervals(intervals) - , CalcQuality(qualityFunc) - { - } - + : Id(id) + , Sum(0.0) + , Intervals(intervals) + , CalcQuality(qualityFunc) + { + } + TAdaptiveHistogram::TAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc) : TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc) - { - FromProto(histo); - } - + { + FromProto(histo); + } + TAdaptiveHistogram::TAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc) : TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc) - { - TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histo); - if (!adaptiveHisto) { - FromIHistogram(histo); - return; - } - Id = adaptiveHisto->Id; - MinValue = adaptiveHisto->MinValue; - MaxValue = adaptiveHisto->MaxValue; - Sum = adaptiveHisto->Sum; - Intervals = adaptiveHisto->Intervals; - Bins = adaptiveHisto->Bins; - BinsByQuality = adaptiveHisto->BinsByQuality; + { + TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histo); + if (!adaptiveHisto) { + FromIHistogram(histo); + return; + } + Id = adaptiveHisto->Id; + MinValue = adaptiveHisto->MinValue; + MaxValue = adaptiveHisto->MaxValue; + Sum = adaptiveHisto->Sum; + Intervals = adaptiveHisto->Intervals; + Bins = adaptiveHisto->Bins; + BinsByQuality = adaptiveHisto->BinsByQuality; if (CalcQuality == nullptr) { - CalcQuality = adaptiveHisto->CalcQuality; - } - } - + CalcQuality = adaptiveHisto->CalcQuality; + } + } + TQualityFunction TAdaptiveHistogram::GetQualityFunc() { - return CalcQuality; - } - - void TAdaptiveHistogram::Clear() { - Sum = 0.0; - Bins.clear(); - BinsByQuality.clear(); - } - - void TAdaptiveHistogram::Add(const THistoRec& histoRec) { - if (!histoRec.HasId() || histoRec.GetId() == Id) { - Add(histoRec.GetValue(), histoRec.GetWeight()); - } - } - - void TAdaptiveHistogram::Add(double value, double weight) { - if (!IsValidFloat(value) || !IsValidFloat(weight)) { + return CalcQuality; + } + + void TAdaptiveHistogram::Clear() { + Sum = 0.0; + Bins.clear(); + BinsByQuality.clear(); + } + + void TAdaptiveHistogram::Add(const THistoRec& histoRec) { + if (!histoRec.HasId() || histoRec.GetId() == Id) { + Add(histoRec.GetValue(), histoRec.GetWeight()); + } + } + + void TAdaptiveHistogram::Add(double value, double weight) { + if (!IsValidFloat(value) || !IsValidFloat(weight)) { ythrow yexception() << Sprintf("Histogram id %lu: bad value %f weight %f", Id, value, weight); - } - TWeightedValue weightedValue(value, weight); - Add(weightedValue, true); + } + TWeightedValue weightedValue(value, weight); + Add(weightedValue, true); PrecomputedBins.clear(); - } - + } + void TAdaptiveHistogram::Merge(const THistogram& histo, double multiplier) { if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) { fprintf(stderr, "Merging in histogram id %lu: skip bad histo with minvalue %f maxvalue %f\n", Id, histo.GetMinValue(), histo.GetMaxValue()); @@ -87,7 +87,7 @@ namespace NKiwiAggr { if (!IsValidFloat(value) || !IsValidFloat(weight)) { fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight); continue; - } + } Add(value, weight * multiplier); } @@ -99,20 +99,20 @@ namespace NKiwiAggr { double weight = histo.GetFreq(j); if (!IsValidFloat(pos) || !IsValidFloat(weight)) { fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, pos, weight); - pos += histo.GetBinRange(); + pos += histo.GetBinRange(); continue; - } + } Add(pos, weight * multiplier); pos += histo.GetBinRange(); } - + MinValue = Min(MinValue, histo.GetMinValue()); MaxValue = Max(MaxValue, histo.GetMaxValue()); } else { ythrow yexception() << "Unknown THistogram type"; - } - } - + } + } + void TAdaptiveHistogram::Merge(const TVector<THistogram>& histogramsToMerge) { for (size_t i = 0; i < histogramsToMerge.size(); ++i) { Merge(histogramsToMerge[i], 1.0); @@ -122,64 +122,64 @@ namespace NKiwiAggr { void TAdaptiveHistogram::Merge(TVector<IHistogramPtr> histogramsToMerge) { TVector<IHistogramPtr> histogramsToMergeRepacked(0); TVector<TAdaptiveHistogram*> histograms(0); - for (size_t i = 0; i < histogramsToMerge.size(); ++i) { - if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) { - continue; - } - TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histogramsToMerge[i].Get()); - if (adaptiveHisto) { - histogramsToMergeRepacked.push_back(histogramsToMerge[i]); - } else { + for (size_t i = 0; i < histogramsToMerge.size(); ++i) { + if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) { + continue; + } + TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histogramsToMerge[i].Get()); + if (adaptiveHisto) { + histogramsToMergeRepacked.push_back(histogramsToMerge[i]); + } else { histogramsToMergeRepacked.push_back(IHistogramPtr(new TAdaptiveHistogram(histogramsToMerge[i].Get(), Intervals, Id, CalcQuality))); // Convert histograms that are not of TFixedBinHistogram type - } - if (histogramsToMergeRepacked.back()->Empty()) { - continue; - } - histograms.push_back(dynamic_cast<TAdaptiveHistogram*>(histogramsToMergeRepacked.back().Get())); - } - - if (histograms.size() == 0) { - return; - } - - for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) { - TAdaptiveHistogram* histo = histograms[histoIndex]; - for (TPairSet::const_iterator it = histo->Bins.begin(); it != histo->Bins.end(); ++it) { - Add(*it, true); - } - } - - for (size_t i = 0; i < histograms.size(); ++i) { - MinValue = Min(MinValue, histograms[i]->MinValue); - MaxValue = Max(MaxValue, histograms[i]->MaxValue); - } - } - - void TAdaptiveHistogram::Multiply(double factor) { - if (!IsValidFloat(factor) || factor <= 0) { - ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor; - } - Sum *= factor; - - TPairSet newBins; - for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) { - newBins.insert(TWeightedValue(it->first, it->second * factor)); - } - Bins = newBins; - - TPairSet newBinsByQuality; - for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) { - TPairSet::iterator rightBin = it; - ++rightBin; - if (rightBin == Bins.end()) { - break; - } - newBinsByQuality.insert(CalcQuality(*it, *rightBin)); - } - BinsByQuality = newBinsByQuality; - } - - void TAdaptiveHistogram::FromProto(const THistogram& histo) { + } + if (histogramsToMergeRepacked.back()->Empty()) { + continue; + } + histograms.push_back(dynamic_cast<TAdaptiveHistogram*>(histogramsToMergeRepacked.back().Get())); + } + + if (histograms.size() == 0) { + return; + } + + for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) { + TAdaptiveHistogram* histo = histograms[histoIndex]; + for (TPairSet::const_iterator it = histo->Bins.begin(); it != histo->Bins.end(); ++it) { + Add(*it, true); + } + } + + for (size_t i = 0; i < histograms.size(); ++i) { + MinValue = Min(MinValue, histograms[i]->MinValue); + MaxValue = Max(MaxValue, histograms[i]->MaxValue); + } + } + + void TAdaptiveHistogram::Multiply(double factor) { + if (!IsValidFloat(factor) || factor <= 0) { + ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor; + } + Sum *= factor; + + TPairSet newBins; + for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) { + newBins.insert(TWeightedValue(it->first, it->second * factor)); + } + Bins = newBins; + + TPairSet newBinsByQuality; + for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) { + TPairSet::iterator rightBin = it; + ++rightBin; + if (rightBin == Bins.end()) { + break; + } + newBinsByQuality.insert(CalcQuality(*it, *rightBin)); + } + BinsByQuality = newBinsByQuality; + } + + void TAdaptiveHistogram::FromProto(const THistogram& histo) { Y_VERIFY(histo.HasType(), "Attempt to parse TAdaptiveHistogram from THistogram protobuf with no Type field set"); ; switch (histo.GetType()) { // check that histogram type could be deduced @@ -193,107 +193,107 @@ namespace NKiwiAggr { [[fallthrough]]; default: // not ok ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record of type = " << (ui32)histo.GetType(); - } + } - if (histo.FreqSize() != histo.PositionSize()) { - ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record where FreqSize != PositionSize. FreqSize == " << (ui32)histo.FreqSize() << ", PositionSize == " << (ui32)histo.PositionSize(); - } + if (histo.FreqSize() != histo.PositionSize()) { + ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record where FreqSize != PositionSize. FreqSize == " << (ui32)histo.FreqSize() << ", PositionSize == " << (ui32)histo.PositionSize(); + } if (CalcQuality == nullptr) { - if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM) { - CalcQuality = CalcDistanceQuality; - } else if (histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM) { - CalcQuality = CalcWeightQuality; + if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM) { + CalcQuality = CalcDistanceQuality; + } else if (histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM) { + CalcQuality = CalcWeightQuality; } else if (histo.GetType() == HT_ADAPTIVE_WARD_HISTOGRAM) { CalcQuality = CalcWardQuality; - } else { - ythrow yexception() << "Attempt to parse an HT_ADAPTIVE_HISTOGRAM without default quality function"; - } - } - Id = histo.GetId(); - Sum = 0.0; - Intervals = Max(Intervals, histo.FreqSize()); - for (size_t i = 0; i < histo.FreqSize(); ++i) { - double value = histo.GetPosition(i); - double weight = histo.GetFreq(i); - if (!IsValidFloat(value) || !IsValidFloat(weight)) { + } else { + ythrow yexception() << "Attempt to parse an HT_ADAPTIVE_HISTOGRAM without default quality function"; + } + } + Id = histo.GetId(); + Sum = 0.0; + Intervals = Max(Intervals, histo.FreqSize()); + for (size_t i = 0; i < histo.FreqSize(); ++i) { + double value = histo.GetPosition(i); + double weight = histo.GetFreq(i); + if (!IsValidFloat(value) || !IsValidFloat(weight)) { fprintf(stderr, "FromProto in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight); - continue; - } - Add(value, weight); - } - - if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) { + continue; + } + Add(value, weight); + } + + if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) { ythrow yexception() << Sprintf("FromProto in histogram id %lu: skip bad histo with minvalue %f maxvalue %f", Id, histo.GetMinValue(), histo.GetMaxValue()); - } - - MinValue = histo.GetMinValue(); - MaxValue = histo.GetMaxValue(); - } - - void TAdaptiveHistogram::ToProto(THistogram& histo) { - histo.Clear(); - if (CalcQuality == CalcDistanceQuality) { - histo.SetType(HT_ADAPTIVE_DISTANCE_HISTOGRAM); - } else if (CalcQuality == CalcWeightQuality) { - histo.SetType(HT_ADAPTIVE_WEIGHT_HISTOGRAM); + } + + MinValue = histo.GetMinValue(); + MaxValue = histo.GetMaxValue(); + } + + void TAdaptiveHistogram::ToProto(THistogram& histo) { + histo.Clear(); + if (CalcQuality == CalcDistanceQuality) { + histo.SetType(HT_ADAPTIVE_DISTANCE_HISTOGRAM); + } else if (CalcQuality == CalcWeightQuality) { + histo.SetType(HT_ADAPTIVE_WEIGHT_HISTOGRAM); } else if (CalcQuality == CalcWardQuality) { histo.SetType(HT_ADAPTIVE_WARD_HISTOGRAM); - } else { - histo.SetType(HT_ADAPTIVE_HISTOGRAM); - } - histo.SetId(Id); - if (Empty()) { - return; - } - histo.SetMinValue(MinValue); - histo.SetMaxValue(MaxValue); - for (TPairSet::const_iterator it = Bins.begin(); it != Bins.end(); ++it) { - histo.AddFreq(it->second); - histo.AddPosition(it->first); - } - } - + } else { + histo.SetType(HT_ADAPTIVE_HISTOGRAM); + } + histo.SetId(Id); + if (Empty()) { + return; + } + histo.SetMinValue(MinValue); + histo.SetMaxValue(MaxValue); + for (TPairSet::const_iterator it = Bins.begin(); it != Bins.end(); ++it) { + histo.AddFreq(it->second); + histo.AddPosition(it->first); + } + } + void TAdaptiveHistogram::SetId(ui64 id) { - Id = id; - } - + Id = id; + } + ui64 TAdaptiveHistogram::GetId() { - return Id; - } - - bool TAdaptiveHistogram::Empty() { - return Bins.size() == 0; - } - - double TAdaptiveHistogram::GetMinValue() { - return MinValue; - } - - double TAdaptiveHistogram::GetMaxValue() { - return MaxValue; - } - - double TAdaptiveHistogram::GetSum() { - return Sum; - } - - double TAdaptiveHistogram::GetSumInRange(double leftBound, double rightBound) { - if (leftBound > rightBound) { - return 0.0; - } - return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum; - } - - double TAdaptiveHistogram::GetSumAboveBound(double bound) { - if (Empty()) { - return 0.0; - } - if (bound < MinValue) { - return Sum; - } - if (bound > MaxValue) { - return 0.0; - } + return Id; + } + + bool TAdaptiveHistogram::Empty() { + return Bins.size() == 0; + } + + double TAdaptiveHistogram::GetMinValue() { + return MinValue; + } + + double TAdaptiveHistogram::GetMaxValue() { + return MaxValue; + } + + double TAdaptiveHistogram::GetSum() { + return Sum; + } + + double TAdaptiveHistogram::GetSumInRange(double leftBound, double rightBound) { + if (leftBound > rightBound) { + return 0.0; + } + return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum; + } + + double TAdaptiveHistogram::GetSumAboveBound(double bound) { + if (Empty()) { + return 0.0; + } + if (bound < MinValue) { + return Sum; + } + if (bound > MaxValue) { + return 0.0; + } if (!PrecomputedBins.empty()) { return GetSumAboveBoundImpl( @@ -314,19 +314,19 @@ namespace NKiwiAggr { } return sum; }); - } - } - - double TAdaptiveHistogram::GetSumBelowBound(double bound) { - if (Empty()) { - return 0.0; - } - if (bound < MinValue) { - return 0.0; - } - if (bound > MaxValue) { - return Sum; - } + } + } + + double TAdaptiveHistogram::GetSumBelowBound(double bound) { + if (Empty()) { + return 0.0; + } + if (bound < MinValue) { + return 0.0; + } + if (bound > MaxValue) { + return Sum; + } if (!PrecomputedBins.empty()) { return GetSumBelowBoundImpl( @@ -346,90 +346,90 @@ namespace NKiwiAggr { } return sum; }); - } - } - - double TAdaptiveHistogram::CalcUpperBound(double sum) { + } + } + + double TAdaptiveHistogram::CalcUpperBound(double sum) { Y_VERIFY(sum >= 0, "Sum must be >= 0"); if (sum == 0.0) { return MinValue; } - if (Empty()) { - return MaxValue; - } - TPairSet::iterator current = Bins.begin(); - double gatheredSum = 0.0; - while (current != Bins.end() && gatheredSum < sum) { - gatheredSum += current->second; - ++current; - } - --current; - if (gatheredSum < sum) { - return MaxValue; - } - TWeightedValue left(MinValue, 0.0); - TWeightedValue right(MaxValue, 0.0); - if (current != Bins.begin()) { - TPairSet::iterator leftBin = current; - --leftBin; - left = *leftBin; - } - { - TPairSet::iterator rightBin = current; - ++rightBin; - if (rightBin != Bins.end()) { - right = *rightBin; - } - } - double sumToAdd = sum - (gatheredSum - current->second - left.second / 2); - if (sumToAdd <= ((current->second + left.second) / 2)) { - return left.first + 2 * sumToAdd * (current->first - left.first) / (current->second + left.second); - } else { - sumToAdd -= (current->second + left.second) / 2; - return current->first + 2 * sumToAdd * (right.first - current->first) / (right.second + current->second); - } - } - - double TAdaptiveHistogram::CalcLowerBound(double sum) { + if (Empty()) { + return MaxValue; + } + TPairSet::iterator current = Bins.begin(); + double gatheredSum = 0.0; + while (current != Bins.end() && gatheredSum < sum) { + gatheredSum += current->second; + ++current; + } + --current; + if (gatheredSum < sum) { + return MaxValue; + } + TWeightedValue left(MinValue, 0.0); + TWeightedValue right(MaxValue, 0.0); + if (current != Bins.begin()) { + TPairSet::iterator leftBin = current; + --leftBin; + left = *leftBin; + } + { + TPairSet::iterator rightBin = current; + ++rightBin; + if (rightBin != Bins.end()) { + right = *rightBin; + } + } + double sumToAdd = sum - (gatheredSum - current->second - left.second / 2); + if (sumToAdd <= ((current->second + left.second) / 2)) { + return left.first + 2 * sumToAdd * (current->first - left.first) / (current->second + left.second); + } else { + sumToAdd -= (current->second + left.second) / 2; + return current->first + 2 * sumToAdd * (right.first - current->first) / (right.second + current->second); + } + } + + double TAdaptiveHistogram::CalcLowerBound(double sum) { Y_VERIFY(sum >= 0, "Sum must be >= 0"); if (sum == 0.0) { return MaxValue; } - if (Empty()) { - return MinValue; - } - TPairSet::iterator current = Bins.end(); - double gatheredSum = 0.0; - while (current != Bins.begin() && gatheredSum < sum) { - --current; - gatheredSum += current->second; - } - if (gatheredSum < sum) { - return MinValue; - } - TWeightedValue left(MinValue, 0.0); - TWeightedValue right(MaxValue, 0.0); - if (current != Bins.begin()) { - TPairSet::iterator leftBin = current; - --leftBin; - left = *leftBin; - } - { - TPairSet::iterator rightBin = current; - ++rightBin; - if (rightBin != Bins.end()) { - right = *rightBin; - } - } - double sumToAdd = sum - (gatheredSum - current->second - right.second / 2); - if (sumToAdd <= ((current->second + right.second) / 2)) { - return right.first - 2 * sumToAdd * (right.first - current->first) / (current->second + right.second); - } else { - sumToAdd -= (current->second + right.second) / 2; - return current->first - 2 * sumToAdd * (current->first - left.first) / (left.second + current->second); - } - } - + if (Empty()) { + return MinValue; + } + TPairSet::iterator current = Bins.end(); + double gatheredSum = 0.0; + while (current != Bins.begin() && gatheredSum < sum) { + --current; + gatheredSum += current->second; + } + if (gatheredSum < sum) { + return MinValue; + } + TWeightedValue left(MinValue, 0.0); + TWeightedValue right(MaxValue, 0.0); + if (current != Bins.begin()) { + TPairSet::iterator leftBin = current; + --leftBin; + left = *leftBin; + } + { + TPairSet::iterator rightBin = current; + ++rightBin; + if (rightBin != Bins.end()) { + right = *rightBin; + } + } + double sumToAdd = sum - (gatheredSum - current->second - right.second / 2); + if (sumToAdd <= ((current->second + right.second) / 2)) { + return right.first - 2 * sumToAdd * (right.first - current->first) / (current->second + right.second); + } else { + sumToAdd -= (current->second + right.second) / 2; + return current->first - 2 * sumToAdd * (current->first - left.first) / (left.second + current->second); + } + } + double TAdaptiveHistogram::CalcUpperBoundSafe(double sum) { if (!Empty()) { sum = Max(Bins.begin()->second, sum); @@ -444,145 +444,145 @@ namespace NKiwiAggr { return CalcLowerBound(sum); } - void TAdaptiveHistogram::FromIHistogram(IHistogram* histo) { - if (!histo) { - ythrow yexception() << "Attempt to create TAdaptiveHistogram from a NULL pointer"; - } + void TAdaptiveHistogram::FromIHistogram(IHistogram* histo) { + if (!histo) { + ythrow yexception() << "Attempt to create TAdaptiveHistogram from a NULL pointer"; + } if (CalcQuality == CalcWardQuality) { ythrow yexception() << "Not implemented"; } else if (CalcQuality != CalcDistanceQuality && CalcQuality != CalcWeightQuality) { - ythrow yexception() << "Attempt to create TAdaptiveHistogram from a pointer without default CalcQuality"; - } - Id = histo->GetId(); - if (histo->Empty()) { - return; - } - double sum = histo->GetSum(); - double minValue = histo->GetMinValue(); - double maxValue = histo->GetMaxValue(); - if (minValue == maxValue) { - Add(minValue, sum); - return; - } - if (CalcQuality == CalcDistanceQuality) { - double binRange = (maxValue - minValue) / (Intervals); - for (size_t i = 0; i < Intervals; ++i) { - Add(minValue + binRange * (i + 0.5), histo->GetSumInRange(minValue + binRange * i, minValue + binRange * (i + 1))); - } - } else if (CalcQuality == CalcWeightQuality && sum != 0.0) { - double slab = sum / Intervals; - double prevBound = minValue; - for (size_t i = 0; i < Intervals; ++i) { - double bound = histo->CalcUpperBound(slab * (i + 1)); - Add((bound + prevBound) / 2, slab); - prevBound = bound; - } - } - MinValue = minValue; - MaxValue = maxValue; - } - - void TAdaptiveHistogram::Add(const TWeightedValue& weightedValue, bool initial) { - const double& value = weightedValue.first; - const double& weight = weightedValue.second; - if (weight <= 0.0) { - return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden - } - if (initial) { - Sum += weight; - } - if (Bins.size() == 0) { - MinValue = value; - MaxValue = value; - Bins.insert(weightedValue); - return; - } - if (value < MinValue) { - MinValue = value; - } - if (value > MaxValue) { - MaxValue = value; - } - TPairSet::iterator rightBin = Bins.lower_bound(TWeightedValue(value, -1.0)); - if (rightBin != Bins.end() && rightBin->first == value) { - TPairSet::iterator currentBin = rightBin; - ++rightBin; - TWeightedValue newBin(value, weight + currentBin->second); - if (rightBin != Bins.end()) { + ythrow yexception() << "Attempt to create TAdaptiveHistogram from a pointer without default CalcQuality"; + } + Id = histo->GetId(); + if (histo->Empty()) { + return; + } + double sum = histo->GetSum(); + double minValue = histo->GetMinValue(); + double maxValue = histo->GetMaxValue(); + if (minValue == maxValue) { + Add(minValue, sum); + return; + } + if (CalcQuality == CalcDistanceQuality) { + double binRange = (maxValue - minValue) / (Intervals); + for (size_t i = 0; i < Intervals; ++i) { + Add(minValue + binRange * (i + 0.5), histo->GetSumInRange(minValue + binRange * i, minValue + binRange * (i + 1))); + } + } else if (CalcQuality == CalcWeightQuality && sum != 0.0) { + double slab = sum / Intervals; + double prevBound = minValue; + for (size_t i = 0; i < Intervals; ++i) { + double bound = histo->CalcUpperBound(slab * (i + 1)); + Add((bound + prevBound) / 2, slab); + prevBound = bound; + } + } + MinValue = minValue; + MaxValue = maxValue; + } + + void TAdaptiveHistogram::Add(const TWeightedValue& weightedValue, bool initial) { + const double& value = weightedValue.first; + const double& weight = weightedValue.second; + if (weight <= 0.0) { + return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden + } + if (initial) { + Sum += weight; + } + if (Bins.size() == 0) { + MinValue = value; + MaxValue = value; + Bins.insert(weightedValue); + return; + } + if (value < MinValue) { + MinValue = value; + } + if (value > MaxValue) { + MaxValue = value; + } + TPairSet::iterator rightBin = Bins.lower_bound(TWeightedValue(value, -1.0)); + if (rightBin != Bins.end() && rightBin->first == value) { + TPairSet::iterator currentBin = rightBin; + ++rightBin; + TWeightedValue newBin(value, weight + currentBin->second); + if (rightBin != Bins.end()) { Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) == 1, "Erase failed"); - BinsByQuality.insert(CalcQuality(newBin, *rightBin)); - } - if (currentBin != Bins.begin()) { - TPairSet::iterator leftBin = currentBin; - --leftBin; + BinsByQuality.insert(CalcQuality(newBin, *rightBin)); + } + if (currentBin != Bins.begin()) { + TPairSet::iterator leftBin = currentBin; + --leftBin; Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) == 1, "Erase failed"); - BinsByQuality.insert(CalcQuality(*leftBin, newBin)); - } - Bins.erase(currentBin); - Bins.insert(newBin); - return; - } - if (rightBin == Bins.begin()) { - BinsByQuality.insert(CalcQuality(weightedValue, *rightBin)); - } else { - TPairSet::iterator leftBin = rightBin; - --leftBin; - if (rightBin == Bins.end()) { - BinsByQuality.insert(CalcQuality(*leftBin, weightedValue)); - } else { + BinsByQuality.insert(CalcQuality(*leftBin, newBin)); + } + Bins.erase(currentBin); + Bins.insert(newBin); + return; + } + if (rightBin == Bins.begin()) { + BinsByQuality.insert(CalcQuality(weightedValue, *rightBin)); + } else { + TPairSet::iterator leftBin = rightBin; + --leftBin; + if (rightBin == Bins.end()) { + BinsByQuality.insert(CalcQuality(*leftBin, weightedValue)); + } else { Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *rightBin)) == 1, "Erase failed"); - BinsByQuality.insert(CalcQuality(*leftBin, weightedValue)); - BinsByQuality.insert(CalcQuality(weightedValue, *rightBin)); - } - } - Bins.insert(weightedValue); - if (Bins.size() > Intervals) { - Shrink(); - } - } - - void TAdaptiveHistogram::Erase(double value) { - TPairSet::iterator currentBin = Bins.lower_bound(TWeightedValue(value, -1.0)); + BinsByQuality.insert(CalcQuality(*leftBin, weightedValue)); + BinsByQuality.insert(CalcQuality(weightedValue, *rightBin)); + } + } + Bins.insert(weightedValue); + if (Bins.size() > Intervals) { + Shrink(); + } + } + + void TAdaptiveHistogram::Erase(double value) { + TPairSet::iterator currentBin = Bins.lower_bound(TWeightedValue(value, -1.0)); Y_VERIFY(currentBin != Bins.end() && currentBin->first == value, "Can't find bin that should be erased"); - TPairSet::iterator rightBin = currentBin; - ++rightBin; - if (currentBin == Bins.begin()) { + TPairSet::iterator rightBin = currentBin; + ++rightBin; + if (currentBin == Bins.begin()) { Y_VERIFY(rightBin != Bins.end(), "No right bin for the first bin"); Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed"); - } else { - TPairSet::iterator leftBin = currentBin; - --leftBin; - if (rightBin == Bins.end()) { + } else { + TPairSet::iterator leftBin = currentBin; + --leftBin; + if (rightBin == Bins.end()) { Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed"); - } else { + } else { Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed"); Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed"); - BinsByQuality.insert(CalcQuality(*leftBin, *rightBin)); - } - } - Bins.erase(currentBin); - } - - void TAdaptiveHistogram::Shrink() { - TPairSet::iterator worstBin = BinsByQuality.begin(); + BinsByQuality.insert(CalcQuality(*leftBin, *rightBin)); + } + } + Bins.erase(currentBin); + } + + void TAdaptiveHistogram::Shrink() { + TPairSet::iterator worstBin = BinsByQuality.begin(); Y_VERIFY(worstBin != BinsByQuality.end(), "No right bin for the first bin"); - TPairSet::iterator leftBin = Bins.lower_bound(TWeightedValue(worstBin->second, -1.0)); + TPairSet::iterator leftBin = Bins.lower_bound(TWeightedValue(worstBin->second, -1.0)); Y_VERIFY(leftBin != Bins.end() && leftBin->first == worstBin->second, "Can't find worst bin"); - TPairSet::iterator rightBin = leftBin; - ++rightBin; + TPairSet::iterator rightBin = leftBin; + ++rightBin; Y_VERIFY(rightBin != Bins.end(), "Can't find right bin"); - - TWeightedValue newBin((leftBin->first * leftBin->second + rightBin->first * rightBin->second) / (leftBin->second + rightBin->second), leftBin->second + rightBin->second); - if (Bins.size() > 2) { - Erase(leftBin->first); - Erase(rightBin->first); - } else { - Bins.clear(); - BinsByQuality.clear(); - } - Add(newBin, false); - } - + + TWeightedValue newBin((leftBin->first * leftBin->second + rightBin->first * rightBin->second) / (leftBin->second + rightBin->second), leftBin->second + rightBin->second); + if (Bins.size() > 2) { + Erase(leftBin->first); + Erase(rightBin->first); + } else { + Bins.clear(); + BinsByQuality.clear(); + } + Add(newBin, false); + } + void TAdaptiveHistogram::PrecomputePartialSums() { PrecomputedBins.clear(); PrecomputedBins.reserve(Bins.size()); |