aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp
diff options
context:
space:
mode:
authorzosimov <zosimov@yandex-team.ru>2022-02-10 16:50:32 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:32 +0300
commita1d67d6a31f789aa011250c3edce5751c0cadad2 (patch)
tree5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/histogram/adaptive/fixed_bin_histogram.cpp
parenta8f009e06d613c9567eb4c0f461dbed5e0d8092b (diff)
downloadydb-a1d67d6a31f789aa011250c3edce5751c0cadad2.tar.gz
Restoring authorship annotation for <zosimov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/histogram/adaptive/fixed_bin_histogram.cpp')
-rw-r--r--library/cpp/histogram/adaptive/fixed_bin_histogram.cpp946
1 files changed, 473 insertions, 473 deletions
diff --git a/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp b/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp
index 44d0cc57c6..558aba9e2d 100644
--- a/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp
+++ b/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp
@@ -1,425 +1,425 @@
#include "fixed_bin_histogram.h"
-#include "auto_histogram.h"
+#include "auto_histogram.h"
#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
#include <util/generic/algorithm.h>
-#include <util/generic/yexception.h>
-#include <util/generic/ymath.h>
-#include <util/string/printf.h>
-
+#include <util/generic/yexception.h>
+#include <util/generic/ymath.h>
+#include <util/string/printf.h>
+
namespace NKiwiAggr {
TFixedBinHistogram::TFixedBinHistogram(size_t intervals, ui64 id, size_t trainingSetSize)
- : TrainingSetSize(trainingSetSize)
- , IsInitialized(false)
- , IsEmpty(true)
- , Id(id)
- , Sum(0.0)
- , Freqs(0)
- , ReserveFreqs(0)
- , BinRange(0.0)
- , Intervals(intervals)
- , BaseIndex(intervals / 2)
- {
- }
-
+ : TrainingSetSize(trainingSetSize)
+ , IsInitialized(false)
+ , IsEmpty(true)
+ , Id(id)
+ , Sum(0.0)
+ , Freqs(0)
+ , ReserveFreqs(0)
+ , BinRange(0.0)
+ , Intervals(intervals)
+ , BaseIndex(intervals / 2)
+ {
+ }
+
TFixedBinHistogram::TFixedBinHistogram(const THistogram& histo, size_t defaultIntervals, ui64 defaultId, size_t trainingSetSize)
- : TrainingSetSize(trainingSetSize)
- , IsInitialized(false)
- , IsEmpty(true)
- , Id(defaultId)
- , Sum(0.0)
- , Freqs(0)
- , ReserveFreqs(0)
- , BinRange(0.0)
- , Intervals(defaultIntervals)
- , BaseIndex(defaultIntervals / 2)
- {
- FromProto(histo);
- }
+ : TrainingSetSize(trainingSetSize)
+ , IsInitialized(false)
+ , IsEmpty(true)
+ , Id(defaultId)
+ , Sum(0.0)
+ , Freqs(0)
+ , ReserveFreqs(0)
+ , BinRange(0.0)
+ , Intervals(defaultIntervals)
+ , BaseIndex(defaultIntervals / 2)
+ {
+ FromProto(histo);
+ }
TFixedBinHistogram::TFixedBinHistogram(IHistogram* histo, size_t defaultIntervals, ui64 defaultId, size_t trainingSetSize)
- : TrainingSetSize(trainingSetSize)
- , IsInitialized(false)
- , IsEmpty(true)
- , Id(defaultId)
- , Sum(0.0)
- , Freqs(0)
- , ReserveFreqs(0)
- , BinRange(0.0)
- , Intervals(defaultIntervals)
- , BaseIndex(defaultIntervals / 2)
- {
- TFixedBinHistogram* fixedBinHisto = dynamic_cast<TFixedBinHistogram*>(histo);
- if (!fixedBinHisto) {
- FromIHistogram(histo);
- return;
- }
- fixedBinHisto->Initialize();
- TrainingSetSize = fixedBinHisto->TrainingSetSize;
- IsInitialized = fixedBinHisto->IsInitialized;
- IsEmpty = fixedBinHisto->IsEmpty;
- Id = fixedBinHisto->Id;
- MinValue = fixedBinHisto->MinValue;
- MaxValue = fixedBinHisto->MaxValue;
- Sum = fixedBinHisto->Sum;
- Freqs.assign(fixedBinHisto->Freqs.begin(), fixedBinHisto->Freqs.end());
- ReserveFreqs.assign(fixedBinHisto->ReserveFreqs.begin(), fixedBinHisto->ReserveFreqs.end());
- ReferencePoint = fixedBinHisto->ReferencePoint;
- BinRange = fixedBinHisto->BinRange;
- Intervals = fixedBinHisto->Intervals;
- FirstUsedBin = fixedBinHisto->FirstUsedBin;
- LastUsedBin = fixedBinHisto->LastUsedBin;
- BaseIndex = fixedBinHisto->BaseIndex;
- }
-
- void TFixedBinHistogram::Clear() {
- TrainingSet.Destroy();
- IsInitialized = false;
- IsEmpty = true;
- Sum = 0.0;
- Freqs.clear();
- ReserveFreqs.clear();
- BinRange = 0.0;
- }
-
- void TFixedBinHistogram::Add(const THistoRec& rec) {
- if (!rec.HasId() || rec.GetId() == Id) {
- Add(rec.GetValue(), rec.GetWeight());
- }
- }
-
- void TFixedBinHistogram::Add(double value, double weight) {
- if (!IsValidFloat(value) || !IsValidFloat(weight)) {
+ : TrainingSetSize(trainingSetSize)
+ , IsInitialized(false)
+ , IsEmpty(true)
+ , Id(defaultId)
+ , Sum(0.0)
+ , Freqs(0)
+ , ReserveFreqs(0)
+ , BinRange(0.0)
+ , Intervals(defaultIntervals)
+ , BaseIndex(defaultIntervals / 2)
+ {
+ TFixedBinHistogram* fixedBinHisto = dynamic_cast<TFixedBinHistogram*>(histo);
+ if (!fixedBinHisto) {
+ FromIHistogram(histo);
+ return;
+ }
+ fixedBinHisto->Initialize();
+ TrainingSetSize = fixedBinHisto->TrainingSetSize;
+ IsInitialized = fixedBinHisto->IsInitialized;
+ IsEmpty = fixedBinHisto->IsEmpty;
+ Id = fixedBinHisto->Id;
+ MinValue = fixedBinHisto->MinValue;
+ MaxValue = fixedBinHisto->MaxValue;
+ Sum = fixedBinHisto->Sum;
+ Freqs.assign(fixedBinHisto->Freqs.begin(), fixedBinHisto->Freqs.end());
+ ReserveFreqs.assign(fixedBinHisto->ReserveFreqs.begin(), fixedBinHisto->ReserveFreqs.end());
+ ReferencePoint = fixedBinHisto->ReferencePoint;
+ BinRange = fixedBinHisto->BinRange;
+ Intervals = fixedBinHisto->Intervals;
+ FirstUsedBin = fixedBinHisto->FirstUsedBin;
+ LastUsedBin = fixedBinHisto->LastUsedBin;
+ BaseIndex = fixedBinHisto->BaseIndex;
+ }
+
+ void TFixedBinHistogram::Clear() {
+ TrainingSet.Destroy();
+ IsInitialized = false;
+ IsEmpty = true;
+ Sum = 0.0;
+ Freqs.clear();
+ ReserveFreqs.clear();
+ BinRange = 0.0;
+ }
+
+ void TFixedBinHistogram::Add(const THistoRec& rec) {
+ if (!rec.HasId() || rec.GetId() == Id) {
+ Add(rec.GetValue(), rec.GetWeight());
+ }
+ }
+
+ void TFixedBinHistogram::Add(double value, double weight) {
+ if (!IsValidFloat(value) || !IsValidFloat(weight)) {
ythrow yexception() << Sprintf("Histogram id %lu: bad value %f weight %f", Id, value, weight);
- }
-
- if (weight <= 0.0) {
- return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
- }
-
- Sum += weight;
-
- if (!IsInitialized) {
- if (!TrainingSet) {
+ }
+
+ if (weight <= 0.0) {
+ return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
+ }
+
+ Sum += weight;
+
+ if (!IsInitialized) {
+ if (!TrainingSet) {
TrainingSet.Reset(new TVector<TWeightedValue>(0));
- }
- TrainingSet->push_back(TWeightedValue(value, weight));
- if (TrainingSet->size() >= TrainingSetSize) {
- Initialize();
- }
- return;
- }
-
- i32 bin = CalcBin(value);
- if (bin < 0 || bin >= (i32)Freqs.size() || (BinRange == 0.0 && value != ReferencePoint)) {
- Shrink(Min(value, MinValue), Max(value, MaxValue));
- Freqs[CalcBin(value)] += weight;
- } else {
- MinValue = Min(value, MinValue);
- MaxValue = Max(value, MaxValue);
- FirstUsedBin = Min(FirstUsedBin, bin);
- LastUsedBin = Max(LastUsedBin, bin);
- Freqs[bin] += weight;
- }
- }
-
+ }
+ TrainingSet->push_back(TWeightedValue(value, weight));
+ if (TrainingSet->size() >= TrainingSetSize) {
+ Initialize();
+ }
+ return;
+ }
+
+ i32 bin = CalcBin(value);
+ if (bin < 0 || bin >= (i32)Freqs.size() || (BinRange == 0.0 && value != ReferencePoint)) {
+ Shrink(Min(value, MinValue), Max(value, MaxValue));
+ Freqs[CalcBin(value)] += weight;
+ } else {
+ MinValue = Min(value, MinValue);
+ MaxValue = Max(value, MaxValue);
+ FirstUsedBin = Min(FirstUsedBin, bin);
+ LastUsedBin = Max(LastUsedBin, bin);
+ Freqs[bin] += weight;
+ }
+ }
+
void TFixedBinHistogram::Merge(const THistogram& /*histo*/, double /*multiplier*/) {
ythrow yexception() << "Method is not implemented for TFixedBinHistogram";
}
void TFixedBinHistogram::Merge(const TVector<THistogram>& histogramsToMerge) {
TVector<IHistogramPtr> parsedHistogramsToMerge;
- for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
- parsedHistogramsToMerge.push_back(IHistogramPtr(new TAutoHistogram(histogramsToMerge[i], Intervals, Id)));
- }
- Merge(parsedHistogramsToMerge);
- }
-
+ for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
+ parsedHistogramsToMerge.push_back(IHistogramPtr(new TAutoHistogram(histogramsToMerge[i], Intervals, Id)));
+ }
+ Merge(parsedHistogramsToMerge);
+ }
+
void TFixedBinHistogram::Merge(TVector<IHistogramPtr> histogramsToMerge) {
TVector<IHistogramPtr> histogramsToMergeRepacked(0);
TVector<TFixedBinHistogram*> histograms(0);
-
- // put current histogram to the vector of histograms to merge and clear self
- if (!Empty()) {
- histogramsToMergeRepacked.push_back(IHistogramPtr(new TFixedBinHistogram(this, Intervals, Id, TrainingSetSize)));
- histograms.push_back(dynamic_cast<TFixedBinHistogram*>(histogramsToMergeRepacked.back().Get()));
- }
- Clear();
-
- for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
- if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
- continue;
- }
- TFixedBinHistogram* fixedBinHisto = dynamic_cast<TFixedBinHistogram*>(histogramsToMerge[i].Get());
- if (fixedBinHisto) {
- fixedBinHisto->Initialize();
- histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
- } else {
+
+ // put current histogram to the vector of histograms to merge and clear self
+ if (!Empty()) {
+ histogramsToMergeRepacked.push_back(IHistogramPtr(new TFixedBinHistogram(this, Intervals, Id, TrainingSetSize)));
+ histograms.push_back(dynamic_cast<TFixedBinHistogram*>(histogramsToMergeRepacked.back().Get()));
+ }
+ Clear();
+
+ for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
+ if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
+ continue;
+ }
+ TFixedBinHistogram* fixedBinHisto = dynamic_cast<TFixedBinHistogram*>(histogramsToMerge[i].Get());
+ if (fixedBinHisto) {
+ fixedBinHisto->Initialize();
+ histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
+ } else {
histogramsToMergeRepacked.push_back(IHistogramPtr(new TFixedBinHistogram(histogramsToMerge[i].Get(), Intervals, Id, TrainingSetSize))); // Convert histograms that are not of TFixedBinHistogram type
- }
- histograms.push_back(dynamic_cast<TFixedBinHistogram*>(histogramsToMergeRepacked.back().Get()));
- }
-
- if (histograms.size() == 0) {
- return;
- }
-
- double minValue = histograms[0]->MinValue;
- double maxValue = histograms[0]->MaxValue;
- Sum = histograms[0]->Sum;
- for (size_t i = 1; i < histograms.size(); ++i) {
- minValue = Min(minValue, histograms[i]->MinValue);
- maxValue = Max(maxValue, histograms[i]->MaxValue);
- Sum += histograms[i]->Sum;
- }
- SetFrame(minValue, maxValue, true);
-
- if (BinRange == 0.0) {
- Freqs[BaseIndex] = Sum;
- return;
- }
-
- for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
- TFixedBinHistogram* histo = histograms[histoIndex];
- for (i32 bin = histo->FirstUsedBin; bin <= histo->LastUsedBin; ++bin) {
- double binStart = histo->BinStart(bin);
- double binEnd = histo->BinEnd(bin);
- double freq = histo->Freqs[bin];
- if (binStart == binEnd) {
- Freqs[CalcBin(binStart)] += freq;
- continue;
- }
- size_t firstCross = CalcBin(binStart);
- size_t lastCross = CalcBin(binEnd);
- for (size_t i = firstCross; i <= lastCross; ++i) {
- double mergedBinStart = BinStart(i);
- double mergedBinEnd = BinEnd(i);
- double crossStart = Max(mergedBinStart, binStart);
- double crossEnd = Min(mergedBinEnd, binEnd);
- if (binStart == binEnd) {
- }
- Freqs[i] += freq * (crossEnd - crossStart) / (binEnd - binStart);
- }
- }
- }
- }
-
- void TFixedBinHistogram::Multiply(double factor) {
- if (!IsValidFloat(factor) || factor <= 0) {
- ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
- }
- if (!IsInitialized) {
- Initialize();
- }
- Sum *= factor;
+ }
+ histograms.push_back(dynamic_cast<TFixedBinHistogram*>(histogramsToMergeRepacked.back().Get()));
+ }
+
+ if (histograms.size() == 0) {
+ return;
+ }
+
+ double minValue = histograms[0]->MinValue;
+ double maxValue = histograms[0]->MaxValue;
+ Sum = histograms[0]->Sum;
+ for (size_t i = 1; i < histograms.size(); ++i) {
+ minValue = Min(minValue, histograms[i]->MinValue);
+ maxValue = Max(maxValue, histograms[i]->MaxValue);
+ Sum += histograms[i]->Sum;
+ }
+ SetFrame(minValue, maxValue, true);
+
+ if (BinRange == 0.0) {
+ Freqs[BaseIndex] = Sum;
+ return;
+ }
+
+ for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
+ TFixedBinHistogram* histo = histograms[histoIndex];
+ for (i32 bin = histo->FirstUsedBin; bin <= histo->LastUsedBin; ++bin) {
+ double binStart = histo->BinStart(bin);
+ double binEnd = histo->BinEnd(bin);
+ double freq = histo->Freqs[bin];
+ if (binStart == binEnd) {
+ Freqs[CalcBin(binStart)] += freq;
+ continue;
+ }
+ size_t firstCross = CalcBin(binStart);
+ size_t lastCross = CalcBin(binEnd);
+ for (size_t i = firstCross; i <= lastCross; ++i) {
+ double mergedBinStart = BinStart(i);
+ double mergedBinEnd = BinEnd(i);
+ double crossStart = Max(mergedBinStart, binStart);
+ double crossEnd = Min(mergedBinEnd, binEnd);
+ if (binStart == binEnd) {
+ }
+ Freqs[i] += freq * (crossEnd - crossStart) / (binEnd - binStart);
+ }
+ }
+ }
+ }
+
+ void TFixedBinHistogram::Multiply(double factor) {
+ if (!IsValidFloat(factor) || factor <= 0) {
+ ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
+ }
+ if (!IsInitialized) {
+ Initialize();
+ }
+ Sum *= factor;
for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
- Freqs[i] *= factor;
- }
- }
-
- void TFixedBinHistogram::FromProto(const THistogram& histo) {
- if (histo.HasType() && histo.GetType() != HT_FIXED_BIN_HISTOGRAM) {
- ythrow yexception() << "Attempt to parse TFixedBinHistogram from THistogram protobuf record of wrong type = " << (ui32)histo.GetType();
- }
- TrainingSet.Destroy();
- IsInitialized = false;
- Sum = 0.0;
-
- Id = histo.GetId();
- size_t intervals = histo.FreqSize();
- if (intervals == 0) {
- IsEmpty = true;
- return;
- }
- Intervals = intervals;
- TrainingSetSize = Intervals;
- BaseIndex = Intervals / 2;
-
- if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue()) || !IsValidFloat(histo.GetBinRange())) {
+ Freqs[i] *= factor;
+ }
+ }
+
+ void TFixedBinHistogram::FromProto(const THistogram& histo) {
+ if (histo.HasType() && histo.GetType() != HT_FIXED_BIN_HISTOGRAM) {
+ ythrow yexception() << "Attempt to parse TFixedBinHistogram from THistogram protobuf record of wrong type = " << (ui32)histo.GetType();
+ }
+ TrainingSet.Destroy();
+ IsInitialized = false;
+ Sum = 0.0;
+
+ Id = histo.GetId();
+ size_t intervals = histo.FreqSize();
+ if (intervals == 0) {
+ IsEmpty = true;
+ return;
+ }
+ Intervals = intervals;
+ TrainingSetSize = Intervals;
+ BaseIndex = Intervals / 2;
+
+ if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue()) || !IsValidFloat(histo.GetBinRange())) {
ythrow yexception() << Sprintf("FromProto in histogram id %lu: skip bad histo with minvalue %f maxvalue %f binrange %f", Id, histo.GetMinValue(), histo.GetMaxValue(), histo.GetBinRange());
- }
-
- double minValue = histo.GetMinValue();
- double binRange = histo.GetBinRange();
- double maxValue = histo.HasMaxValue() ? histo.GetMaxValue() : minValue + binRange * Intervals;
- SetFrame(minValue, maxValue, true);
- BinRange = binRange;
- for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
- Freqs[i] = histo.GetFreq(i - BaseIndex);
- if (!IsValidFloat(Freqs[i])) {
+ }
+
+ double minValue = histo.GetMinValue();
+ double binRange = histo.GetBinRange();
+ double maxValue = histo.HasMaxValue() ? histo.GetMaxValue() : minValue + binRange * Intervals;
+ SetFrame(minValue, maxValue, true);
+ BinRange = binRange;
+ for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
+ Freqs[i] = histo.GetFreq(i - BaseIndex);
+ if (!IsValidFloat(Freqs[i])) {
ythrow yexception() << Sprintf("FromProto in histogram id %lu: bad value %f", Id, Freqs[i]);
- }
- Sum += Freqs[i];
- }
- }
-
- void TFixedBinHistogram::ToProto(THistogram& histo) {
- histo.Clear();
- if (!IsInitialized) {
- Initialize();
- }
- histo.SetType(HT_FIXED_BIN_HISTOGRAM);
- histo.SetId(Id);
- if (IsEmpty) {
- return;
- }
- if (FirstUsedBin < (i32)BaseIndex || (LastUsedBin - FirstUsedBin + 1) > (i32)Intervals) {
- Shrink(MinValue, MaxValue);
- }
- histo.SetMinValue(MinValue);
- histo.SetMaxValue(MaxValue);
- histo.SetBinRange(BinRange);
- for (ui32 i = BaseIndex; i < BaseIndex + Intervals; ++i) {
- histo.AddFreq(Freqs[i]);
- }
- }
-
+ }
+ Sum += Freqs[i];
+ }
+ }
+
+ void TFixedBinHistogram::ToProto(THistogram& histo) {
+ histo.Clear();
+ if (!IsInitialized) {
+ Initialize();
+ }
+ histo.SetType(HT_FIXED_BIN_HISTOGRAM);
+ histo.SetId(Id);
+ if (IsEmpty) {
+ return;
+ }
+ if (FirstUsedBin < (i32)BaseIndex || (LastUsedBin - FirstUsedBin + 1) > (i32)Intervals) {
+ Shrink(MinValue, MaxValue);
+ }
+ histo.SetMinValue(MinValue);
+ histo.SetMaxValue(MaxValue);
+ histo.SetBinRange(BinRange);
+ for (ui32 i = BaseIndex; i < BaseIndex + Intervals; ++i) {
+ histo.AddFreq(Freqs[i]);
+ }
+ }
+
void TFixedBinHistogram::SetId(ui64 id) {
- Id = id;
- }
-
+ Id = id;
+ }
+
ui64 TFixedBinHistogram::GetId() {
- return Id;
- }
-
- bool TFixedBinHistogram::Empty() {
- if (!IsInitialized) {
- Initialize();
- }
- return IsEmpty;
- }
-
- double TFixedBinHistogram::GetMinValue() {
- if (!IsInitialized) {
- Initialize();
- }
- return MinValue;
- }
-
- double TFixedBinHistogram::GetMaxValue() {
- if (!IsInitialized) {
- Initialize();
- }
- return MaxValue;
- }
-
- double TFixedBinHistogram::GetSum() {
- return Sum;
- }
-
- double TFixedBinHistogram::GetSumInRange(double leftBound, double rightBound) {
- if (!IsInitialized) {
- Initialize();
- }
- if (leftBound > rightBound) {
- return 0.0;
- }
- return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
- }
-
- double TFixedBinHistogram::GetSumAboveBound(double bound) {
- if (!IsInitialized) {
- Initialize();
- }
- if (IsEmpty) {
- return 0.0;
- }
+ return Id;
+ }
+
+ bool TFixedBinHistogram::Empty() {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ return IsEmpty;
+ }
+
+ double TFixedBinHistogram::GetMinValue() {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ return MinValue;
+ }
+
+ double TFixedBinHistogram::GetMaxValue() {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ return MaxValue;
+ }
+
+ double TFixedBinHistogram::GetSum() {
+ return Sum;
+ }
+
+ double TFixedBinHistogram::GetSumInRange(double leftBound, double rightBound) {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ if (leftBound > rightBound) {
+ return 0.0;
+ }
+ return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
+ }
+
+ double TFixedBinHistogram::GetSumAboveBound(double bound) {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ if (IsEmpty) {
+ return 0.0;
+ }
if (BinRange == 0.0) { // special case - all values added to histogram are the same
- return (bound <= ReferencePoint) ? Sum : 0.0;
- }
- i32 bin = CalcBin(bound);
- if (bin < FirstUsedBin) {
- return Sum;
- }
- if (bin > LastUsedBin) {
- return 0.0;
- }
- double binStart = BinStart(bin);
- double binEnd = BinEnd(bin);
+ return (bound <= ReferencePoint) ? Sum : 0.0;
+ }
+ i32 bin = CalcBin(bound);
+ if (bin < FirstUsedBin) {
+ return Sum;
+ }
+ if (bin > LastUsedBin) {
+ return 0.0;
+ }
+ double binStart = BinStart(bin);
+ double binEnd = BinEnd(bin);
double result = (bound < binStart) ? Freqs[bin] : Freqs[bin] * (binEnd - bound) / (binEnd - binStart);
- for (i32 i = bin + 1; i <= LastUsedBin; ++i) {
- result += Freqs[i];
- }
- return result;
- }
-
- double TFixedBinHistogram::GetSumBelowBound(double bound) {
- if (!IsInitialized) {
- Initialize();
- }
- if (IsEmpty) {
- return 0.0;
- }
+ for (i32 i = bin + 1; i <= LastUsedBin; ++i) {
+ result += Freqs[i];
+ }
+ return result;
+ }
+
+ double TFixedBinHistogram::GetSumBelowBound(double bound) {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ if (IsEmpty) {
+ return 0.0;
+ }
if (BinRange == 0.0) { // special case - all values added to histogram are the same
- return (bound > ReferencePoint) ? Sum : 0.0;
- }
- i32 bin = CalcBin(bound);
- if (bin < FirstUsedBin) {
- return 0.0;
- }
- if (bin > LastUsedBin) {
- return Sum;
- }
- double binStart = BinStart(bin);
- double binEnd = BinEnd(bin);
+ return (bound > ReferencePoint) ? Sum : 0.0;
+ }
+ i32 bin = CalcBin(bound);
+ if (bin < FirstUsedBin) {
+ return 0.0;
+ }
+ if (bin > LastUsedBin) {
+ return Sum;
+ }
+ double binStart = BinStart(bin);
+ double binEnd = BinEnd(bin);
double result = (bound > binEnd) ? Freqs[bin] : Freqs[bin] * (bound - binStart) / (binEnd - binStart);
- for (i32 i = bin - 1; i >= FirstUsedBin; --i) {
- result += Freqs[i];
- }
- return result;
- }
-
- double TFixedBinHistogram::CalcUpperBound(double sum) {
- if (!IsInitialized) {
- Initialize();
- }
+ for (i32 i = bin - 1; i >= FirstUsedBin; --i) {
+ result += Freqs[i];
+ }
+ return result;
+ }
+
+ double TFixedBinHistogram::CalcUpperBound(double sum) {
+ if (!IsInitialized) {
+ Initialize();
+ }
if (sum == 0.0) {
return MinValue;
}
- if (IsEmpty) {
- return MaxValue;
- }
- i32 currentBin = FirstUsedBin;
- double gatheredSum = 0.0;
- while (gatheredSum < sum && currentBin <= LastUsedBin) {
- gatheredSum += Freqs[currentBin];
- ++currentBin;
- }
- --currentBin;
+ if (IsEmpty) {
+ return MaxValue;
+ }
+ i32 currentBin = FirstUsedBin;
+ double gatheredSum = 0.0;
+ while (gatheredSum < sum && currentBin <= LastUsedBin) {
+ gatheredSum += Freqs[currentBin];
+ ++currentBin;
+ }
+ --currentBin;
if ((gatheredSum <= sum && currentBin == LastUsedBin) || (Freqs[currentBin] == 0)) {
- return MaxValue;
- }
- double binStart = BinStart(currentBin);
- double binEnd = BinEnd(currentBin);
- return binEnd - (binEnd - binStart) * (gatheredSum - sum) / Freqs[currentBin];
- }
-
- double TFixedBinHistogram::CalcLowerBound(double sum) {
- if (!IsInitialized) {
- Initialize();
- }
+ return MaxValue;
+ }
+ double binStart = BinStart(currentBin);
+ double binEnd = BinEnd(currentBin);
+ return binEnd - (binEnd - binStart) * (gatheredSum - sum) / Freqs[currentBin];
+ }
+
+ double TFixedBinHistogram::CalcLowerBound(double sum) {
+ if (!IsInitialized) {
+ Initialize();
+ }
if (sum == 0.0) {
return MaxValue;
}
- if (IsEmpty) {
- return MinValue;
- }
- i32 currentBin = LastUsedBin;
- double gatheredSum = 0.0;
- while (gatheredSum < sum && currentBin >= FirstUsedBin) {
- gatheredSum += Freqs[currentBin];
- --currentBin;
- }
- ++currentBin;
+ if (IsEmpty) {
+ return MinValue;
+ }
+ i32 currentBin = LastUsedBin;
+ double gatheredSum = 0.0;
+ while (gatheredSum < sum && currentBin >= FirstUsedBin) {
+ gatheredSum += Freqs[currentBin];
+ --currentBin;
+ }
+ ++currentBin;
if ((gatheredSum <= sum && currentBin == FirstUsedBin) || (Freqs[currentBin] == 0)) {
- return MinValue;
- }
- double binStart = BinStart(currentBin);
- double binEnd = BinEnd(currentBin);
- return binStart + (binEnd - binStart) * (gatheredSum - sum) / Freqs[currentBin];
- }
-
+ return MinValue;
+ }
+ double binStart = BinStart(currentBin);
+ double binEnd = BinEnd(currentBin);
+ return binStart + (binEnd - binStart) * (gatheredSum - sum) / Freqs[currentBin];
+ }
+
double TFixedBinHistogram::CalcUpperBoundSafe(double sum) {
if (!Empty()) {
sum = Max(Freqs[FirstUsedBin], sum);
@@ -434,67 +434,67 @@ namespace NKiwiAggr {
return CalcLowerBound(sum);
}
- double TFixedBinHistogram::CalcBinRange(double referencePoint, double maxValue) {
- return (maxValue - referencePoint) / ((double)Intervals - 0.02);
- }
-
- void TFixedBinHistogram::SetFrame(double minValue, double maxValue, bool clear) {
- MinValue = minValue;
- MaxValue = maxValue;
- ReferencePoint = MinValue;
- BinRange = CalcBinRange(ReferencePoint, MaxValue);
- FirstUsedBin = BaseIndex;
- LastUsedBin = (BinRange == 0.0) ? BaseIndex : BaseIndex + Intervals - 1;
- if (clear) {
- Freqs.assign(2 * Intervals, 0.0);
- ReserveFreqs.assign(2 * Intervals, 0.0);
- IsEmpty = false;
- IsInitialized = true;
- }
- }
-
- void TFixedBinHistogram::FromIHistogram(IHistogram* histo) {
- if (!histo) {
- ythrow yexception() << "Attempt to create TFixedBinFistogram from a NULL pointer";
- }
- Id = histo->GetId();
- if (histo->Empty()) {
- IsInitialized = false;
- IsEmpty = true;
- return;
- }
- SetFrame(histo->GetMinValue(), histo->GetMaxValue(), true);
- Sum = histo->GetSum();
- if (BinRange == 0.0) {
- Freqs[BaseIndex] = Sum;
- return;
- }
- for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
- Freqs[i] = histo->GetSumInRange(BinStart(i), BinEnd(i));
- }
- return;
- }
-
- void TFixedBinHistogram::Initialize() {
- if (IsInitialized) {
- return;
- }
- if (!TrainingSet || TrainingSet->size() == 0) {
- IsEmpty = true;
- return;
- }
- SetFrame(MinElement(TrainingSet->begin(), TrainingSet->end(), CompareWeightedValue)->first,
- MaxElement(TrainingSet->begin(), TrainingSet->end(), CompareWeightedValue)->first, true);
+ double TFixedBinHistogram::CalcBinRange(double referencePoint, double maxValue) {
+ return (maxValue - referencePoint) / ((double)Intervals - 0.02);
+ }
+
+ void TFixedBinHistogram::SetFrame(double minValue, double maxValue, bool clear) {
+ MinValue = minValue;
+ MaxValue = maxValue;
+ ReferencePoint = MinValue;
+ BinRange = CalcBinRange(ReferencePoint, MaxValue);
+ FirstUsedBin = BaseIndex;
+ LastUsedBin = (BinRange == 0.0) ? BaseIndex : BaseIndex + Intervals - 1;
+ if (clear) {
+ Freqs.assign(2 * Intervals, 0.0);
+ ReserveFreqs.assign(2 * Intervals, 0.0);
+ IsEmpty = false;
+ IsInitialized = true;
+ }
+ }
+
+ void TFixedBinHistogram::FromIHistogram(IHistogram* histo) {
+ if (!histo) {
+ ythrow yexception() << "Attempt to create TFixedBinFistogram from a NULL pointer";
+ }
+ Id = histo->GetId();
+ if (histo->Empty()) {
+ IsInitialized = false;
+ IsEmpty = true;
+ return;
+ }
+ SetFrame(histo->GetMinValue(), histo->GetMaxValue(), true);
+ Sum = histo->GetSum();
+ if (BinRange == 0.0) {
+ Freqs[BaseIndex] = Sum;
+ return;
+ }
+ for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
+ Freqs[i] = histo->GetSumInRange(BinStart(i), BinEnd(i));
+ }
+ return;
+ }
+
+ void TFixedBinHistogram::Initialize() {
+ if (IsInitialized) {
+ return;
+ }
+ if (!TrainingSet || TrainingSet->size() == 0) {
+ IsEmpty = true;
+ return;
+ }
+ SetFrame(MinElement(TrainingSet->begin(), TrainingSet->end(), CompareWeightedValue)->first,
+ MaxElement(TrainingSet->begin(), TrainingSet->end(), CompareWeightedValue)->first, true);
for (TVector<TWeightedValue>::const_iterator it = TrainingSet->begin(); it != TrainingSet->end(); ++it) {
- Freqs[CalcBin(it->first)] += it->second;
- }
- TrainingSet.Destroy();
- }
-
- i32 TFixedBinHistogram::CalcBin(double value) {
- return (BinRange == 0.0) ? BaseIndex : static_cast<i32>(BaseIndex + (value - ReferencePoint) / BinRange);
- }
-
+ Freqs[CalcBin(it->first)] += it->second;
+ }
+ TrainingSet.Destroy();
+ }
+
+ i32 TFixedBinHistogram::CalcBin(double value) {
+ return (BinRange == 0.0) ? BaseIndex : static_cast<i32>(BaseIndex + (value - ReferencePoint) / BinRange);
+ }
+
double TFixedBinHistogram::CalcDensity(double value) {
i32 bin = CalcBin(value);
if (bin < 0 || bin >= (i32)Freqs.size() || BinRange == 0.0 || GetSum() == 0) {
@@ -503,36 +503,36 @@ namespace NKiwiAggr {
return Freqs[bin] / GetSum() / BinRange;
}
- double TFixedBinHistogram::BinStart(i32 i) {
- return Max(ReferencePoint + (i - BaseIndex) * BinRange, MinValue);
- }
-
- double TFixedBinHistogram::BinEnd(i32 i) {
- return Min(ReferencePoint + (i + 1 - BaseIndex) * BinRange, MaxValue);
- }
-
- void TFixedBinHistogram::Shrink(double newReferencePoint, double newMaxValue) {
+ double TFixedBinHistogram::BinStart(i32 i) {
+ return Max(ReferencePoint + (i - BaseIndex) * BinRange, MinValue);
+ }
+
+ double TFixedBinHistogram::BinEnd(i32 i) {
+ return Min(ReferencePoint + (i + 1 - BaseIndex) * BinRange, MaxValue);
+ }
+
+ void TFixedBinHistogram::Shrink(double newReferencePoint, double newMaxValue) {
Y_VERIFY(newReferencePoint < newMaxValue, "Invalid Shrink()");
- memset(&(ReserveFreqs[0]), 0, ReserveFreqs.size() * sizeof(double));
-
- double newBinRange = CalcBinRange(newReferencePoint, newMaxValue);
+ memset(&(ReserveFreqs[0]), 0, ReserveFreqs.size() * sizeof(double));
+
+ double newBinRange = CalcBinRange(newReferencePoint, newMaxValue);
for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
- double binStart = BinStart(i);
- double binEnd = BinEnd(i);
- double freq = Freqs[i];
- i32 firstCross = static_cast<i32>(BaseIndex + (binStart - newReferencePoint) / newBinRange);
- i32 lastCross = static_cast<i32>(BaseIndex + (binEnd - newReferencePoint) / newBinRange);
- for (i32 j = firstCross; j <= lastCross; ++j) {
- double newBinStart = newReferencePoint + (j - BaseIndex) * newBinRange;
- double newBinEnd = newReferencePoint + (j + 1 - BaseIndex) * newBinRange;
- double crossStart = Max(newBinStart, binStart);
- double crossEnd = Min(newBinEnd, binEnd);
- ReserveFreqs[j] += (binStart == binEnd) ? freq : freq * (crossEnd - crossStart) / (binEnd - binStart);
- }
- }
-
- Freqs.swap(ReserveFreqs);
- SetFrame(newReferencePoint, newMaxValue, false);
- }
-
+ double binStart = BinStart(i);
+ double binEnd = BinEnd(i);
+ double freq = Freqs[i];
+ i32 firstCross = static_cast<i32>(BaseIndex + (binStart - newReferencePoint) / newBinRange);
+ i32 lastCross = static_cast<i32>(BaseIndex + (binEnd - newReferencePoint) / newBinRange);
+ for (i32 j = firstCross; j <= lastCross; ++j) {
+ double newBinStart = newReferencePoint + (j - BaseIndex) * newBinRange;
+ double newBinEnd = newReferencePoint + (j + 1 - BaseIndex) * newBinRange;
+ double crossStart = Max(newBinStart, binStart);
+ double crossEnd = Min(newBinEnd, binEnd);
+ ReserveFreqs[j] += (binStart == binEnd) ? freq : freq * (crossEnd - crossStart) / (binEnd - binStart);
+ }
+ }
+
+ Freqs.swap(ReserveFreqs);
+ SetFrame(newReferencePoint, newMaxValue, false);
+ }
+
}