aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/histogram
diff options
context:
space:
mode:
authorzosimov <zosimov@yandex-team.ru>2022-02-10 16:50:32 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:32 +0300
commita1d67d6a31f789aa011250c3edce5751c0cadad2 (patch)
tree5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/histogram
parenta8f009e06d613c9567eb4c0f461dbed5e0d8092b (diff)
downloadydb-a1d67d6a31f789aa011250c3edce5751c0cadad2.tar.gz
Restoring authorship annotation for <zosimov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/histogram')
-rw-r--r--library/cpp/histogram/adaptive/adaptive_histogram.cpp850
-rw-r--r--library/cpp/histogram/adaptive/adaptive_histogram.h120
-rw-r--r--library/cpp/histogram/adaptive/auto_histogram.h216
-rw-r--r--library/cpp/histogram/adaptive/block_histogram.cpp106
-rw-r--r--library/cpp/histogram/adaptive/block_histogram.h10
-rw-r--r--library/cpp/histogram/adaptive/fixed_bin_histogram.cpp946
-rw-r--r--library/cpp/histogram/adaptive/fixed_bin_histogram.h122
-rw-r--r--library/cpp/histogram/adaptive/histogram.h84
-rw-r--r--library/cpp/histogram/adaptive/merger.h130
-rw-r--r--library/cpp/histogram/adaptive/multi_histogram.h230
-rw-r--r--library/cpp/histogram/adaptive/protos/histo.proto28
-rw-r--r--library/cpp/histogram/adaptive/protos/ya.make2
-rw-r--r--library/cpp/histogram/adaptive/ya.make2
13 files changed, 1423 insertions, 1423 deletions
diff --git a/library/cpp/histogram/adaptive/adaptive_histogram.cpp b/library/cpp/histogram/adaptive/adaptive_histogram.cpp
index e73b45c2c6..cbfc494021 100644
--- a/library/cpp/histogram/adaptive/adaptive_histogram.cpp
+++ b/library/cpp/histogram/adaptive/adaptive_histogram.cpp
@@ -1,72 +1,72 @@
-#include "adaptive_histogram.h"
-
+#include "adaptive_histogram.h"
+
#include <util/generic/algorithm.h>
-#include <util/generic/yexception.h>
-#include <util/generic/ymath.h>
-#include <util/string/printf.h>
-
+#include <util/generic/yexception.h>
+#include <util/generic/ymath.h>
+#include <util/string/printf.h>
+
#include <util/system/backtrace.h>
-namespace NKiwiAggr {
+namespace NKiwiAggr {
TAdaptiveHistogram::TAdaptiveHistogram(size_t intervals, ui64 id, TQualityFunction qualityFunc)
- : Id(id)
- , Sum(0.0)
- , Intervals(intervals)
- , CalcQuality(qualityFunc)
- {
- }
-
+ : Id(id)
+ , Sum(0.0)
+ , Intervals(intervals)
+ , CalcQuality(qualityFunc)
+ {
+ }
+
TAdaptiveHistogram::TAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc)
: TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc)
- {
- FromProto(histo);
- }
-
+ {
+ FromProto(histo);
+ }
+
TAdaptiveHistogram::TAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc)
: TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc)
- {
- TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histo);
- if (!adaptiveHisto) {
- FromIHistogram(histo);
- return;
- }
- Id = adaptiveHisto->Id;
- MinValue = adaptiveHisto->MinValue;
- MaxValue = adaptiveHisto->MaxValue;
- Sum = adaptiveHisto->Sum;
- Intervals = adaptiveHisto->Intervals;
- Bins = adaptiveHisto->Bins;
- BinsByQuality = adaptiveHisto->BinsByQuality;
+ {
+ TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histo);
+ if (!adaptiveHisto) {
+ FromIHistogram(histo);
+ return;
+ }
+ Id = adaptiveHisto->Id;
+ MinValue = adaptiveHisto->MinValue;
+ MaxValue = adaptiveHisto->MaxValue;
+ Sum = adaptiveHisto->Sum;
+ Intervals = adaptiveHisto->Intervals;
+ Bins = adaptiveHisto->Bins;
+ BinsByQuality = adaptiveHisto->BinsByQuality;
if (CalcQuality == nullptr) {
- CalcQuality = adaptiveHisto->CalcQuality;
- }
- }
-
+ CalcQuality = adaptiveHisto->CalcQuality;
+ }
+ }
+
TQualityFunction TAdaptiveHistogram::GetQualityFunc() {
- return CalcQuality;
- }
-
- void TAdaptiveHistogram::Clear() {
- Sum = 0.0;
- Bins.clear();
- BinsByQuality.clear();
- }
-
- void TAdaptiveHistogram::Add(const THistoRec& histoRec) {
- if (!histoRec.HasId() || histoRec.GetId() == Id) {
- Add(histoRec.GetValue(), histoRec.GetWeight());
- }
- }
-
- void TAdaptiveHistogram::Add(double value, double weight) {
- if (!IsValidFloat(value) || !IsValidFloat(weight)) {
+ return CalcQuality;
+ }
+
+ void TAdaptiveHistogram::Clear() {
+ Sum = 0.0;
+ Bins.clear();
+ BinsByQuality.clear();
+ }
+
+ void TAdaptiveHistogram::Add(const THistoRec& histoRec) {
+ if (!histoRec.HasId() || histoRec.GetId() == Id) {
+ Add(histoRec.GetValue(), histoRec.GetWeight());
+ }
+ }
+
+ void TAdaptiveHistogram::Add(double value, double weight) {
+ if (!IsValidFloat(value) || !IsValidFloat(weight)) {
ythrow yexception() << Sprintf("Histogram id %lu: bad value %f weight %f", Id, value, weight);
- }
- TWeightedValue weightedValue(value, weight);
- Add(weightedValue, true);
+ }
+ TWeightedValue weightedValue(value, weight);
+ Add(weightedValue, true);
PrecomputedBins.clear();
- }
-
+ }
+
void TAdaptiveHistogram::Merge(const THistogram& histo, double multiplier) {
if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
fprintf(stderr, "Merging in histogram id %lu: skip bad histo with minvalue %f maxvalue %f\n", Id, histo.GetMinValue(), histo.GetMaxValue());
@@ -87,7 +87,7 @@ namespace NKiwiAggr {
if (!IsValidFloat(value) || !IsValidFloat(weight)) {
fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight);
continue;
- }
+ }
Add(value, weight * multiplier);
}
@@ -99,20 +99,20 @@ namespace NKiwiAggr {
double weight = histo.GetFreq(j);
if (!IsValidFloat(pos) || !IsValidFloat(weight)) {
fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, pos, weight);
- pos += histo.GetBinRange();
+ pos += histo.GetBinRange();
continue;
- }
+ }
Add(pos, weight * multiplier);
pos += histo.GetBinRange();
}
-
+
MinValue = Min(MinValue, histo.GetMinValue());
MaxValue = Max(MaxValue, histo.GetMaxValue());
} else {
ythrow yexception() << "Unknown THistogram type";
- }
- }
-
+ }
+ }
+
void TAdaptiveHistogram::Merge(const TVector<THistogram>& histogramsToMerge) {
for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
Merge(histogramsToMerge[i], 1.0);
@@ -122,64 +122,64 @@ namespace NKiwiAggr {
void TAdaptiveHistogram::Merge(TVector<IHistogramPtr> histogramsToMerge) {
TVector<IHistogramPtr> histogramsToMergeRepacked(0);
TVector<TAdaptiveHistogram*> histograms(0);
- for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
- if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
- continue;
- }
- TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histogramsToMerge[i].Get());
- if (adaptiveHisto) {
- histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
- } else {
+ for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
+ if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
+ continue;
+ }
+ TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histogramsToMerge[i].Get());
+ if (adaptiveHisto) {
+ histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
+ } else {
histogramsToMergeRepacked.push_back(IHistogramPtr(new TAdaptiveHistogram(histogramsToMerge[i].Get(), Intervals, Id, CalcQuality))); // Convert histograms that are not of TFixedBinHistogram type
- }
- if (histogramsToMergeRepacked.back()->Empty()) {
- continue;
- }
- histograms.push_back(dynamic_cast<TAdaptiveHistogram*>(histogramsToMergeRepacked.back().Get()));
- }
-
- if (histograms.size() == 0) {
- return;
- }
-
- for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
- TAdaptiveHistogram* histo = histograms[histoIndex];
- for (TPairSet::const_iterator it = histo->Bins.begin(); it != histo->Bins.end(); ++it) {
- Add(*it, true);
- }
- }
-
- for (size_t i = 0; i < histograms.size(); ++i) {
- MinValue = Min(MinValue, histograms[i]->MinValue);
- MaxValue = Max(MaxValue, histograms[i]->MaxValue);
- }
- }
-
- void TAdaptiveHistogram::Multiply(double factor) {
- if (!IsValidFloat(factor) || factor <= 0) {
- ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
- }
- Sum *= factor;
-
- TPairSet newBins;
- for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
- newBins.insert(TWeightedValue(it->first, it->second * factor));
- }
- Bins = newBins;
-
- TPairSet newBinsByQuality;
- for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
- TPairSet::iterator rightBin = it;
- ++rightBin;
- if (rightBin == Bins.end()) {
- break;
- }
- newBinsByQuality.insert(CalcQuality(*it, *rightBin));
- }
- BinsByQuality = newBinsByQuality;
- }
-
- void TAdaptiveHistogram::FromProto(const THistogram& histo) {
+ }
+ if (histogramsToMergeRepacked.back()->Empty()) {
+ continue;
+ }
+ histograms.push_back(dynamic_cast<TAdaptiveHistogram*>(histogramsToMergeRepacked.back().Get()));
+ }
+
+ if (histograms.size() == 0) {
+ return;
+ }
+
+ for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
+ TAdaptiveHistogram* histo = histograms[histoIndex];
+ for (TPairSet::const_iterator it = histo->Bins.begin(); it != histo->Bins.end(); ++it) {
+ Add(*it, true);
+ }
+ }
+
+ for (size_t i = 0; i < histograms.size(); ++i) {
+ MinValue = Min(MinValue, histograms[i]->MinValue);
+ MaxValue = Max(MaxValue, histograms[i]->MaxValue);
+ }
+ }
+
+ void TAdaptiveHistogram::Multiply(double factor) {
+ if (!IsValidFloat(factor) || factor <= 0) {
+ ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
+ }
+ Sum *= factor;
+
+ TPairSet newBins;
+ for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
+ newBins.insert(TWeightedValue(it->first, it->second * factor));
+ }
+ Bins = newBins;
+
+ TPairSet newBinsByQuality;
+ for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
+ TPairSet::iterator rightBin = it;
+ ++rightBin;
+ if (rightBin == Bins.end()) {
+ break;
+ }
+ newBinsByQuality.insert(CalcQuality(*it, *rightBin));
+ }
+ BinsByQuality = newBinsByQuality;
+ }
+
+ void TAdaptiveHistogram::FromProto(const THistogram& histo) {
Y_VERIFY(histo.HasType(), "Attempt to parse TAdaptiveHistogram from THistogram protobuf with no Type field set");
;
switch (histo.GetType()) { // check that histogram type could be deduced
@@ -193,107 +193,107 @@ namespace NKiwiAggr {
[[fallthrough]];
default: // not ok
ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record of type = " << (ui32)histo.GetType();
- }
+ }
- if (histo.FreqSize() != histo.PositionSize()) {
- ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record where FreqSize != PositionSize. FreqSize == " << (ui32)histo.FreqSize() << ", PositionSize == " << (ui32)histo.PositionSize();
- }
+ if (histo.FreqSize() != histo.PositionSize()) {
+ ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record where FreqSize != PositionSize. FreqSize == " << (ui32)histo.FreqSize() << ", PositionSize == " << (ui32)histo.PositionSize();
+ }
if (CalcQuality == nullptr) {
- if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM) {
- CalcQuality = CalcDistanceQuality;
- } else if (histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM) {
- CalcQuality = CalcWeightQuality;
+ if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM) {
+ CalcQuality = CalcDistanceQuality;
+ } else if (histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM) {
+ CalcQuality = CalcWeightQuality;
} else if (histo.GetType() == HT_ADAPTIVE_WARD_HISTOGRAM) {
CalcQuality = CalcWardQuality;
- } else {
- ythrow yexception() << "Attempt to parse an HT_ADAPTIVE_HISTOGRAM without default quality function";
- }
- }
- Id = histo.GetId();
- Sum = 0.0;
- Intervals = Max(Intervals, histo.FreqSize());
- for (size_t i = 0; i < histo.FreqSize(); ++i) {
- double value = histo.GetPosition(i);
- double weight = histo.GetFreq(i);
- if (!IsValidFloat(value) || !IsValidFloat(weight)) {
+ } else {
+ ythrow yexception() << "Attempt to parse an HT_ADAPTIVE_HISTOGRAM without default quality function";
+ }
+ }
+ Id = histo.GetId();
+ Sum = 0.0;
+ Intervals = Max(Intervals, histo.FreqSize());
+ for (size_t i = 0; i < histo.FreqSize(); ++i) {
+ double value = histo.GetPosition(i);
+ double weight = histo.GetFreq(i);
+ if (!IsValidFloat(value) || !IsValidFloat(weight)) {
fprintf(stderr, "FromProto in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight);
- continue;
- }
- Add(value, weight);
- }
-
- if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
+ continue;
+ }
+ Add(value, weight);
+ }
+
+ if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
ythrow yexception() << Sprintf("FromProto in histogram id %lu: skip bad histo with minvalue %f maxvalue %f", Id, histo.GetMinValue(), histo.GetMaxValue());
- }
-
- MinValue = histo.GetMinValue();
- MaxValue = histo.GetMaxValue();
- }
-
- void TAdaptiveHistogram::ToProto(THistogram& histo) {
- histo.Clear();
- if (CalcQuality == CalcDistanceQuality) {
- histo.SetType(HT_ADAPTIVE_DISTANCE_HISTOGRAM);
- } else if (CalcQuality == CalcWeightQuality) {
- histo.SetType(HT_ADAPTIVE_WEIGHT_HISTOGRAM);
+ }
+
+ MinValue = histo.GetMinValue();
+ MaxValue = histo.GetMaxValue();
+ }
+
+ void TAdaptiveHistogram::ToProto(THistogram& histo) {
+ histo.Clear();
+ if (CalcQuality == CalcDistanceQuality) {
+ histo.SetType(HT_ADAPTIVE_DISTANCE_HISTOGRAM);
+ } else if (CalcQuality == CalcWeightQuality) {
+ histo.SetType(HT_ADAPTIVE_WEIGHT_HISTOGRAM);
} else if (CalcQuality == CalcWardQuality) {
histo.SetType(HT_ADAPTIVE_WARD_HISTOGRAM);
- } else {
- histo.SetType(HT_ADAPTIVE_HISTOGRAM);
- }
- histo.SetId(Id);
- if (Empty()) {
- return;
- }
- histo.SetMinValue(MinValue);
- histo.SetMaxValue(MaxValue);
- for (TPairSet::const_iterator it = Bins.begin(); it != Bins.end(); ++it) {
- histo.AddFreq(it->second);
- histo.AddPosition(it->first);
- }
- }
-
+ } else {
+ histo.SetType(HT_ADAPTIVE_HISTOGRAM);
+ }
+ histo.SetId(Id);
+ if (Empty()) {
+ return;
+ }
+ histo.SetMinValue(MinValue);
+ histo.SetMaxValue(MaxValue);
+ for (TPairSet::const_iterator it = Bins.begin(); it != Bins.end(); ++it) {
+ histo.AddFreq(it->second);
+ histo.AddPosition(it->first);
+ }
+ }
+
void TAdaptiveHistogram::SetId(ui64 id) {
- Id = id;
- }
-
+ Id = id;
+ }
+
ui64 TAdaptiveHistogram::GetId() {
- return Id;
- }
-
- bool TAdaptiveHistogram::Empty() {
- return Bins.size() == 0;
- }
-
- double TAdaptiveHistogram::GetMinValue() {
- return MinValue;
- }
-
- double TAdaptiveHistogram::GetMaxValue() {
- return MaxValue;
- }
-
- double TAdaptiveHistogram::GetSum() {
- return Sum;
- }
-
- double TAdaptiveHistogram::GetSumInRange(double leftBound, double rightBound) {
- if (leftBound > rightBound) {
- return 0.0;
- }
- return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
- }
-
- double TAdaptiveHistogram::GetSumAboveBound(double bound) {
- if (Empty()) {
- return 0.0;
- }
- if (bound < MinValue) {
- return Sum;
- }
- if (bound > MaxValue) {
- return 0.0;
- }
+ return Id;
+ }
+
+ bool TAdaptiveHistogram::Empty() {
+ return Bins.size() == 0;
+ }
+
+ double TAdaptiveHistogram::GetMinValue() {
+ return MinValue;
+ }
+
+ double TAdaptiveHistogram::GetMaxValue() {
+ return MaxValue;
+ }
+
+ double TAdaptiveHistogram::GetSum() {
+ return Sum;
+ }
+
+ double TAdaptiveHistogram::GetSumInRange(double leftBound, double rightBound) {
+ if (leftBound > rightBound) {
+ return 0.0;
+ }
+ return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
+ }
+
+ double TAdaptiveHistogram::GetSumAboveBound(double bound) {
+ if (Empty()) {
+ return 0.0;
+ }
+ if (bound < MinValue) {
+ return Sum;
+ }
+ if (bound > MaxValue) {
+ return 0.0;
+ }
if (!PrecomputedBins.empty()) {
return GetSumAboveBoundImpl(
@@ -314,19 +314,19 @@ namespace NKiwiAggr {
}
return sum;
});
- }
- }
-
- double TAdaptiveHistogram::GetSumBelowBound(double bound) {
- if (Empty()) {
- return 0.0;
- }
- if (bound < MinValue) {
- return 0.0;
- }
- if (bound > MaxValue) {
- return Sum;
- }
+ }
+ }
+
+ double TAdaptiveHistogram::GetSumBelowBound(double bound) {
+ if (Empty()) {
+ return 0.0;
+ }
+ if (bound < MinValue) {
+ return 0.0;
+ }
+ if (bound > MaxValue) {
+ return Sum;
+ }
if (!PrecomputedBins.empty()) {
return GetSumBelowBoundImpl(
@@ -346,90 +346,90 @@ namespace NKiwiAggr {
}
return sum;
});
- }
- }
-
- double TAdaptiveHistogram::CalcUpperBound(double sum) {
+ }
+ }
+
+ double TAdaptiveHistogram::CalcUpperBound(double sum) {
Y_VERIFY(sum >= 0, "Sum must be >= 0");
if (sum == 0.0) {
return MinValue;
}
- if (Empty()) {
- return MaxValue;
- }
- TPairSet::iterator current = Bins.begin();
- double gatheredSum = 0.0;
- while (current != Bins.end() && gatheredSum < sum) {
- gatheredSum += current->second;
- ++current;
- }
- --current;
- if (gatheredSum < sum) {
- return MaxValue;
- }
- TWeightedValue left(MinValue, 0.0);
- TWeightedValue right(MaxValue, 0.0);
- if (current != Bins.begin()) {
- TPairSet::iterator leftBin = current;
- --leftBin;
- left = *leftBin;
- }
- {
- TPairSet::iterator rightBin = current;
- ++rightBin;
- if (rightBin != Bins.end()) {
- right = *rightBin;
- }
- }
- double sumToAdd = sum - (gatheredSum - current->second - left.second / 2);
- if (sumToAdd <= ((current->second + left.second) / 2)) {
- return left.first + 2 * sumToAdd * (current->first - left.first) / (current->second + left.second);
- } else {
- sumToAdd -= (current->second + left.second) / 2;
- return current->first + 2 * sumToAdd * (right.first - current->first) / (right.second + current->second);
- }
- }
-
- double TAdaptiveHistogram::CalcLowerBound(double sum) {
+ if (Empty()) {
+ return MaxValue;
+ }
+ TPairSet::iterator current = Bins.begin();
+ double gatheredSum = 0.0;
+ while (current != Bins.end() && gatheredSum < sum) {
+ gatheredSum += current->second;
+ ++current;
+ }
+ --current;
+ if (gatheredSum < sum) {
+ return MaxValue;
+ }
+ TWeightedValue left(MinValue, 0.0);
+ TWeightedValue right(MaxValue, 0.0);
+ if (current != Bins.begin()) {
+ TPairSet::iterator leftBin = current;
+ --leftBin;
+ left = *leftBin;
+ }
+ {
+ TPairSet::iterator rightBin = current;
+ ++rightBin;
+ if (rightBin != Bins.end()) {
+ right = *rightBin;
+ }
+ }
+ double sumToAdd = sum - (gatheredSum - current->second - left.second / 2);
+ if (sumToAdd <= ((current->second + left.second) / 2)) {
+ return left.first + 2 * sumToAdd * (current->first - left.first) / (current->second + left.second);
+ } else {
+ sumToAdd -= (current->second + left.second) / 2;
+ return current->first + 2 * sumToAdd * (right.first - current->first) / (right.second + current->second);
+ }
+ }
+
+ double TAdaptiveHistogram::CalcLowerBound(double sum) {
Y_VERIFY(sum >= 0, "Sum must be >= 0");
if (sum == 0.0) {
return MaxValue;
}
- if (Empty()) {
- return MinValue;
- }
- TPairSet::iterator current = Bins.end();
- double gatheredSum = 0.0;
- while (current != Bins.begin() && gatheredSum < sum) {
- --current;
- gatheredSum += current->second;
- }
- if (gatheredSum < sum) {
- return MinValue;
- }
- TWeightedValue left(MinValue, 0.0);
- TWeightedValue right(MaxValue, 0.0);
- if (current != Bins.begin()) {
- TPairSet::iterator leftBin = current;
- --leftBin;
- left = *leftBin;
- }
- {
- TPairSet::iterator rightBin = current;
- ++rightBin;
- if (rightBin != Bins.end()) {
- right = *rightBin;
- }
- }
- double sumToAdd = sum - (gatheredSum - current->second - right.second / 2);
- if (sumToAdd <= ((current->second + right.second) / 2)) {
- return right.first - 2 * sumToAdd * (right.first - current->first) / (current->second + right.second);
- } else {
- sumToAdd -= (current->second + right.second) / 2;
- return current->first - 2 * sumToAdd * (current->first - left.first) / (left.second + current->second);
- }
- }
-
+ if (Empty()) {
+ return MinValue;
+ }
+ TPairSet::iterator current = Bins.end();
+ double gatheredSum = 0.0;
+ while (current != Bins.begin() && gatheredSum < sum) {
+ --current;
+ gatheredSum += current->second;
+ }
+ if (gatheredSum < sum) {
+ return MinValue;
+ }
+ TWeightedValue left(MinValue, 0.0);
+ TWeightedValue right(MaxValue, 0.0);
+ if (current != Bins.begin()) {
+ TPairSet::iterator leftBin = current;
+ --leftBin;
+ left = *leftBin;
+ }
+ {
+ TPairSet::iterator rightBin = current;
+ ++rightBin;
+ if (rightBin != Bins.end()) {
+ right = *rightBin;
+ }
+ }
+ double sumToAdd = sum - (gatheredSum - current->second - right.second / 2);
+ if (sumToAdd <= ((current->second + right.second) / 2)) {
+ return right.first - 2 * sumToAdd * (right.first - current->first) / (current->second + right.second);
+ } else {
+ sumToAdd -= (current->second + right.second) / 2;
+ return current->first - 2 * sumToAdd * (current->first - left.first) / (left.second + current->second);
+ }
+ }
+
double TAdaptiveHistogram::CalcUpperBoundSafe(double sum) {
if (!Empty()) {
sum = Max(Bins.begin()->second, sum);
@@ -444,145 +444,145 @@ namespace NKiwiAggr {
return CalcLowerBound(sum);
}
- void TAdaptiveHistogram::FromIHistogram(IHistogram* histo) {
- if (!histo) {
- ythrow yexception() << "Attempt to create TAdaptiveHistogram from a NULL pointer";
- }
+ void TAdaptiveHistogram::FromIHistogram(IHistogram* histo) {
+ if (!histo) {
+ ythrow yexception() << "Attempt to create TAdaptiveHistogram from a NULL pointer";
+ }
if (CalcQuality == CalcWardQuality) {
ythrow yexception() << "Not implemented";
} else if (CalcQuality != CalcDistanceQuality && CalcQuality != CalcWeightQuality) {
- ythrow yexception() << "Attempt to create TAdaptiveHistogram from a pointer without default CalcQuality";
- }
- Id = histo->GetId();
- if (histo->Empty()) {
- return;
- }
- double sum = histo->GetSum();
- double minValue = histo->GetMinValue();
- double maxValue = histo->GetMaxValue();
- if (minValue == maxValue) {
- Add(minValue, sum);
- return;
- }
- if (CalcQuality == CalcDistanceQuality) {
- double binRange = (maxValue - minValue) / (Intervals);
- for (size_t i = 0; i < Intervals; ++i) {
- Add(minValue + binRange * (i + 0.5), histo->GetSumInRange(minValue + binRange * i, minValue + binRange * (i + 1)));
- }
- } else if (CalcQuality == CalcWeightQuality && sum != 0.0) {
- double slab = sum / Intervals;
- double prevBound = minValue;
- for (size_t i = 0; i < Intervals; ++i) {
- double bound = histo->CalcUpperBound(slab * (i + 1));
- Add((bound + prevBound) / 2, slab);
- prevBound = bound;
- }
- }
- MinValue = minValue;
- MaxValue = maxValue;
- }
-
- void TAdaptiveHistogram::Add(const TWeightedValue& weightedValue, bool initial) {
- const double& value = weightedValue.first;
- const double& weight = weightedValue.second;
- if (weight <= 0.0) {
- return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
- }
- if (initial) {
- Sum += weight;
- }
- if (Bins.size() == 0) {
- MinValue = value;
- MaxValue = value;
- Bins.insert(weightedValue);
- return;
- }
- if (value < MinValue) {
- MinValue = value;
- }
- if (value > MaxValue) {
- MaxValue = value;
- }
- TPairSet::iterator rightBin = Bins.lower_bound(TWeightedValue(value, -1.0));
- if (rightBin != Bins.end() && rightBin->first == value) {
- TPairSet::iterator currentBin = rightBin;
- ++rightBin;
- TWeightedValue newBin(value, weight + currentBin->second);
- if (rightBin != Bins.end()) {
+ ythrow yexception() << "Attempt to create TAdaptiveHistogram from a pointer without default CalcQuality";
+ }
+ Id = histo->GetId();
+ if (histo->Empty()) {
+ return;
+ }
+ double sum = histo->GetSum();
+ double minValue = histo->GetMinValue();
+ double maxValue = histo->GetMaxValue();
+ if (minValue == maxValue) {
+ Add(minValue, sum);
+ return;
+ }
+ if (CalcQuality == CalcDistanceQuality) {
+ double binRange = (maxValue - minValue) / (Intervals);
+ for (size_t i = 0; i < Intervals; ++i) {
+ Add(minValue + binRange * (i + 0.5), histo->GetSumInRange(minValue + binRange * i, minValue + binRange * (i + 1)));
+ }
+ } else if (CalcQuality == CalcWeightQuality && sum != 0.0) {
+ double slab = sum / Intervals;
+ double prevBound = minValue;
+ for (size_t i = 0; i < Intervals; ++i) {
+ double bound = histo->CalcUpperBound(slab * (i + 1));
+ Add((bound + prevBound) / 2, slab);
+ prevBound = bound;
+ }
+ }
+ MinValue = minValue;
+ MaxValue = maxValue;
+ }
+
+ void TAdaptiveHistogram::Add(const TWeightedValue& weightedValue, bool initial) {
+ const double& value = weightedValue.first;
+ const double& weight = weightedValue.second;
+ if (weight <= 0.0) {
+ return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
+ }
+ if (initial) {
+ Sum += weight;
+ }
+ if (Bins.size() == 0) {
+ MinValue = value;
+ MaxValue = value;
+ Bins.insert(weightedValue);
+ return;
+ }
+ if (value < MinValue) {
+ MinValue = value;
+ }
+ if (value > MaxValue) {
+ MaxValue = value;
+ }
+ TPairSet::iterator rightBin = Bins.lower_bound(TWeightedValue(value, -1.0));
+ if (rightBin != Bins.end() && rightBin->first == value) {
+ TPairSet::iterator currentBin = rightBin;
+ ++rightBin;
+ TWeightedValue newBin(value, weight + currentBin->second);
+ if (rightBin != Bins.end()) {
Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) == 1, "Erase failed");
- BinsByQuality.insert(CalcQuality(newBin, *rightBin));
- }
- if (currentBin != Bins.begin()) {
- TPairSet::iterator leftBin = currentBin;
- --leftBin;
+ BinsByQuality.insert(CalcQuality(newBin, *rightBin));
+ }
+ if (currentBin != Bins.begin()) {
+ TPairSet::iterator leftBin = currentBin;
+ --leftBin;
Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) == 1, "Erase failed");
- BinsByQuality.insert(CalcQuality(*leftBin, newBin));
- }
- Bins.erase(currentBin);
- Bins.insert(newBin);
- return;
- }
- if (rightBin == Bins.begin()) {
- BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
- } else {
- TPairSet::iterator leftBin = rightBin;
- --leftBin;
- if (rightBin == Bins.end()) {
- BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
- } else {
+ BinsByQuality.insert(CalcQuality(*leftBin, newBin));
+ }
+ Bins.erase(currentBin);
+ Bins.insert(newBin);
+ return;
+ }
+ if (rightBin == Bins.begin()) {
+ BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
+ } else {
+ TPairSet::iterator leftBin = rightBin;
+ --leftBin;
+ if (rightBin == Bins.end()) {
+ BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
+ } else {
Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *rightBin)) == 1, "Erase failed");
- BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
- BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
- }
- }
- Bins.insert(weightedValue);
- if (Bins.size() > Intervals) {
- Shrink();
- }
- }
-
- void TAdaptiveHistogram::Erase(double value) {
- TPairSet::iterator currentBin = Bins.lower_bound(TWeightedValue(value, -1.0));
+ BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
+ BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
+ }
+ }
+ Bins.insert(weightedValue);
+ if (Bins.size() > Intervals) {
+ Shrink();
+ }
+ }
+
+ void TAdaptiveHistogram::Erase(double value) {
+ TPairSet::iterator currentBin = Bins.lower_bound(TWeightedValue(value, -1.0));
Y_VERIFY(currentBin != Bins.end() && currentBin->first == value, "Can't find bin that should be erased");
- TPairSet::iterator rightBin = currentBin;
- ++rightBin;
- if (currentBin == Bins.begin()) {
+ TPairSet::iterator rightBin = currentBin;
+ ++rightBin;
+ if (currentBin == Bins.begin()) {
Y_VERIFY(rightBin != Bins.end(), "No right bin for the first bin");
Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed");
- } else {
- TPairSet::iterator leftBin = currentBin;
- --leftBin;
- if (rightBin == Bins.end()) {
+ } else {
+ TPairSet::iterator leftBin = currentBin;
+ --leftBin;
+ if (rightBin == Bins.end()) {
Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed");
- } else {
+ } else {
Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed");
Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed");
- BinsByQuality.insert(CalcQuality(*leftBin, *rightBin));
- }
- }
- Bins.erase(currentBin);
- }
-
- void TAdaptiveHistogram::Shrink() {
- TPairSet::iterator worstBin = BinsByQuality.begin();
+ BinsByQuality.insert(CalcQuality(*leftBin, *rightBin));
+ }
+ }
+ Bins.erase(currentBin);
+ }
+
+ void TAdaptiveHistogram::Shrink() {
+ TPairSet::iterator worstBin = BinsByQuality.begin();
Y_VERIFY(worstBin != BinsByQuality.end(), "No right bin for the first bin");
- TPairSet::iterator leftBin = Bins.lower_bound(TWeightedValue(worstBin->second, -1.0));
+ TPairSet::iterator leftBin = Bins.lower_bound(TWeightedValue(worstBin->second, -1.0));
Y_VERIFY(leftBin != Bins.end() && leftBin->first == worstBin->second, "Can't find worst bin");
- TPairSet::iterator rightBin = leftBin;
- ++rightBin;
+ TPairSet::iterator rightBin = leftBin;
+ ++rightBin;
Y_VERIFY(rightBin != Bins.end(), "Can't find right bin");
-
- TWeightedValue newBin((leftBin->first * leftBin->second + rightBin->first * rightBin->second) / (leftBin->second + rightBin->second), leftBin->second + rightBin->second);
- if (Bins.size() > 2) {
- Erase(leftBin->first);
- Erase(rightBin->first);
- } else {
- Bins.clear();
- BinsByQuality.clear();
- }
- Add(newBin, false);
- }
-
+
+ TWeightedValue newBin((leftBin->first * leftBin->second + rightBin->first * rightBin->second) / (leftBin->second + rightBin->second), leftBin->second + rightBin->second);
+ if (Bins.size() > 2) {
+ Erase(leftBin->first);
+ Erase(rightBin->first);
+ } else {
+ Bins.clear();
+ BinsByQuality.clear();
+ }
+ Add(newBin, false);
+ }
+
void TAdaptiveHistogram::PrecomputePartialSums() {
PrecomputedBins.clear();
PrecomputedBins.reserve(Bins.size());
diff --git a/library/cpp/histogram/adaptive/adaptive_histogram.h b/library/cpp/histogram/adaptive/adaptive_histogram.h
index daf772584a..fa8f48433f 100644
--- a/library/cpp/histogram/adaptive/adaptive_histogram.h
+++ b/library/cpp/histogram/adaptive/adaptive_histogram.h
@@ -1,22 +1,22 @@
-#pragma once
-
-#include "histogram.h"
+#pragma once
+
+#include "histogram.h"
#include "common.h"
-
+
#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
-
-#include <util/generic/ptr.h>
-#include <util/generic/set.h>
-#include <util/generic/vector.h>
-
-namespace NKiwiAggr {
+
+#include <util/generic/ptr.h>
+#include <util/generic/set.h>
+#include <util/generic/vector.h>
+
+namespace NKiwiAggr {
class TAdaptiveHistogram: private TNonCopyable, public IHistogram {
- protected:
- static const size_t DEFAULT_INTERVALS = 100;
-
- private:
+ protected:
+ static const size_t DEFAULT_INTERVALS = 100;
+
+ private:
using TPairSet = TSet<TWeightedValue>;
-
+
struct TFastBin {
// these names are for compatibility with TWeightedValue
double first;
@@ -39,40 +39,40 @@ namespace NKiwiAggr {
};
ui64 Id;
- double MinValue;
- double MaxValue;
- double Sum;
- size_t Intervals;
- TPairSet Bins;
- TPairSet BinsByQuality;
- TQualityFunction CalcQuality;
-
+ double MinValue;
+ double MaxValue;
+ double Sum;
+ size_t Intervals;
+ TPairSet Bins;
+ TPairSet BinsByQuality;
+ TQualityFunction CalcQuality;
+
TVector<TFastBin> PrecomputedBins;
- public:
+ public:
TAdaptiveHistogram(size_t intervals, ui64 id = 0, TQualityFunction qualityFunc = CalcWeightQuality);
TAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0, TQualityFunction qualityFunc = nullptr);
TAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0, TQualityFunction qualityFunc = CalcWeightQuality);
-
+
~TAdaptiveHistogram() override {
- }
-
- TQualityFunction GetQualityFunc();
-
+ }
+
+ TQualityFunction GetQualityFunc();
+
void Clear() override;
-
+
void Add(double value, double weight) override;
void Add(const THistoRec& histoRec) override;
-
+
void Merge(const THistogram& histo, double multiplier) final;
void Merge(const TVector<THistogram>& histogramsToMerge) final;
void Merge(TVector<IHistogramPtr> histogramsToMerge) final;
-
+
void Multiply(double factor) final;
-
+
void FromProto(const THistogram& histo) final;
void ToProto(THistogram& histo) final;
-
+
void SetId(ui64 id) final;
ui64 GetId() final;
bool Empty() final;
@@ -86,46 +86,46 @@ namespace NKiwiAggr {
double CalcLowerBound(double sum) final;
double CalcUpperBoundSafe(double sum) final;
double CalcLowerBoundSafe(double sum) final;
-
+
void PrecomputePartialSums() final;
- private:
- void FromIHistogram(IHistogram* histo);
- void Add(const TWeightedValue& weightedValue, bool initial);
- void Erase(double value);
- void Shrink();
+ private:
+ void FromIHistogram(IHistogram* histo);
+ void Add(const TWeightedValue& weightedValue, bool initial);
+ void Erase(double value);
+ void Shrink();
template <typename TBins, typename TGetSumAbove>
double GetSumAboveBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumAbove& getSumAbove) const;
template <typename TBins, typename TGetSumBelow>
double GetSumBelowBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumBelow& getSumBelow) const;
- };
-
+ };
+
template <TQualityFunction QualityFunction>
- class TDefinedAdaptiveHistogram: public TAdaptiveHistogram {
- public:
+ class TDefinedAdaptiveHistogram: public TAdaptiveHistogram {
+ public:
TDefinedAdaptiveHistogram(size_t intervals, ui64 id = 0)
- : TAdaptiveHistogram(intervals, id, QualityFunction)
- {
- }
-
+ : TAdaptiveHistogram(intervals, id, QualityFunction)
+ {
+ }
+
TDefinedAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0)
- : TAdaptiveHistogram(histo, defaultIntervals, defaultId, QualityFunction)
- {
- }
-
+ : TAdaptiveHistogram(histo, defaultIntervals, defaultId, QualityFunction)
+ {
+ }
+
TDefinedAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0)
- : TAdaptiveHistogram(histo, defaultIntervals, defaultId, QualityFunction)
- {
- }
-
+ : TAdaptiveHistogram(histo, defaultIntervals, defaultId, QualityFunction)
+ {
+ }
+
~TDefinedAdaptiveHistogram() override {
- }
- };
-
+ }
+ };
+
typedef TDefinedAdaptiveHistogram<CalcDistanceQuality> TAdaptiveDistanceHistogram;
typedef TDefinedAdaptiveHistogram<CalcWeightQuality> TAdaptiveWeightHistogram;
typedef TDefinedAdaptiveHistogram<CalcWardQuality> TAdaptiveWardHistogram;
-
+
}
diff --git a/library/cpp/histogram/adaptive/auto_histogram.h b/library/cpp/histogram/adaptive/auto_histogram.h
index 57c4c7b7fe..9fdf0b9abe 100644
--- a/library/cpp/histogram/adaptive/auto_histogram.h
+++ b/library/cpp/histogram/adaptive/auto_histogram.h
@@ -1,136 +1,136 @@
-#pragma once
-
-#include "adaptive_histogram.h"
-#include "fixed_bin_histogram.h"
-#include "histogram.h"
-
+#pragma once
+
+#include "adaptive_histogram.h"
+#include "fixed_bin_histogram.h"
+#include "histogram.h"
+
#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
-
-#include <util/generic/ptr.h>
+
+#include <util/generic/ptr.h>
#include <util/generic/yexception.h>
-
-namespace NKiwiAggr {
+
+namespace NKiwiAggr {
class TAutoHistogram: private TNonCopyable, public IHistogram {
- private:
- static const size_t DEFAULT_INTERVALS = 100;
-
+ private:
+ static const size_t DEFAULT_INTERVALS = 100;
+
ui64 Id;
- size_t Intervals;
- THolder<IHistogram> HistogramImpl;
-
- public:
+ size_t Intervals;
+ THolder<IHistogram> HistogramImpl;
+
+ public:
TAutoHistogram(size_t intervals, ui64 id = 0) {
Y_UNUSED(intervals);
Y_UNUSED(id);
- ythrow yexception() << "Empty constructor is not defined for TAutoHistogram";
- }
-
+ ythrow yexception() << "Empty constructor is not defined for TAutoHistogram";
+ }
+
TAutoHistogram(const THistogram& histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0)
- : Id(defaultId)
- , Intervals(defaultIntervals)
- {
- FromProto(histo);
- }
-
+ : Id(defaultId)
+ , Intervals(defaultIntervals)
+ {
+ FromProto(histo);
+ }
+
TAutoHistogram(IHistogram* histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0) {
Y_UNUSED(histo);
Y_UNUSED(defaultIntervals);
Y_UNUSED(defaultId);
- ythrow yexception() << "IHistogram constructor is not defined for TAutoHistogram";
- }
-
+ ythrow yexception() << "IHistogram constructor is not defined for TAutoHistogram";
+ }
+
virtual ~TAutoHistogram() {
- }
-
- virtual void Clear() {
- HistogramImpl->Clear();
- }
-
- virtual void Add(double value, double weight) {
- HistogramImpl->Add(value, weight);
- }
-
- virtual void Add(const THistoRec& histoRec) {
- HistogramImpl->Add(histoRec);
- }
-
+ }
+
+ virtual void Clear() {
+ HistogramImpl->Clear();
+ }
+
+ virtual void Add(double value, double weight) {
+ HistogramImpl->Add(value, weight);
+ }
+
+ virtual void Add(const THistoRec& histoRec) {
+ HistogramImpl->Add(histoRec);
+ }
+
virtual void Merge(const THistogram& histo, double multiplier) {
HistogramImpl->Merge(histo, multiplier);
}
virtual void Merge(const TVector<THistogram>& histogramsToMerge) {
- HistogramImpl->Merge(histogramsToMerge);
- }
-
+ HistogramImpl->Merge(histogramsToMerge);
+ }
+
virtual void Merge(TVector<IHistogramPtr> histogramsToMerge) {
- HistogramImpl->Merge(histogramsToMerge);
- }
-
- virtual void Multiply(double factor) {
- HistogramImpl->Multiply(factor);
- }
-
- virtual void FromProto(const THistogram& histo) {
- if (!histo.HasType() || histo.GetType() == HT_FIXED_BIN_HISTOGRAM) {
- HistogramImpl.Reset(new TFixedBinHistogram(histo, Intervals, Id));
+ HistogramImpl->Merge(histogramsToMerge);
+ }
+
+ virtual void Multiply(double factor) {
+ HistogramImpl->Multiply(factor);
+ }
+
+ virtual void FromProto(const THistogram& histo) {
+ if (!histo.HasType() || histo.GetType() == HT_FIXED_BIN_HISTOGRAM) {
+ HistogramImpl.Reset(new TFixedBinHistogram(histo, Intervals, Id));
} else if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM) {
- HistogramImpl.Reset(new TAdaptiveDistanceHistogram(histo, Intervals, Id));
+ HistogramImpl.Reset(new TAdaptiveDistanceHistogram(histo, Intervals, Id));
} else if (histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM) {
- HistogramImpl.Reset(new TAdaptiveWeightHistogram(histo, Intervals, Id));
+ HistogramImpl.Reset(new TAdaptiveWeightHistogram(histo, Intervals, Id));
} else if (histo.GetType() == HT_ADAPTIVE_WARD_HISTOGRAM) {
HistogramImpl.Reset(new TAdaptiveWardHistogram(histo, Intervals, Id));
} else {
ythrow yexception() << "Can't parse TAutoHistogram from a THistogram of type " << (ui32)histo.GetType();
- }
- }
-
- virtual void ToProto(THistogram& histo) {
- HistogramImpl->ToProto(histo);
- }
-
+ }
+ }
+
+ virtual void ToProto(THistogram& histo) {
+ HistogramImpl->ToProto(histo);
+ }
+
virtual void SetId(ui64 id) {
- HistogramImpl->SetId(id);
- }
-
+ HistogramImpl->SetId(id);
+ }
+
virtual ui64 GetId() {
- return HistogramImpl->GetId();
- }
-
- virtual bool Empty() {
- return HistogramImpl->Empty();
- }
-
- virtual double GetMinValue() {
- return HistogramImpl->GetMinValue();
- }
-
- virtual double GetMaxValue() {
- return HistogramImpl->GetMaxValue();
- }
-
- virtual double GetSum() {
- return HistogramImpl->GetSum();
- }
-
- virtual double GetSumInRange(double leftBound, double rightBound) {
- return HistogramImpl->GetSumInRange(leftBound, rightBound);
- }
-
- virtual double GetSumAboveBound(double bound) {
- return HistogramImpl->GetSumAboveBound(bound);
- }
-
- virtual double GetSumBelowBound(double bound) {
- return HistogramImpl->GetSumBelowBound(bound);
- }
-
- virtual double CalcUpperBound(double sum) {
- return HistogramImpl->CalcUpperBound(sum);
- }
-
- virtual double CalcLowerBound(double sum) {
- return HistogramImpl->CalcLowerBound(sum);
- }
+ return HistogramImpl->GetId();
+ }
+
+ virtual bool Empty() {
+ return HistogramImpl->Empty();
+ }
+
+ virtual double GetMinValue() {
+ return HistogramImpl->GetMinValue();
+ }
+
+ virtual double GetMaxValue() {
+ return HistogramImpl->GetMaxValue();
+ }
+
+ virtual double GetSum() {
+ return HistogramImpl->GetSum();
+ }
+
+ virtual double GetSumInRange(double leftBound, double rightBound) {
+ return HistogramImpl->GetSumInRange(leftBound, rightBound);
+ }
+
+ virtual double GetSumAboveBound(double bound) {
+ return HistogramImpl->GetSumAboveBound(bound);
+ }
+
+ virtual double GetSumBelowBound(double bound) {
+ return HistogramImpl->GetSumBelowBound(bound);
+ }
+
+ virtual double CalcUpperBound(double sum) {
+ return HistogramImpl->CalcUpperBound(sum);
+ }
+
+ virtual double CalcLowerBound(double sum) {
+ return HistogramImpl->CalcLowerBound(sum);
+ }
virtual double CalcUpperBoundSafe(double sum) {
return HistogramImpl->CalcUpperBoundSafe(sum);
@@ -143,6 +143,6 @@ namespace NKiwiAggr {
virtual void PrecomputePartialSums() {
return HistogramImpl->PrecomputePartialSums();
}
- };
-
+ };
+
}
diff --git a/library/cpp/histogram/adaptive/block_histogram.cpp b/library/cpp/histogram/adaptive/block_histogram.cpp
index d1528b64ba..6586d13ff6 100644
--- a/library/cpp/histogram/adaptive/block_histogram.cpp
+++ b/library/cpp/histogram/adaptive/block_histogram.cpp
@@ -7,8 +7,8 @@
#include <util/generic/intrlist.h>
#include <util/generic/ptr.h>
#include <util/generic/queue.h>
-#include <util/generic/ymath.h>
-#include <util/string/printf.h>
+#include <util/generic/ymath.h>
+#include <util/string/printf.h>
namespace {
struct TEmpty {
@@ -97,11 +97,11 @@ namespace NKiwiAggr {
}
void TBlockHistogram::Clear() {
- PrevSize = 0;
- Sum = 0.0;
- MinValue = 0.0;
- MaxValue = 0.0;
- Bins.clear();
+ PrevSize = 0;
+ Sum = 0.0;
+ MinValue = 0.0;
+ MaxValue = 0.0;
+ Bins.clear();
}
void TBlockHistogram::Add(const THistoRec& rec) {
@@ -111,10 +111,10 @@ namespace NKiwiAggr {
}
void TBlockHistogram::Add(double value, double weight) {
- if (!IsValidFloat(value) || !IsValidFloat(weight)) {
+ if (!IsValidFloat(value) || !IsValidFloat(weight)) {
ythrow yexception() << Sprintf("Histogram id %lu: bad value %f weight %f", Id, value, weight);
- }
-
+ }
+
if (weight <= 0.0) {
return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
}
@@ -129,9 +129,9 @@ namespace NKiwiAggr {
Sum += weight;
- if (Bins.size() > ShrinkSize) {
+ if (Bins.size() > ShrinkSize) {
SortAndShrink(Intervals * SHRINK_MULTIPLIER);
- }
+ }
Bins.push_back(TWeightedValue(value, weight));
}
@@ -156,7 +156,7 @@ namespace NKiwiAggr {
if (!IsValidFloat(value) || !IsValidFloat(weight)) {
fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight);
continue;
- }
+ }
Add(value, weight * multiplier);
}
@@ -168,20 +168,20 @@ namespace NKiwiAggr {
double weight = histo.GetFreq(j);
if (!IsValidFloat(pos) || !IsValidFloat(weight)) {
fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, pos, weight);
- pos += histo.GetBinRange();
+ pos += histo.GetBinRange();
continue;
- }
+ }
Add(pos, weight * multiplier);
pos += histo.GetBinRange();
}
-
+
MinValue = Min(MinValue, histo.GetMinValue());
MaxValue = Max(MaxValue, histo.GetMaxValue());
} else {
ythrow yexception() << "Unknown THistogram type";
- }
- }
-
+ }
+ }
+
void TBlockHistogram::Merge(const TVector<THistogram>& histogramsToMerge) {
for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
Merge(histogramsToMerge[i], 1.0);
@@ -191,18 +191,18 @@ namespace NKiwiAggr {
void TBlockHistogram::Merge(TVector<IHistogramPtr> histogramsToMerge) {
Y_UNUSED(histogramsToMerge);
ythrow yexception() << "IHistogram::Merge(TVector<IHistogramPtr>) is not defined for TBlockHistogram";
- }
-
+ }
+
void TBlockHistogram::Multiply(double factor) {
- if (!IsValidFloat(factor) || factor <= 0) {
- ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
- }
- Sum *= factor;
+ if (!IsValidFloat(factor) || factor <= 0) {
+ ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
+ }
+ Sum *= factor;
for (TVector<TWeightedValue>::iterator it = Bins.begin(); it != Bins.end(); ++it) {
- it->second *= factor;
- }
- }
-
+ it->second *= factor;
+ }
+ }
+
void TBlockHistogram::FromProto(const THistogram& histo) {
Y_VERIFY(histo.HasType(), "Attempt to parse TBlockHistogram from THistogram protobuf with no Type field set");
;
@@ -226,20 +226,20 @@ namespace NKiwiAggr {
Bins.resize(histo.FreqSize());
PrevSize = Bins.size();
for (size_t i = 0; i < histo.FreqSize(); ++i) {
- double value = histo.GetPosition(i);
- double weight = histo.GetFreq(i);
- if (!IsValidFloat(value) || !IsValidFloat(weight)) {
+ double value = histo.GetPosition(i);
+ double weight = histo.GetFreq(i);
+ if (!IsValidFloat(value) || !IsValidFloat(weight)) {
fprintf(stderr, "FromProto in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight);
- continue;
- }
- Bins[i].first = value;
- Bins[i].second = weight;
+ continue;
+ }
+ Bins[i].first = value;
+ Bins[i].second = weight;
Sum += Bins[i].second;
}
-
- if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
+
+ if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
ythrow yexception() << Sprintf("FromProto in histogram id %lu: skip bad histo with minvalue %f maxvalue %f", Id, histo.GetMinValue(), histo.GetMaxValue());
- }
+ }
MinValue = histo.GetMinValue();
MaxValue = histo.GetMaxValue();
}
@@ -262,9 +262,9 @@ namespace NKiwiAggr {
}
void TBlockHistogram::SetId(ui64 id) {
- Id = id;
- }
-
+ Id = id;
+ }
+
ui64 TBlockHistogram::GetId() {
return Id;
}
@@ -274,11 +274,11 @@ namespace NKiwiAggr {
}
double TBlockHistogram::GetMinValue() {
- return MinValue;
+ return MinValue;
}
double TBlockHistogram::GetMaxValue() {
- return MaxValue;
+ return MaxValue;
}
double TBlockHistogram::GetSum() {
@@ -288,9 +288,9 @@ namespace NKiwiAggr {
void TBlockHistogram::SortAndShrink(size_t intervals, bool final) {
Y_VERIFY(intervals > 0);
- if (Bins.size() <= intervals) {
+ if (Bins.size() <= intervals) {
return;
- }
+ }
if (Bins.size() >= Intervals * GREEDY_SHRINK_MULTIPLIER) {
SortBins();
@@ -301,7 +301,7 @@ namespace NKiwiAggr {
SlowShrink(intervals);
}
- } else {
+ } else {
SortBins();
UniquifyBins();
SlowShrink(intervals);
@@ -351,9 +351,9 @@ namespace NKiwiAggr {
PrevSize = pos + 1;
}
- if (Bins.size() <= intervals) {
+ if (Bins.size() <= intervals) {
return;
- }
+ }
typedef TIntrusiveListItem<TEmpty> TListItem;
@@ -363,15 +363,15 @@ namespace NKiwiAggr {
TArrayHolder<TListItem> listElementsHolder(new TListItem[end + 1]);
TListItem* const bins = listElementsHolder.Get();
- for (ui32 i = 1; i <= end; ++i) {
+ for (ui32 i = 1; i <= end; ++i) {
bins[i].LinkAfter(&bins[i - 1]);
- }
+ }
TVector<double> pairWeights(n);
- for (ui32 i = 0; i < n; ++i) {
+ for (ui32 i = 0; i < n; ++i) {
pairWeights[i] = CalcQuality(Bins[i], Bins[i + 1]).first;
- }
+ }
TSmartHeap heap(pairWeights);
diff --git a/library/cpp/histogram/adaptive/block_histogram.h b/library/cpp/histogram/adaptive/block_histogram.h
index 3a8d91af01..266bb2f2b2 100644
--- a/library/cpp/histogram/adaptive/block_histogram.h
+++ b/library/cpp/histogram/adaptive/block_histogram.h
@@ -54,17 +54,17 @@ namespace NKiwiAggr {
virtual ~TBlockHistogram() {
}
- virtual void Clear();
-
+ virtual void Clear();
+
virtual void Add(double value, double weight);
virtual void Add(const THistoRec& histoRec);
virtual void Merge(const THistogram& histo, double multiplier);
virtual void Merge(const TVector<THistogram>& histogramsToMerge);
virtual void Merge(TVector<IHistogramPtr> histogramsToMerge); // not implemented
-
- virtual void Multiply(double factor);
-
+
+ virtual void Multiply(double factor);
+
virtual void FromProto(const THistogram& histo);
virtual void ToProto(THistogram& histo);
diff --git a/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp b/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp
index 44d0cc57c6..558aba9e2d 100644
--- a/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp
+++ b/library/cpp/histogram/adaptive/fixed_bin_histogram.cpp
@@ -1,425 +1,425 @@
#include "fixed_bin_histogram.h"
-#include "auto_histogram.h"
+#include "auto_histogram.h"
#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
#include <util/generic/algorithm.h>
-#include <util/generic/yexception.h>
-#include <util/generic/ymath.h>
-#include <util/string/printf.h>
-
+#include <util/generic/yexception.h>
+#include <util/generic/ymath.h>
+#include <util/string/printf.h>
+
namespace NKiwiAggr {
TFixedBinHistogram::TFixedBinHistogram(size_t intervals, ui64 id, size_t trainingSetSize)
- : TrainingSetSize(trainingSetSize)
- , IsInitialized(false)
- , IsEmpty(true)
- , Id(id)
- , Sum(0.0)
- , Freqs(0)
- , ReserveFreqs(0)
- , BinRange(0.0)
- , Intervals(intervals)
- , BaseIndex(intervals / 2)
- {
- }
-
+ : TrainingSetSize(trainingSetSize)
+ , IsInitialized(false)
+ , IsEmpty(true)
+ , Id(id)
+ , Sum(0.0)
+ , Freqs(0)
+ , ReserveFreqs(0)
+ , BinRange(0.0)
+ , Intervals(intervals)
+ , BaseIndex(intervals / 2)
+ {
+ }
+
TFixedBinHistogram::TFixedBinHistogram(const THistogram& histo, size_t defaultIntervals, ui64 defaultId, size_t trainingSetSize)
- : TrainingSetSize(trainingSetSize)
- , IsInitialized(false)
- , IsEmpty(true)
- , Id(defaultId)
- , Sum(0.0)
- , Freqs(0)
- , ReserveFreqs(0)
- , BinRange(0.0)
- , Intervals(defaultIntervals)
- , BaseIndex(defaultIntervals / 2)
- {
- FromProto(histo);
- }
+ : TrainingSetSize(trainingSetSize)
+ , IsInitialized(false)
+ , IsEmpty(true)
+ , Id(defaultId)
+ , Sum(0.0)
+ , Freqs(0)
+ , ReserveFreqs(0)
+ , BinRange(0.0)
+ , Intervals(defaultIntervals)
+ , BaseIndex(defaultIntervals / 2)
+ {
+ FromProto(histo);
+ }
TFixedBinHistogram::TFixedBinHistogram(IHistogram* histo, size_t defaultIntervals, ui64 defaultId, size_t trainingSetSize)
- : TrainingSetSize(trainingSetSize)
- , IsInitialized(false)
- , IsEmpty(true)
- , Id(defaultId)
- , Sum(0.0)
- , Freqs(0)
- , ReserveFreqs(0)
- , BinRange(0.0)
- , Intervals(defaultIntervals)
- , BaseIndex(defaultIntervals / 2)
- {
- TFixedBinHistogram* fixedBinHisto = dynamic_cast<TFixedBinHistogram*>(histo);
- if (!fixedBinHisto) {
- FromIHistogram(histo);
- return;
- }
- fixedBinHisto->Initialize();
- TrainingSetSize = fixedBinHisto->TrainingSetSize;
- IsInitialized = fixedBinHisto->IsInitialized;
- IsEmpty = fixedBinHisto->IsEmpty;
- Id = fixedBinHisto->Id;
- MinValue = fixedBinHisto->MinValue;
- MaxValue = fixedBinHisto->MaxValue;
- Sum = fixedBinHisto->Sum;
- Freqs.assign(fixedBinHisto->Freqs.begin(), fixedBinHisto->Freqs.end());
- ReserveFreqs.assign(fixedBinHisto->ReserveFreqs.begin(), fixedBinHisto->ReserveFreqs.end());
- ReferencePoint = fixedBinHisto->ReferencePoint;
- BinRange = fixedBinHisto->BinRange;
- Intervals = fixedBinHisto->Intervals;
- FirstUsedBin = fixedBinHisto->FirstUsedBin;
- LastUsedBin = fixedBinHisto->LastUsedBin;
- BaseIndex = fixedBinHisto->BaseIndex;
- }
-
- void TFixedBinHistogram::Clear() {
- TrainingSet.Destroy();
- IsInitialized = false;
- IsEmpty = true;
- Sum = 0.0;
- Freqs.clear();
- ReserveFreqs.clear();
- BinRange = 0.0;
- }
-
- void TFixedBinHistogram::Add(const THistoRec& rec) {
- if (!rec.HasId() || rec.GetId() == Id) {
- Add(rec.GetValue(), rec.GetWeight());
- }
- }
-
- void TFixedBinHistogram::Add(double value, double weight) {
- if (!IsValidFloat(value) || !IsValidFloat(weight)) {
+ : TrainingSetSize(trainingSetSize)
+ , IsInitialized(false)
+ , IsEmpty(true)
+ , Id(defaultId)
+ , Sum(0.0)
+ , Freqs(0)
+ , ReserveFreqs(0)
+ , BinRange(0.0)
+ , Intervals(defaultIntervals)
+ , BaseIndex(defaultIntervals / 2)
+ {
+ TFixedBinHistogram* fixedBinHisto = dynamic_cast<TFixedBinHistogram*>(histo);
+ if (!fixedBinHisto) {
+ FromIHistogram(histo);
+ return;
+ }
+ fixedBinHisto->Initialize();
+ TrainingSetSize = fixedBinHisto->TrainingSetSize;
+ IsInitialized = fixedBinHisto->IsInitialized;
+ IsEmpty = fixedBinHisto->IsEmpty;
+ Id = fixedBinHisto->Id;
+ MinValue = fixedBinHisto->MinValue;
+ MaxValue = fixedBinHisto->MaxValue;
+ Sum = fixedBinHisto->Sum;
+ Freqs.assign(fixedBinHisto->Freqs.begin(), fixedBinHisto->Freqs.end());
+ ReserveFreqs.assign(fixedBinHisto->ReserveFreqs.begin(), fixedBinHisto->ReserveFreqs.end());
+ ReferencePoint = fixedBinHisto->ReferencePoint;
+ BinRange = fixedBinHisto->BinRange;
+ Intervals = fixedBinHisto->Intervals;
+ FirstUsedBin = fixedBinHisto->FirstUsedBin;
+ LastUsedBin = fixedBinHisto->LastUsedBin;
+ BaseIndex = fixedBinHisto->BaseIndex;
+ }
+
+ void TFixedBinHistogram::Clear() {
+ TrainingSet.Destroy();
+ IsInitialized = false;
+ IsEmpty = true;
+ Sum = 0.0;
+ Freqs.clear();
+ ReserveFreqs.clear();
+ BinRange = 0.0;
+ }
+
+ void TFixedBinHistogram::Add(const THistoRec& rec) {
+ if (!rec.HasId() || rec.GetId() == Id) {
+ Add(rec.GetValue(), rec.GetWeight());
+ }
+ }
+
+ void TFixedBinHistogram::Add(double value, double weight) {
+ if (!IsValidFloat(value) || !IsValidFloat(weight)) {
ythrow yexception() << Sprintf("Histogram id %lu: bad value %f weight %f", Id, value, weight);
- }
-
- if (weight <= 0.0) {
- return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
- }
-
- Sum += weight;
-
- if (!IsInitialized) {
- if (!TrainingSet) {
+ }
+
+ if (weight <= 0.0) {
+ return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
+ }
+
+ Sum += weight;
+
+ if (!IsInitialized) {
+ if (!TrainingSet) {
TrainingSet.Reset(new TVector<TWeightedValue>(0));
- }
- TrainingSet->push_back(TWeightedValue(value, weight));
- if (TrainingSet->size() >= TrainingSetSize) {
- Initialize();
- }
- return;
- }
-
- i32 bin = CalcBin(value);
- if (bin < 0 || bin >= (i32)Freqs.size() || (BinRange == 0.0 && value != ReferencePoint)) {
- Shrink(Min(value, MinValue), Max(value, MaxValue));
- Freqs[CalcBin(value)] += weight;
- } else {
- MinValue = Min(value, MinValue);
- MaxValue = Max(value, MaxValue);
- FirstUsedBin = Min(FirstUsedBin, bin);
- LastUsedBin = Max(LastUsedBin, bin);
- Freqs[bin] += weight;
- }
- }
-
+ }
+ TrainingSet->push_back(TWeightedValue(value, weight));
+ if (TrainingSet->size() >= TrainingSetSize) {
+ Initialize();
+ }
+ return;
+ }
+
+ i32 bin = CalcBin(value);
+ if (bin < 0 || bin >= (i32)Freqs.size() || (BinRange == 0.0 && value != ReferencePoint)) {
+ Shrink(Min(value, MinValue), Max(value, MaxValue));
+ Freqs[CalcBin(value)] += weight;
+ } else {
+ MinValue = Min(value, MinValue);
+ MaxValue = Max(value, MaxValue);
+ FirstUsedBin = Min(FirstUsedBin, bin);
+ LastUsedBin = Max(LastUsedBin, bin);
+ Freqs[bin] += weight;
+ }
+ }
+
void TFixedBinHistogram::Merge(const THistogram& /*histo*/, double /*multiplier*/) {
ythrow yexception() << "Method is not implemented for TFixedBinHistogram";
}
void TFixedBinHistogram::Merge(const TVector<THistogram>& histogramsToMerge) {
TVector<IHistogramPtr> parsedHistogramsToMerge;
- for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
- parsedHistogramsToMerge.push_back(IHistogramPtr(new TAutoHistogram(histogramsToMerge[i], Intervals, Id)));
- }
- Merge(parsedHistogramsToMerge);
- }
-
+ for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
+ parsedHistogramsToMerge.push_back(IHistogramPtr(new TAutoHistogram(histogramsToMerge[i], Intervals, Id)));
+ }
+ Merge(parsedHistogramsToMerge);
+ }
+
void TFixedBinHistogram::Merge(TVector<IHistogramPtr> histogramsToMerge) {
TVector<IHistogramPtr> histogramsToMergeRepacked(0);
TVector<TFixedBinHistogram*> histograms(0);
-
- // put current histogram to the vector of histograms to merge and clear self
- if (!Empty()) {
- histogramsToMergeRepacked.push_back(IHistogramPtr(new TFixedBinHistogram(this, Intervals, Id, TrainingSetSize)));
- histograms.push_back(dynamic_cast<TFixedBinHistogram*>(histogramsToMergeRepacked.back().Get()));
- }
- Clear();
-
- for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
- if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
- continue;
- }
- TFixedBinHistogram* fixedBinHisto = dynamic_cast<TFixedBinHistogram*>(histogramsToMerge[i].Get());
- if (fixedBinHisto) {
- fixedBinHisto->Initialize();
- histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
- } else {
+
+ // put current histogram to the vector of histograms to merge and clear self
+ if (!Empty()) {
+ histogramsToMergeRepacked.push_back(IHistogramPtr(new TFixedBinHistogram(this, Intervals, Id, TrainingSetSize)));
+ histograms.push_back(dynamic_cast<TFixedBinHistogram*>(histogramsToMergeRepacked.back().Get()));
+ }
+ Clear();
+
+ for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
+ if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
+ continue;
+ }
+ TFixedBinHistogram* fixedBinHisto = dynamic_cast<TFixedBinHistogram*>(histogramsToMerge[i].Get());
+ if (fixedBinHisto) {
+ fixedBinHisto->Initialize();
+ histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
+ } else {
histogramsToMergeRepacked.push_back(IHistogramPtr(new TFixedBinHistogram(histogramsToMerge[i].Get(), Intervals, Id, TrainingSetSize))); // Convert histograms that are not of TFixedBinHistogram type
- }
- histograms.push_back(dynamic_cast<TFixedBinHistogram*>(histogramsToMergeRepacked.back().Get()));
- }
-
- if (histograms.size() == 0) {
- return;
- }
-
- double minValue = histograms[0]->MinValue;
- double maxValue = histograms[0]->MaxValue;
- Sum = histograms[0]->Sum;
- for (size_t i = 1; i < histograms.size(); ++i) {
- minValue = Min(minValue, histograms[i]->MinValue);
- maxValue = Max(maxValue, histograms[i]->MaxValue);
- Sum += histograms[i]->Sum;
- }
- SetFrame(minValue, maxValue, true);
-
- if (BinRange == 0.0) {
- Freqs[BaseIndex] = Sum;
- return;
- }
-
- for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
- TFixedBinHistogram* histo = histograms[histoIndex];
- for (i32 bin = histo->FirstUsedBin; bin <= histo->LastUsedBin; ++bin) {
- double binStart = histo->BinStart(bin);
- double binEnd = histo->BinEnd(bin);
- double freq = histo->Freqs[bin];
- if (binStart == binEnd) {
- Freqs[CalcBin(binStart)] += freq;
- continue;
- }
- size_t firstCross = CalcBin(binStart);
- size_t lastCross = CalcBin(binEnd);
- for (size_t i = firstCross; i <= lastCross; ++i) {
- double mergedBinStart = BinStart(i);
- double mergedBinEnd = BinEnd(i);
- double crossStart = Max(mergedBinStart, binStart);
- double crossEnd = Min(mergedBinEnd, binEnd);
- if (binStart == binEnd) {
- }
- Freqs[i] += freq * (crossEnd - crossStart) / (binEnd - binStart);
- }
- }
- }
- }
-
- void TFixedBinHistogram::Multiply(double factor) {
- if (!IsValidFloat(factor) || factor <= 0) {
- ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
- }
- if (!IsInitialized) {
- Initialize();
- }
- Sum *= factor;
+ }
+ histograms.push_back(dynamic_cast<TFixedBinHistogram*>(histogramsToMergeRepacked.back().Get()));
+ }
+
+ if (histograms.size() == 0) {
+ return;
+ }
+
+ double minValue = histograms[0]->MinValue;
+ double maxValue = histograms[0]->MaxValue;
+ Sum = histograms[0]->Sum;
+ for (size_t i = 1; i < histograms.size(); ++i) {
+ minValue = Min(minValue, histograms[i]->MinValue);
+ maxValue = Max(maxValue, histograms[i]->MaxValue);
+ Sum += histograms[i]->Sum;
+ }
+ SetFrame(minValue, maxValue, true);
+
+ if (BinRange == 0.0) {
+ Freqs[BaseIndex] = Sum;
+ return;
+ }
+
+ for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
+ TFixedBinHistogram* histo = histograms[histoIndex];
+ for (i32 bin = histo->FirstUsedBin; bin <= histo->LastUsedBin; ++bin) {
+ double binStart = histo->BinStart(bin);
+ double binEnd = histo->BinEnd(bin);
+ double freq = histo->Freqs[bin];
+ if (binStart == binEnd) {
+ Freqs[CalcBin(binStart)] += freq;
+ continue;
+ }
+ size_t firstCross = CalcBin(binStart);
+ size_t lastCross = CalcBin(binEnd);
+ for (size_t i = firstCross; i <= lastCross; ++i) {
+ double mergedBinStart = BinStart(i);
+ double mergedBinEnd = BinEnd(i);
+ double crossStart = Max(mergedBinStart, binStart);
+ double crossEnd = Min(mergedBinEnd, binEnd);
+ if (binStart == binEnd) {
+ }
+ Freqs[i] += freq * (crossEnd - crossStart) / (binEnd - binStart);
+ }
+ }
+ }
+ }
+
+ void TFixedBinHistogram::Multiply(double factor) {
+ if (!IsValidFloat(factor) || factor <= 0) {
+ ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
+ }
+ if (!IsInitialized) {
+ Initialize();
+ }
+ Sum *= factor;
for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
- Freqs[i] *= factor;
- }
- }
-
- void TFixedBinHistogram::FromProto(const THistogram& histo) {
- if (histo.HasType() && histo.GetType() != HT_FIXED_BIN_HISTOGRAM) {
- ythrow yexception() << "Attempt to parse TFixedBinHistogram from THistogram protobuf record of wrong type = " << (ui32)histo.GetType();
- }
- TrainingSet.Destroy();
- IsInitialized = false;
- Sum = 0.0;
-
- Id = histo.GetId();
- size_t intervals = histo.FreqSize();
- if (intervals == 0) {
- IsEmpty = true;
- return;
- }
- Intervals = intervals;
- TrainingSetSize = Intervals;
- BaseIndex = Intervals / 2;
-
- if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue()) || !IsValidFloat(histo.GetBinRange())) {
+ Freqs[i] *= factor;
+ }
+ }
+
+ void TFixedBinHistogram::FromProto(const THistogram& histo) {
+ if (histo.HasType() && histo.GetType() != HT_FIXED_BIN_HISTOGRAM) {
+ ythrow yexception() << "Attempt to parse TFixedBinHistogram from THistogram protobuf record of wrong type = " << (ui32)histo.GetType();
+ }
+ TrainingSet.Destroy();
+ IsInitialized = false;
+ Sum = 0.0;
+
+ Id = histo.GetId();
+ size_t intervals = histo.FreqSize();
+ if (intervals == 0) {
+ IsEmpty = true;
+ return;
+ }
+ Intervals = intervals;
+ TrainingSetSize = Intervals;
+ BaseIndex = Intervals / 2;
+
+ if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue()) || !IsValidFloat(histo.GetBinRange())) {
ythrow yexception() << Sprintf("FromProto in histogram id %lu: skip bad histo with minvalue %f maxvalue %f binrange %f", Id, histo.GetMinValue(), histo.GetMaxValue(), histo.GetBinRange());
- }
-
- double minValue = histo.GetMinValue();
- double binRange = histo.GetBinRange();
- double maxValue = histo.HasMaxValue() ? histo.GetMaxValue() : minValue + binRange * Intervals;
- SetFrame(minValue, maxValue, true);
- BinRange = binRange;
- for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
- Freqs[i] = histo.GetFreq(i - BaseIndex);
- if (!IsValidFloat(Freqs[i])) {
+ }
+
+ double minValue = histo.GetMinValue();
+ double binRange = histo.GetBinRange();
+ double maxValue = histo.HasMaxValue() ? histo.GetMaxValue() : minValue + binRange * Intervals;
+ SetFrame(minValue, maxValue, true);
+ BinRange = binRange;
+ for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
+ Freqs[i] = histo.GetFreq(i - BaseIndex);
+ if (!IsValidFloat(Freqs[i])) {
ythrow yexception() << Sprintf("FromProto in histogram id %lu: bad value %f", Id, Freqs[i]);
- }
- Sum += Freqs[i];
- }
- }
-
- void TFixedBinHistogram::ToProto(THistogram& histo) {
- histo.Clear();
- if (!IsInitialized) {
- Initialize();
- }
- histo.SetType(HT_FIXED_BIN_HISTOGRAM);
- histo.SetId(Id);
- if (IsEmpty) {
- return;
- }
- if (FirstUsedBin < (i32)BaseIndex || (LastUsedBin - FirstUsedBin + 1) > (i32)Intervals) {
- Shrink(MinValue, MaxValue);
- }
- histo.SetMinValue(MinValue);
- histo.SetMaxValue(MaxValue);
- histo.SetBinRange(BinRange);
- for (ui32 i = BaseIndex; i < BaseIndex + Intervals; ++i) {
- histo.AddFreq(Freqs[i]);
- }
- }
-
+ }
+ Sum += Freqs[i];
+ }
+ }
+
+ void TFixedBinHistogram::ToProto(THistogram& histo) {
+ histo.Clear();
+ if (!IsInitialized) {
+ Initialize();
+ }
+ histo.SetType(HT_FIXED_BIN_HISTOGRAM);
+ histo.SetId(Id);
+ if (IsEmpty) {
+ return;
+ }
+ if (FirstUsedBin < (i32)BaseIndex || (LastUsedBin - FirstUsedBin + 1) > (i32)Intervals) {
+ Shrink(MinValue, MaxValue);
+ }
+ histo.SetMinValue(MinValue);
+ histo.SetMaxValue(MaxValue);
+ histo.SetBinRange(BinRange);
+ for (ui32 i = BaseIndex; i < BaseIndex + Intervals; ++i) {
+ histo.AddFreq(Freqs[i]);
+ }
+ }
+
void TFixedBinHistogram::SetId(ui64 id) {
- Id = id;
- }
-
+ Id = id;
+ }
+
ui64 TFixedBinHistogram::GetId() {
- return Id;
- }
-
- bool TFixedBinHistogram::Empty() {
- if (!IsInitialized) {
- Initialize();
- }
- return IsEmpty;
- }
-
- double TFixedBinHistogram::GetMinValue() {
- if (!IsInitialized) {
- Initialize();
- }
- return MinValue;
- }
-
- double TFixedBinHistogram::GetMaxValue() {
- if (!IsInitialized) {
- Initialize();
- }
- return MaxValue;
- }
-
- double TFixedBinHistogram::GetSum() {
- return Sum;
- }
-
- double TFixedBinHistogram::GetSumInRange(double leftBound, double rightBound) {
- if (!IsInitialized) {
- Initialize();
- }
- if (leftBound > rightBound) {
- return 0.0;
- }
- return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
- }
-
- double TFixedBinHistogram::GetSumAboveBound(double bound) {
- if (!IsInitialized) {
- Initialize();
- }
- if (IsEmpty) {
- return 0.0;
- }
+ return Id;
+ }
+
+ bool TFixedBinHistogram::Empty() {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ return IsEmpty;
+ }
+
+ double TFixedBinHistogram::GetMinValue() {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ return MinValue;
+ }
+
+ double TFixedBinHistogram::GetMaxValue() {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ return MaxValue;
+ }
+
+ double TFixedBinHistogram::GetSum() {
+ return Sum;
+ }
+
+ double TFixedBinHistogram::GetSumInRange(double leftBound, double rightBound) {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ if (leftBound > rightBound) {
+ return 0.0;
+ }
+ return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
+ }
+
+ double TFixedBinHistogram::GetSumAboveBound(double bound) {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ if (IsEmpty) {
+ return 0.0;
+ }
if (BinRange == 0.0) { // special case - all values added to histogram are the same
- return (bound <= ReferencePoint) ? Sum : 0.0;
- }
- i32 bin = CalcBin(bound);
- if (bin < FirstUsedBin) {
- return Sum;
- }
- if (bin > LastUsedBin) {
- return 0.0;
- }
- double binStart = BinStart(bin);
- double binEnd = BinEnd(bin);
+ return (bound <= ReferencePoint) ? Sum : 0.0;
+ }
+ i32 bin = CalcBin(bound);
+ if (bin < FirstUsedBin) {
+ return Sum;
+ }
+ if (bin > LastUsedBin) {
+ return 0.0;
+ }
+ double binStart = BinStart(bin);
+ double binEnd = BinEnd(bin);
double result = (bound < binStart) ? Freqs[bin] : Freqs[bin] * (binEnd - bound) / (binEnd - binStart);
- for (i32 i = bin + 1; i <= LastUsedBin; ++i) {
- result += Freqs[i];
- }
- return result;
- }
-
- double TFixedBinHistogram::GetSumBelowBound(double bound) {
- if (!IsInitialized) {
- Initialize();
- }
- if (IsEmpty) {
- return 0.0;
- }
+ for (i32 i = bin + 1; i <= LastUsedBin; ++i) {
+ result += Freqs[i];
+ }
+ return result;
+ }
+
+ double TFixedBinHistogram::GetSumBelowBound(double bound) {
+ if (!IsInitialized) {
+ Initialize();
+ }
+ if (IsEmpty) {
+ return 0.0;
+ }
if (BinRange == 0.0) { // special case - all values added to histogram are the same
- return (bound > ReferencePoint) ? Sum : 0.0;
- }
- i32 bin = CalcBin(bound);
- if (bin < FirstUsedBin) {
- return 0.0;
- }
- if (bin > LastUsedBin) {
- return Sum;
- }
- double binStart = BinStart(bin);
- double binEnd = BinEnd(bin);
+ return (bound > ReferencePoint) ? Sum : 0.0;
+ }
+ i32 bin = CalcBin(bound);
+ if (bin < FirstUsedBin) {
+ return 0.0;
+ }
+ if (bin > LastUsedBin) {
+ return Sum;
+ }
+ double binStart = BinStart(bin);
+ double binEnd = BinEnd(bin);
double result = (bound > binEnd) ? Freqs[bin] : Freqs[bin] * (bound - binStart) / (binEnd - binStart);
- for (i32 i = bin - 1; i >= FirstUsedBin; --i) {
- result += Freqs[i];
- }
- return result;
- }
-
- double TFixedBinHistogram::CalcUpperBound(double sum) {
- if (!IsInitialized) {
- Initialize();
- }
+ for (i32 i = bin - 1; i >= FirstUsedBin; --i) {
+ result += Freqs[i];
+ }
+ return result;
+ }
+
+ double TFixedBinHistogram::CalcUpperBound(double sum) {
+ if (!IsInitialized) {
+ Initialize();
+ }
if (sum == 0.0) {
return MinValue;
}
- if (IsEmpty) {
- return MaxValue;
- }
- i32 currentBin = FirstUsedBin;
- double gatheredSum = 0.0;
- while (gatheredSum < sum && currentBin <= LastUsedBin) {
- gatheredSum += Freqs[currentBin];
- ++currentBin;
- }
- --currentBin;
+ if (IsEmpty) {
+ return MaxValue;
+ }
+ i32 currentBin = FirstUsedBin;
+ double gatheredSum = 0.0;
+ while (gatheredSum < sum && currentBin <= LastUsedBin) {
+ gatheredSum += Freqs[currentBin];
+ ++currentBin;
+ }
+ --currentBin;
if ((gatheredSum <= sum && currentBin == LastUsedBin) || (Freqs[currentBin] == 0)) {
- return MaxValue;
- }
- double binStart = BinStart(currentBin);
- double binEnd = BinEnd(currentBin);
- return binEnd - (binEnd - binStart) * (gatheredSum - sum) / Freqs[currentBin];
- }
-
- double TFixedBinHistogram::CalcLowerBound(double sum) {
- if (!IsInitialized) {
- Initialize();
- }
+ return MaxValue;
+ }
+ double binStart = BinStart(currentBin);
+ double binEnd = BinEnd(currentBin);
+ return binEnd - (binEnd - binStart) * (gatheredSum - sum) / Freqs[currentBin];
+ }
+
+ double TFixedBinHistogram::CalcLowerBound(double sum) {
+ if (!IsInitialized) {
+ Initialize();
+ }
if (sum == 0.0) {
return MaxValue;
}
- if (IsEmpty) {
- return MinValue;
- }
- i32 currentBin = LastUsedBin;
- double gatheredSum = 0.0;
- while (gatheredSum < sum && currentBin >= FirstUsedBin) {
- gatheredSum += Freqs[currentBin];
- --currentBin;
- }
- ++currentBin;
+ if (IsEmpty) {
+ return MinValue;
+ }
+ i32 currentBin = LastUsedBin;
+ double gatheredSum = 0.0;
+ while (gatheredSum < sum && currentBin >= FirstUsedBin) {
+ gatheredSum += Freqs[currentBin];
+ --currentBin;
+ }
+ ++currentBin;
if ((gatheredSum <= sum && currentBin == FirstUsedBin) || (Freqs[currentBin] == 0)) {
- return MinValue;
- }
- double binStart = BinStart(currentBin);
- double binEnd = BinEnd(currentBin);
- return binStart + (binEnd - binStart) * (gatheredSum - sum) / Freqs[currentBin];
- }
-
+ return MinValue;
+ }
+ double binStart = BinStart(currentBin);
+ double binEnd = BinEnd(currentBin);
+ return binStart + (binEnd - binStart) * (gatheredSum - sum) / Freqs[currentBin];
+ }
+
double TFixedBinHistogram::CalcUpperBoundSafe(double sum) {
if (!Empty()) {
sum = Max(Freqs[FirstUsedBin], sum);
@@ -434,67 +434,67 @@ namespace NKiwiAggr {
return CalcLowerBound(sum);
}
- double TFixedBinHistogram::CalcBinRange(double referencePoint, double maxValue) {
- return (maxValue - referencePoint) / ((double)Intervals - 0.02);
- }
-
- void TFixedBinHistogram::SetFrame(double minValue, double maxValue, bool clear) {
- MinValue = minValue;
- MaxValue = maxValue;
- ReferencePoint = MinValue;
- BinRange = CalcBinRange(ReferencePoint, MaxValue);
- FirstUsedBin = BaseIndex;
- LastUsedBin = (BinRange == 0.0) ? BaseIndex : BaseIndex + Intervals - 1;
- if (clear) {
- Freqs.assign(2 * Intervals, 0.0);
- ReserveFreqs.assign(2 * Intervals, 0.0);
- IsEmpty = false;
- IsInitialized = true;
- }
- }
-
- void TFixedBinHistogram::FromIHistogram(IHistogram* histo) {
- if (!histo) {
- ythrow yexception() << "Attempt to create TFixedBinFistogram from a NULL pointer";
- }
- Id = histo->GetId();
- if (histo->Empty()) {
- IsInitialized = false;
- IsEmpty = true;
- return;
- }
- SetFrame(histo->GetMinValue(), histo->GetMaxValue(), true);
- Sum = histo->GetSum();
- if (BinRange == 0.0) {
- Freqs[BaseIndex] = Sum;
- return;
- }
- for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
- Freqs[i] = histo->GetSumInRange(BinStart(i), BinEnd(i));
- }
- return;
- }
-
- void TFixedBinHistogram::Initialize() {
- if (IsInitialized) {
- return;
- }
- if (!TrainingSet || TrainingSet->size() == 0) {
- IsEmpty = true;
- return;
- }
- SetFrame(MinElement(TrainingSet->begin(), TrainingSet->end(), CompareWeightedValue)->first,
- MaxElement(TrainingSet->begin(), TrainingSet->end(), CompareWeightedValue)->first, true);
+ double TFixedBinHistogram::CalcBinRange(double referencePoint, double maxValue) {
+ return (maxValue - referencePoint) / ((double)Intervals - 0.02);
+ }
+
+ void TFixedBinHistogram::SetFrame(double minValue, double maxValue, bool clear) {
+ MinValue = minValue;
+ MaxValue = maxValue;
+ ReferencePoint = MinValue;
+ BinRange = CalcBinRange(ReferencePoint, MaxValue);
+ FirstUsedBin = BaseIndex;
+ LastUsedBin = (BinRange == 0.0) ? BaseIndex : BaseIndex + Intervals - 1;
+ if (clear) {
+ Freqs.assign(2 * Intervals, 0.0);
+ ReserveFreqs.assign(2 * Intervals, 0.0);
+ IsEmpty = false;
+ IsInitialized = true;
+ }
+ }
+
+ void TFixedBinHistogram::FromIHistogram(IHistogram* histo) {
+ if (!histo) {
+ ythrow yexception() << "Attempt to create TFixedBinFistogram from a NULL pointer";
+ }
+ Id = histo->GetId();
+ if (histo->Empty()) {
+ IsInitialized = false;
+ IsEmpty = true;
+ return;
+ }
+ SetFrame(histo->GetMinValue(), histo->GetMaxValue(), true);
+ Sum = histo->GetSum();
+ if (BinRange == 0.0) {
+ Freqs[BaseIndex] = Sum;
+ return;
+ }
+ for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
+ Freqs[i] = histo->GetSumInRange(BinStart(i), BinEnd(i));
+ }
+ return;
+ }
+
+ void TFixedBinHistogram::Initialize() {
+ if (IsInitialized) {
+ return;
+ }
+ if (!TrainingSet || TrainingSet->size() == 0) {
+ IsEmpty = true;
+ return;
+ }
+ SetFrame(MinElement(TrainingSet->begin(), TrainingSet->end(), CompareWeightedValue)->first,
+ MaxElement(TrainingSet->begin(), TrainingSet->end(), CompareWeightedValue)->first, true);
for (TVector<TWeightedValue>::const_iterator it = TrainingSet->begin(); it != TrainingSet->end(); ++it) {
- Freqs[CalcBin(it->first)] += it->second;
- }
- TrainingSet.Destroy();
- }
-
- i32 TFixedBinHistogram::CalcBin(double value) {
- return (BinRange == 0.0) ? BaseIndex : static_cast<i32>(BaseIndex + (value - ReferencePoint) / BinRange);
- }
-
+ Freqs[CalcBin(it->first)] += it->second;
+ }
+ TrainingSet.Destroy();
+ }
+
+ i32 TFixedBinHistogram::CalcBin(double value) {
+ return (BinRange == 0.0) ? BaseIndex : static_cast<i32>(BaseIndex + (value - ReferencePoint) / BinRange);
+ }
+
double TFixedBinHistogram::CalcDensity(double value) {
i32 bin = CalcBin(value);
if (bin < 0 || bin >= (i32)Freqs.size() || BinRange == 0.0 || GetSum() == 0) {
@@ -503,36 +503,36 @@ namespace NKiwiAggr {
return Freqs[bin] / GetSum() / BinRange;
}
- double TFixedBinHistogram::BinStart(i32 i) {
- return Max(ReferencePoint + (i - BaseIndex) * BinRange, MinValue);
- }
-
- double TFixedBinHistogram::BinEnd(i32 i) {
- return Min(ReferencePoint + (i + 1 - BaseIndex) * BinRange, MaxValue);
- }
-
- void TFixedBinHistogram::Shrink(double newReferencePoint, double newMaxValue) {
+ double TFixedBinHistogram::BinStart(i32 i) {
+ return Max(ReferencePoint + (i - BaseIndex) * BinRange, MinValue);
+ }
+
+ double TFixedBinHistogram::BinEnd(i32 i) {
+ return Min(ReferencePoint + (i + 1 - BaseIndex) * BinRange, MaxValue);
+ }
+
+ void TFixedBinHistogram::Shrink(double newReferencePoint, double newMaxValue) {
Y_VERIFY(newReferencePoint < newMaxValue, "Invalid Shrink()");
- memset(&(ReserveFreqs[0]), 0, ReserveFreqs.size() * sizeof(double));
-
- double newBinRange = CalcBinRange(newReferencePoint, newMaxValue);
+ memset(&(ReserveFreqs[0]), 0, ReserveFreqs.size() * sizeof(double));
+
+ double newBinRange = CalcBinRange(newReferencePoint, newMaxValue);
for (i32 i = FirstUsedBin; i <= LastUsedBin; ++i) {
- double binStart = BinStart(i);
- double binEnd = BinEnd(i);
- double freq = Freqs[i];
- i32 firstCross = static_cast<i32>(BaseIndex + (binStart - newReferencePoint) / newBinRange);
- i32 lastCross = static_cast<i32>(BaseIndex + (binEnd - newReferencePoint) / newBinRange);
- for (i32 j = firstCross; j <= lastCross; ++j) {
- double newBinStart = newReferencePoint + (j - BaseIndex) * newBinRange;
- double newBinEnd = newReferencePoint + (j + 1 - BaseIndex) * newBinRange;
- double crossStart = Max(newBinStart, binStart);
- double crossEnd = Min(newBinEnd, binEnd);
- ReserveFreqs[j] += (binStart == binEnd) ? freq : freq * (crossEnd - crossStart) / (binEnd - binStart);
- }
- }
-
- Freqs.swap(ReserveFreqs);
- SetFrame(newReferencePoint, newMaxValue, false);
- }
-
+ double binStart = BinStart(i);
+ double binEnd = BinEnd(i);
+ double freq = Freqs[i];
+ i32 firstCross = static_cast<i32>(BaseIndex + (binStart - newReferencePoint) / newBinRange);
+ i32 lastCross = static_cast<i32>(BaseIndex + (binEnd - newReferencePoint) / newBinRange);
+ for (i32 j = firstCross; j <= lastCross; ++j) {
+ double newBinStart = newReferencePoint + (j - BaseIndex) * newBinRange;
+ double newBinEnd = newReferencePoint + (j + 1 - BaseIndex) * newBinRange;
+ double crossStart = Max(newBinStart, binStart);
+ double crossEnd = Min(newBinEnd, binEnd);
+ ReserveFreqs[j] += (binStart == binEnd) ? freq : freq * (crossEnd - crossStart) / (binEnd - binStart);
+ }
+ }
+
+ Freqs.swap(ReserveFreqs);
+ SetFrame(newReferencePoint, newMaxValue, false);
+ }
+
}
diff --git a/library/cpp/histogram/adaptive/fixed_bin_histogram.h b/library/cpp/histogram/adaptive/fixed_bin_histogram.h
index 326d962c46..bd380bd94a 100644
--- a/library/cpp/histogram/adaptive/fixed_bin_histogram.h
+++ b/library/cpp/histogram/adaptive/fixed_bin_histogram.h
@@ -1,91 +1,91 @@
#pragma once
-#include "histogram.h"
-
+#include "histogram.h"
+
#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
-
-#include <util/generic/ptr.h>
+
+#include <util/generic/ptr.h>
#include <util/generic/vector.h>
#include <utility>
namespace NKiwiAggr {
- class TFixedBinHistogram: private TNonCopyable, public IHistogram {
- private:
- static const size_t DEFAULT_TRAINING_SET_SIZE = 10000;
- static const size_t DEFAULT_INTERVALS = 100;
+ class TFixedBinHistogram: private TNonCopyable, public IHistogram {
+ private:
+ static const size_t DEFAULT_TRAINING_SET_SIZE = 10000;
+ static const size_t DEFAULT_INTERVALS = 100;
typedef std::pair<double, double> TWeightedValue; // value, weight
THolder<TVector<TWeightedValue>> TrainingSet;
- size_t TrainingSetSize;
-
- bool IsInitialized;
- bool IsEmpty;
+ size_t TrainingSetSize;
+
+ bool IsInitialized;
+ bool IsEmpty;
ui64 Id;
- double MinValue;
- double MaxValue;
- double Sum;
-
+ double MinValue;
+ double MaxValue;
+ double Sum;
+
TVector<double> Freqs;
TVector<double> ReserveFreqs;
- double ReferencePoint;
- double BinRange;
- size_t Intervals;
- i32 FirstUsedBin;
- i32 LastUsedBin;
- i32 BaseIndex;
-
- public:
+ double ReferencePoint;
+ double BinRange;
+ size_t Intervals;
+ i32 FirstUsedBin;
+ i32 LastUsedBin;
+ i32 BaseIndex;
+
+ public:
TFixedBinHistogram(size_t intervals, ui64 id = 0, size_t trainingSetSize = DEFAULT_TRAINING_SET_SIZE);
TFixedBinHistogram(const THistogram& histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0, size_t trainingSetSize = DEFAULT_TRAINING_SET_SIZE);
TFixedBinHistogram(IHistogram* histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0, size_t trainingSetSize = DEFAULT_TRAINING_SET_SIZE);
virtual ~TFixedBinHistogram() {
- }
+ }
+
+ virtual void Clear();
+
+ virtual void Add(double value, double weight);
+ virtual void Add(const THistoRec& histoRec);
- virtual void Clear();
-
- virtual void Add(double value, double weight);
- virtual void Add(const THistoRec& histoRec);
-
virtual void Merge(const THistogram& histo, double multiplier);
virtual void Merge(const TVector<THistogram>& histogramsToMerge);
virtual void Merge(TVector<IHistogramPtr> histogramsToMerge);
-
- virtual void Multiply(double factor);
-
- virtual void FromProto(const THistogram& histo);
- virtual void ToProto(THistogram& histo);
-
+
+ virtual void Multiply(double factor);
+
+ virtual void FromProto(const THistogram& histo);
+ virtual void ToProto(THistogram& histo);
+
virtual void SetId(ui64 id);
virtual ui64 GetId();
- virtual bool Empty();
- virtual double GetMinValue();
- virtual double GetMaxValue();
- virtual double GetSum();
- virtual double GetSumInRange(double leftBound, double rightBound);
- virtual double GetSumAboveBound(double bound);
- virtual double GetSumBelowBound(double bound);
- virtual double CalcUpperBound(double sum);
- virtual double CalcLowerBound(double sum);
+ virtual bool Empty();
+ virtual double GetMinValue();
+ virtual double GetMaxValue();
+ virtual double GetSum();
+ virtual double GetSumInRange(double leftBound, double rightBound);
+ virtual double GetSumAboveBound(double bound);
+ virtual double GetSumBelowBound(double bound);
+ virtual double CalcUpperBound(double sum);
+ virtual double CalcLowerBound(double sum);
virtual double CalcUpperBoundSafe(double sum);
virtual double CalcLowerBoundSafe(double sum);
-
+
double CalcDensity(double value);
- private:
- double CalcBinRange(double referencePoint, double maxValue);
- void SetFrame(double minValue, double maxValue, bool clear);
- void FromIHistogram(IHistogram* histo);
- void Initialize();
- i32 CalcBin(double value);
- double BinStart(i32 i);
- double BinEnd(i32 i);
- void Shrink(double newMinValue, double newMaxValue);
-
- static bool CompareWeightedValue(const TWeightedValue& left, const TWeightedValue& right) {
- return left.first < right.first;
- }
- };
-
+ private:
+ double CalcBinRange(double referencePoint, double maxValue);
+ void SetFrame(double minValue, double maxValue, bool clear);
+ void FromIHistogram(IHistogram* histo);
+ void Initialize();
+ i32 CalcBin(double value);
+ double BinStart(i32 i);
+ double BinEnd(i32 i);
+ void Shrink(double newMinValue, double newMaxValue);
+
+ static bool CompareWeightedValue(const TWeightedValue& left, const TWeightedValue& right) {
+ return left.first < right.first;
+ }
+ };
+
}
diff --git a/library/cpp/histogram/adaptive/histogram.h b/library/cpp/histogram/adaptive/histogram.h
index 58a6af4605..360fd9a693 100644
--- a/library/cpp/histogram/adaptive/histogram.h
+++ b/library/cpp/histogram/adaptive/histogram.h
@@ -1,54 +1,54 @@
-#pragma once
-
-#include <util/generic/ptr.h>
-#include <util/generic/vector.h>
-
-namespace NKiwiAggr {
+#pragma once
+
+#include <util/generic/ptr.h>
+#include <util/generic/vector.h>
+
+namespace NKiwiAggr {
class THistogram;
class THistoRec;
-
- class IHistogram;
+
+ class IHistogram;
typedef TAtomicSharedPtr<IHistogram> IHistogramPtr;
-
- class IHistogram {
- public:
- // Supposed constructors:
- //
+
+ class IHistogram {
+ public:
+ // Supposed constructors:
+ //
// TSomeHistogram(size_t intervals, ui64 id = 0); // where intervals is some constant that defines histogram accuracy
- // TSomeHistogram(const THistogram& histo); // histo must be acceptable for TSomeHistogram, for example, only with HT_FIXED_BIN_HISTOGRAM for TFixedBinHistogram
- // TSomeHistogram(IHistogram* histo); // any kind of IHistogram
-
+ // TSomeHistogram(const THistogram& histo); // histo must be acceptable for TSomeHistogram, for example, only with HT_FIXED_BIN_HISTOGRAM for TFixedBinHistogram
+ // TSomeHistogram(IHistogram* histo); // any kind of IHistogram
+
virtual ~IHistogram() {
- }
-
- virtual void Clear() = 0;
-
- // zero- or negative-weighted values are skipped
- virtual void Add(double value, double weight) = 0;
- virtual void Add(const THistoRec& histoRec) = 0;
-
- // Merge some other histos into current
+ }
+
+ virtual void Clear() = 0;
+
+ // zero- or negative-weighted values are skipped
+ virtual void Add(double value, double weight) = 0;
+ virtual void Add(const THistoRec& histoRec) = 0;
+
+ // Merge some other histos into current
virtual void Merge(const THistogram& histo, double multiplier) = 0;
virtual void Merge(const TVector<THistogram>& histogramsToMerge) = 0;
virtual void Merge(TVector<IHistogramPtr> histogramsToMerge) = 0;
-
- // factor should be greater then zero
- virtual void Multiply(double factor) = 0;
-
+
+ // factor should be greater then zero
+ virtual void Multiply(double factor) = 0;
+
virtual void FromProto(const THistogram& histo) = 0; // throws exception in case of wrong histogram type of histo
- virtual void ToProto(THistogram& histo) = 0;
-
+ virtual void ToProto(THistogram& histo) = 0;
+
virtual void SetId(ui64 id) = 0;
virtual ui64 GetId() = 0;
- virtual bool Empty() = 0;
- virtual double GetMinValue() = 0;
- virtual double GetMaxValue() = 0;
- virtual double GetSum() = 0;
- virtual double GetSumInRange(double leftBound, double rightBound) = 0;
- virtual double GetSumAboveBound(double bound) = 0;
- virtual double GetSumBelowBound(double bound) = 0;
- virtual double CalcUpperBound(double sum) = 0;
- virtual double CalcLowerBound(double sum) = 0;
+ virtual bool Empty() = 0;
+ virtual double GetMinValue() = 0;
+ virtual double GetMaxValue() = 0;
+ virtual double GetSum() = 0;
+ virtual double GetSumInRange(double leftBound, double rightBound) = 0;
+ virtual double GetSumAboveBound(double bound) = 0;
+ virtual double GetSumBelowBound(double bound) = 0;
+ virtual double CalcUpperBound(double sum) = 0;
+ virtual double CalcLowerBound(double sum) = 0;
virtual double CalcUpperBoundSafe(double sum) = 0;
virtual double CalcLowerBoundSafe(double sum) = 0;
double GetValueAtPercentile(double percentile) {
@@ -61,6 +61,6 @@ namespace NKiwiAggr {
// Histogram implementation is supposed to clear all precomputed values() if Add() is called after PrecomputePartialSums()
virtual void PrecomputePartialSums() {
}
- };
-
+ };
+
}
diff --git a/library/cpp/histogram/adaptive/merger.h b/library/cpp/histogram/adaptive/merger.h
index a1db5e4c4b..fc9a6b6a4f 100644
--- a/library/cpp/histogram/adaptive/merger.h
+++ b/library/cpp/histogram/adaptive/merger.h
@@ -1,68 +1,68 @@
-#pragma once
-
-#include <util/generic/buffer.h>
-
-namespace NKiwiAggr {
- class IMerger {
- private:
- bool IsMerged;
+#pragma once
+
+#include <util/generic/buffer.h>
+
+namespace NKiwiAggr {
+ class IMerger {
+ private:
+ bool IsMerged;
ui32 AutoMergeInterval; // Call Merge() after each AutoMergeInterval calls of Add(); zero means no autoMerge
- ui32 NotMergedCount;
-
- public:
- IMerger(ui32 autoMergeInterval = 0)
- : IsMerged(true)
- , AutoMergeInterval(autoMergeInterval)
- , NotMergedCount(0)
- {
- }
-
+ ui32 NotMergedCount;
+
+ public:
+ IMerger(ui32 autoMergeInterval = 0)
+ : IsMerged(true)
+ , AutoMergeInterval(autoMergeInterval)
+ , NotMergedCount(0)
+ {
+ }
+
virtual ~IMerger() {
- }
-
- // returns true if something is added
- virtual bool Add(const void* data, size_t size) {
- if (AddImpl(data, size)) {
- AutoMerge();
- return true;
- }
- return false;
- }
-
- virtual void Merge() {
- if (!IsMerged) {
- MergeImpl();
- IsMerged = true;
- }
- }
-
- virtual void Reset() {
- ResetImpl();
- IsMerged = true;
- }
-
- // You can add some more result-getters if you want.
- // Do not forget to call Merge() in the beginning of each merger.
- virtual void GetResult(TBuffer& buffer) = 0;
-
- protected:
- // AutoMerge() is called in Add() after each AddImpl()
- void AutoMerge() {
- IsMerged = false;
- if (AutoMergeInterval) {
- ++NotMergedCount;
- if (NotMergedCount >= AutoMergeInterval) {
- MergeImpl();
- IsMerged = true;
- NotMergedCount = 0;
- }
- }
- }
-
- // Implementation of merger: define it in derivatives
- virtual bool AddImpl(const void* data, size_t size) = 0; // returns true if something is added
- virtual void MergeImpl() = 0;
- virtual void ResetImpl() = 0;
- };
-
+ }
+
+ // returns true if something is added
+ virtual bool Add(const void* data, size_t size) {
+ if (AddImpl(data, size)) {
+ AutoMerge();
+ return true;
+ }
+ return false;
+ }
+
+ virtual void Merge() {
+ if (!IsMerged) {
+ MergeImpl();
+ IsMerged = true;
+ }
+ }
+
+ virtual void Reset() {
+ ResetImpl();
+ IsMerged = true;
+ }
+
+ // You can add some more result-getters if you want.
+ // Do not forget to call Merge() in the beginning of each merger.
+ virtual void GetResult(TBuffer& buffer) = 0;
+
+ protected:
+ // AutoMerge() is called in Add() after each AddImpl()
+ void AutoMerge() {
+ IsMerged = false;
+ if (AutoMergeInterval) {
+ ++NotMergedCount;
+ if (NotMergedCount >= AutoMergeInterval) {
+ MergeImpl();
+ IsMerged = true;
+ NotMergedCount = 0;
+ }
+ }
+ }
+
+ // Implementation of merger: define it in derivatives
+ virtual bool AddImpl(const void* data, size_t size) = 0; // returns true if something is added
+ virtual void MergeImpl() = 0;
+ virtual void ResetImpl() = 0;
+ };
+
}
diff --git a/library/cpp/histogram/adaptive/multi_histogram.h b/library/cpp/histogram/adaptive/multi_histogram.h
index 5672407068..41caac5ba6 100644
--- a/library/cpp/histogram/adaptive/multi_histogram.h
+++ b/library/cpp/histogram/adaptive/multi_histogram.h
@@ -1,83 +1,83 @@
-#pragma once
-
-#include "histogram.h"
-#include "auto_histogram.h"
-
+#pragma once
+
+#include "histogram.h"
+#include "auto_histogram.h"
+
#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
-
+
#include <util/generic/hash.h>
-#include <util/generic/ptr.h>
+#include <util/generic/ptr.h>
#include <utility>
-
-namespace NKiwiAggr {
- template <class TMyHistogram>
- class TMultiHistogram {
- private:
- static const size_t DEFAULT_INTERVALS = 100;
-
+
+namespace NKiwiAggr {
+ template <class TMyHistogram>
+ class TMultiHistogram {
+ private:
+ static const size_t DEFAULT_INTERVALS = 100;
+
typedef THashMap<ui64, IHistogramPtr> THistogramsMap;
- THistogramsMap Histograms;
- size_t Intervals;
-
- public:
- TMultiHistogram(size_t intervals = DEFAULT_INTERVALS)
- : Intervals(intervals)
- {
- }
-
- TMultiHistogram(const THistograms& histograms, size_t defaultIntervals = DEFAULT_INTERVALS)
- : Intervals(defaultIntervals)
- {
- FromProto(histograms);
- }
-
+ THistogramsMap Histograms;
+ size_t Intervals;
+
+ public:
+ TMultiHistogram(size_t intervals = DEFAULT_INTERVALS)
+ : Intervals(intervals)
+ {
+ }
+
+ TMultiHistogram(const THistograms& histograms, size_t defaultIntervals = DEFAULT_INTERVALS)
+ : Intervals(defaultIntervals)
+ {
+ FromProto(histograms);
+ }
+
virtual ~TMultiHistogram() {
- }
-
- void Clear() {
- Histograms.clear();
- }
-
- void Add(const THistoRecs& histoRecs) {
- for (size_t i = 0; i < histoRecs.HistoRecsSize(); ++i) {
- Add(histoRecs.GetHistoRecs(i).GetId(), histoRecs.GetHistoRecs(i).GetValue(), histoRecs.GetHistoRecs(i).GetWeight());
- }
- }
-
- void Add(const THistoRec& histoRec) {
- Add(histoRec.GetId(), histoRec.GetValue(), histoRec.GetWeight());
- }
-
+ }
+
+ void Clear() {
+ Histograms.clear();
+ }
+
+ void Add(const THistoRecs& histoRecs) {
+ for (size_t i = 0; i < histoRecs.HistoRecsSize(); ++i) {
+ Add(histoRecs.GetHistoRecs(i).GetId(), histoRecs.GetHistoRecs(i).GetValue(), histoRecs.GetHistoRecs(i).GetWeight());
+ }
+ }
+
+ void Add(const THistoRec& histoRec) {
+ Add(histoRec.GetId(), histoRec.GetValue(), histoRec.GetWeight());
+ }
+
void Add(ui64 id, double value, double weight) {
- THistogramsMap::const_iterator it = Histograms.find(id);
- if (it == Histograms.end()) {
+ THistogramsMap::const_iterator it = Histograms.find(id);
+ if (it == Histograms.end()) {
it = Histograms.insert(std::make_pair(id, IHistogramPtr(new TMyHistogram(Intervals, id)))).first;
- }
- it->second->Add(value, weight);
- }
-
- void Multiply(double factor) {
- for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- it->second->Multiply(factor);
- }
- }
-
+ }
+ it->second->Add(value, weight);
+ }
+
+ void Multiply(double factor) {
+ for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
+ it->second->Multiply(factor);
+ }
+ }
+
TVector<ui64> GetIds() const {
TVector<ui64> result(0);
for (THistogramsMap::const_iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
result.push_back(it->first);
- }
- return result;
- }
-
+ }
+ return result;
+ }
+
IHistogramPtr GetHistogram(ui64 id) const {
- THistogramsMap::const_iterator it = Histograms.find(id);
- if (it != Histograms.end()) {
- return it->second;
- }
- return IHistogramPtr();
- }
-
+ THistogramsMap::const_iterator it = Histograms.find(id);
+ if (it != Histograms.end()) {
+ return it->second;
+ }
+ return IHistogramPtr();
+ }
+
double GetMaxHistoSum() const {
double sum = 0.0;
for (THistogramsMap::const_iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
@@ -86,58 +86,58 @@ namespace NKiwiAggr {
return sum;
}
- bool Empty() {
- for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- if (!it->second->Empty()) {
- return false;
- }
- }
- return true;
- }
-
- virtual double OverallSum() {
- double sum = 0.0;
- for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- sum += it->second->GetSum();
- }
- return sum;
- }
-
- void FromProto(const THistograms& histograms) {
- for (size_t i = 0; i < histograms.HistoRecsSize(); ++i) {
- IHistogramPtr newHisto(new TMyHistogram(histograms.GetHistoRecs(i), Intervals));
- if (!newHisto->Empty()) {
- Histograms[newHisto->GetId()] = newHisto;
- }
- }
- }
-
- void ToProto(THistograms& histograms) {
- histograms.Clear();
- for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- THistogram* histo = histograms.AddHistoRecs();
- it->second->ToProto(*histo);
- }
- }
+ bool Empty() {
+ for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
+ if (!it->second->Empty()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ virtual double OverallSum() {
+ double sum = 0.0;
+ for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
+ sum += it->second->GetSum();
+ }
+ return sum;
+ }
+
+ void FromProto(const THistograms& histograms) {
+ for (size_t i = 0; i < histograms.HistoRecsSize(); ++i) {
+ IHistogramPtr newHisto(new TMyHistogram(histograms.GetHistoRecs(i), Intervals));
+ if (!newHisto->Empty()) {
+ Histograms[newHisto->GetId()] = newHisto;
+ }
+ }
+ }
+
+ void ToProto(THistograms& histograms) {
+ histograms.Clear();
+ for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
+ THistogram* histo = histograms.AddHistoRecs();
+ it->second->ToProto(*histo);
+ }
+ }
void PrecomputePartialSums() {
for (auto& it : Histograms) {
it.second->PrecomputePartialSums();
}
}
- };
-
- template <class TMerger, class TSomeMultiHistogram>
- static void MergeToMultiHistogram(const void* data, size_t size, TSomeMultiHistogram& multiHistogram, ui32 intervals = 300) {
- TMerger merger(intervals);
- merger.Add(data, size);
- THistograms histograms;
- merger.GetResult(histograms);
- multiHistogram.FromProto(histograms);
- }
-
- // Good for parsing from THistograms protobuf
- typedef TMultiHistogram<TAutoHistogram> TAutoMultiHistogram;
+ };
+
+ template <class TMerger, class TSomeMultiHistogram>
+ static void MergeToMultiHistogram(const void* data, size_t size, TSomeMultiHistogram& multiHistogram, ui32 intervals = 300) {
+ TMerger merger(intervals);
+ merger.Add(data, size);
+ THistograms histograms;
+ merger.GetResult(histograms);
+ multiHistogram.FromProto(histograms);
+ }
+
+ // Good for parsing from THistograms protobuf
+ typedef TMultiHistogram<TAutoHistogram> TAutoMultiHistogram;
typedef TAtomicSharedPtr<TAutoMultiHistogram> TAutoMultiHistogramPtr;
-
+
}
diff --git a/library/cpp/histogram/adaptive/protos/histo.proto b/library/cpp/histogram/adaptive/protos/histo.proto
index 3ca752d4d7..8961fef022 100644
--- a/library/cpp/histogram/adaptive/protos/histo.proto
+++ b/library/cpp/histogram/adaptive/protos/histo.proto
@@ -5,32 +5,32 @@ package NKiwiAggr;
message THistoRec {
optional uint64 Id = 1; // Current histogram identifier
optional double Value = 2;
- optional double Weight = 3 [default = 1.0]; // You can set a certain weight to each record or just skip records using Weight=0
+ optional double Weight = 3 [default = 1.0]; // You can set a certain weight to each record or just skip records using Weight=0
}
message THistoRecs {
repeated THistoRec HistoRecs = 1;
}
-enum EHistogramType {
- HT_FIXED_BIN_HISTOGRAM = 1;
- HT_ADAPTIVE_DISTANCE_HISTOGRAM = 2;
- HT_ADAPTIVE_WEIGHT_HISTOGRAM = 3;
- HT_ADAPTIVE_HISTOGRAM = 4; // if the quality function is unknown
+enum EHistogramType {
+ HT_FIXED_BIN_HISTOGRAM = 1;
+ HT_ADAPTIVE_DISTANCE_HISTOGRAM = 2;
+ HT_ADAPTIVE_WEIGHT_HISTOGRAM = 3;
+ HT_ADAPTIVE_HISTOGRAM = 4; // if the quality function is unknown
HT_ADAPTIVE_WARD_HISTOGRAM = 5;
-}
-
+}
+
message THistogram {
optional uint64 Id = 1;
optional double MinValue = 2;
- optional double BinRange = 4; // for FIXED_BIN_HISTOGRAM only. And it's OK that it is 4 after 2
- repeated float Freq = 5;
- repeated float Position = 6; // for ADAPTIVE histograms only
- optional double MaxValue = 7;
- optional EHistogramType Type = 8; // Empty field means FIXED_BIN_HISTOGRAM
+ optional double BinRange = 4; // for FIXED_BIN_HISTOGRAM only. And it's OK that it is 4 after 2
+ repeated float Freq = 5;
+ repeated float Position = 6; // for ADAPTIVE histograms only
+ optional double MaxValue = 7;
+ optional EHistogramType Type = 8; // Empty field means FIXED_BIN_HISTOGRAM
}
// Multihistogam
message THistograms {
repeated THistogram HistoRecs = 1;
-}
+}
diff --git a/library/cpp/histogram/adaptive/protos/ya.make b/library/cpp/histogram/adaptive/protos/ya.make
index b5a0c42114..7635cfcb8c 100644
--- a/library/cpp/histogram/adaptive/protos/ya.make
+++ b/library/cpp/histogram/adaptive/protos/ya.make
@@ -1,6 +1,6 @@
PROTO_LIBRARY()
-OWNER(g:crawl)
+OWNER(g:crawl)
SRCS(
histo.proto
diff --git a/library/cpp/histogram/adaptive/ya.make b/library/cpp/histogram/adaptive/ya.make
index 54c8d8af9a..b589801b27 100644
--- a/library/cpp/histogram/adaptive/ya.make
+++ b/library/cpp/histogram/adaptive/ya.make
@@ -7,7 +7,7 @@ OWNER(
SRCS(
common.cpp
- adaptive_histogram.cpp
+ adaptive_histogram.cpp
block_histogram.cpp
fixed_bin_histogram.cpp
)