aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/histogram/adaptive/block_histogram.h
diff options
context:
space:
mode:
authorsvirg <svirg@yandex-team.ru>2022-02-10 16:50:56 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:56 +0300
commitd7b26a7860e235a7b00891e1ffbc256c7c976f7d (patch)
tree5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/histogram/adaptive/block_histogram.h
parent6b417b6bdff55c365f5835bb18c5a8053b975801 (diff)
downloadydb-d7b26a7860e235a7b00891e1ffbc256c7c976f7d.tar.gz
Restoring authorship annotation for <svirg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/histogram/adaptive/block_histogram.h')
-rw-r--r--library/cpp/histogram/adaptive/block_histogram.h132
1 files changed, 66 insertions, 66 deletions
diff --git a/library/cpp/histogram/adaptive/block_histogram.h b/library/cpp/histogram/adaptive/block_histogram.h
index a892c3da57..266bb2f2b2 100644
--- a/library/cpp/histogram/adaptive/block_histogram.h
+++ b/library/cpp/histogram/adaptive/block_histogram.h
@@ -1,7 +1,7 @@
#pragma once
#include "histogram.h"
-#include "common.h"
+#include "common.h"
#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
@@ -10,31 +10,31 @@
#include <utility>
namespace NKiwiAggr {
- ///////////////////
- // TBlockHistogram
- ///////////////////
-
- /**
- * Contrary to adaptive histogram, block histogram doesn't rebuild bins
- * after the addition of each point. Instead, it accumulates points and in case the amount
- * of points overflows specified limits, it shrinks all the points at once to produce histogram
- * Indeed, there exist two limits and two shrinkage operations:
- * 1) FastGreedyShrink is fast but coarse. It is used to shrink from upper limit to intermediate limit
- * (override FastGreedyShrink to set specific behaviour)
- * 2) SlowShrink is slow, but produces finer histogram. It shrinks from the intermediate limit to the
- * actual number of bins in a manner similar to that in adaptive histogram (set CalcQuality in constuctor)
- * While FastGreedyShrink is used most of the time, SlowShrink is mostly used for histogram finalization
- */
- class TBlockHistogram: private TNonCopyable, public IHistogram {
- protected:
+ ///////////////////
+ // TBlockHistogram
+ ///////////////////
+
+ /**
+ * Contrary to adaptive histogram, block histogram doesn't rebuild bins
+ * after the addition of each point. Instead, it accumulates points and in case the amount
+ * of points overflows specified limits, it shrinks all the points at once to produce histogram
+ * Indeed, there exist two limits and two shrinkage operations:
+ * 1) FastGreedyShrink is fast but coarse. It is used to shrink from upper limit to intermediate limit
+ * (override FastGreedyShrink to set specific behaviour)
+ * 2) SlowShrink is slow, but produces finer histogram. It shrinks from the intermediate limit to the
+ * actual number of bins in a manner similar to that in adaptive histogram (set CalcQuality in constuctor)
+ * While FastGreedyShrink is used most of the time, SlowShrink is mostly used for histogram finalization
+ */
+ class TBlockHistogram: private TNonCopyable, public IHistogram {
+ protected:
static const size_t SHRINK_MULTIPLIER = 2;
static const size_t GREEDY_SHRINK_MULTIPLIER = 4;
static const size_t DEFAULT_INTERVALS = 100;
- static const size_t DEFAULT_SHRINK_SIZE = DEFAULT_INTERVALS * (SHRINK_MULTIPLIER + GREEDY_SHRINK_MULTIPLIER);
+ static const size_t DEFAULT_SHRINK_SIZE = DEFAULT_INTERVALS * (SHRINK_MULTIPLIER + GREEDY_SHRINK_MULTIPLIER);
const EHistogramType Type;
- const TQualityFunction CalcQuality;
-
+ const TQualityFunction CalcQuality;
+
size_t Intervals;
size_t ShrinkSize;
size_t PrevSize;
@@ -48,7 +48,7 @@ namespace NKiwiAggr {
TVector<TWeightedValue> Bins;
public:
- TBlockHistogram(EHistogramType type, TQualityFunction calcQuality,
+ TBlockHistogram(EHistogramType type, TQualityFunction calcQuality,
size_t intervals, ui64 id = 0, size_t shrinkSize = DEFAULT_SHRINK_SIZE);
virtual ~TBlockHistogram() {
@@ -59,7 +59,7 @@ namespace NKiwiAggr {
virtual void Add(double value, double weight);
virtual void Add(const THistoRec& histoRec);
- virtual void Merge(const THistogram& histo, double multiplier);
+ virtual void Merge(const THistogram& histo, double multiplier);
virtual void Merge(const TVector<THistogram>& histogramsToMerge);
virtual void Merge(TVector<IHistogramPtr> histogramsToMerge); // not implemented
@@ -86,63 +86,63 @@ namespace NKiwiAggr {
private:
void SortBins();
- void UniquifyBins();
+ void UniquifyBins();
void CorrectShrinkSize();
void SortAndShrink(size_t intervals, bool final = false);
-
+
void SlowShrink(size_t intervals);
- virtual void FastGreedyShrink(size_t intervals) = 0;
+ virtual void FastGreedyShrink(size_t intervals) = 0;
};
- /////////////////////////
- // TBlockWeightHistogram
- /////////////////////////
-
+ /////////////////////////
+ // TBlockWeightHistogram
+ /////////////////////////
+
class TBlockWeightHistogram: public TBlockHistogram {
- public:
+ public:
TBlockWeightHistogram(size_t intervals, ui64 id = 0, size_t shrinkSize = DEFAULT_SHRINK_SIZE);
-
+
virtual ~TBlockWeightHistogram() {
}
-
- private:
- virtual void FastGreedyShrink(size_t intervals) final;
- };
-
- ///////////////////////
- // TBlockWardHistogram
- ///////////////////////
-
+
+ private:
+ virtual void FastGreedyShrink(size_t intervals) final;
+ };
+
+ ///////////////////////
+ // TBlockWardHistogram
+ ///////////////////////
+
class TBlockWardHistogram: public TBlockHistogram {
- public:
+ public:
TBlockWardHistogram(size_t intervals, ui64 id = 0, size_t shrinkSize = DEFAULT_SHRINK_SIZE);
-
+
virtual ~TBlockWardHistogram() {
}
-
- private:
+
+ private:
using TCumulative = std::pair<double, double>; // cumulative sum of (weights, weighted centers)
using TCumulatives = TVector<TCumulative>;
-
- struct TSplitInfo {
- double profit;
-
- TCumulatives::const_iterator beg;
- TCumulatives::const_iterator mid;
- TCumulatives::const_iterator end;
-
- bool operator<(const TSplitInfo& other) const {
- return profit < other.profit;
- }
- };
-
- private:
- virtual void FastGreedyShrink(size_t intervals) final;
-
- static bool CalcSplitInfo(const TCumulatives::const_iterator beg,
- const TCumulatives::const_iterator end,
- TSplitInfo& splitInfo);
- };
-
+
+ struct TSplitInfo {
+ double profit;
+
+ TCumulatives::const_iterator beg;
+ TCumulatives::const_iterator mid;
+ TCumulatives::const_iterator end;
+
+ bool operator<(const TSplitInfo& other) const {
+ return profit < other.profit;
+ }
+ };
+
+ private:
+ virtual void FastGreedyShrink(size_t intervals) final;
+
+ static bool CalcSplitInfo(const TCumulatives::const_iterator beg,
+ const TCumulatives::const_iterator end,
+ TSplitInfo& splitInfo);
+ };
+
}