diff options
author | svirg <svirg@yandex-team.ru> | 2022-02-10 16:50:56 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:56 +0300 |
commit | d7b26a7860e235a7b00891e1ffbc256c7c976f7d (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/histogram/adaptive/block_histogram.h | |
parent | 6b417b6bdff55c365f5835bb18c5a8053b975801 (diff) | |
download | ydb-d7b26a7860e235a7b00891e1ffbc256c7c976f7d.tar.gz |
Restoring authorship annotation for <svirg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/histogram/adaptive/block_histogram.h')
-rw-r--r-- | library/cpp/histogram/adaptive/block_histogram.h | 132 |
1 files changed, 66 insertions, 66 deletions
diff --git a/library/cpp/histogram/adaptive/block_histogram.h b/library/cpp/histogram/adaptive/block_histogram.h index a892c3da57..266bb2f2b2 100644 --- a/library/cpp/histogram/adaptive/block_histogram.h +++ b/library/cpp/histogram/adaptive/block_histogram.h @@ -1,7 +1,7 @@ #pragma once #include "histogram.h" -#include "common.h" +#include "common.h" #include <library/cpp/histogram/adaptive/protos/histo.pb.h> @@ -10,31 +10,31 @@ #include <utility> namespace NKiwiAggr { - /////////////////// - // TBlockHistogram - /////////////////// - - /** - * Contrary to adaptive histogram, block histogram doesn't rebuild bins - * after the addition of each point. Instead, it accumulates points and in case the amount - * of points overflows specified limits, it shrinks all the points at once to produce histogram - * Indeed, there exist two limits and two shrinkage operations: - * 1) FastGreedyShrink is fast but coarse. It is used to shrink from upper limit to intermediate limit - * (override FastGreedyShrink to set specific behaviour) - * 2) SlowShrink is slow, but produces finer histogram. It shrinks from the intermediate limit to the - * actual number of bins in a manner similar to that in adaptive histogram (set CalcQuality in constuctor) - * While FastGreedyShrink is used most of the time, SlowShrink is mostly used for histogram finalization - */ - class TBlockHistogram: private TNonCopyable, public IHistogram { - protected: + /////////////////// + // TBlockHistogram + /////////////////// + + /** + * Contrary to adaptive histogram, block histogram doesn't rebuild bins + * after the addition of each point. Instead, it accumulates points and in case the amount + * of points overflows specified limits, it shrinks all the points at once to produce histogram + * Indeed, there exist two limits and two shrinkage operations: + * 1) FastGreedyShrink is fast but coarse. It is used to shrink from upper limit to intermediate limit + * (override FastGreedyShrink to set specific behaviour) + * 2) SlowShrink is slow, but produces finer histogram. It shrinks from the intermediate limit to the + * actual number of bins in a manner similar to that in adaptive histogram (set CalcQuality in constuctor) + * While FastGreedyShrink is used most of the time, SlowShrink is mostly used for histogram finalization + */ + class TBlockHistogram: private TNonCopyable, public IHistogram { + protected: static const size_t SHRINK_MULTIPLIER = 2; static const size_t GREEDY_SHRINK_MULTIPLIER = 4; static const size_t DEFAULT_INTERVALS = 100; - static const size_t DEFAULT_SHRINK_SIZE = DEFAULT_INTERVALS * (SHRINK_MULTIPLIER + GREEDY_SHRINK_MULTIPLIER); + static const size_t DEFAULT_SHRINK_SIZE = DEFAULT_INTERVALS * (SHRINK_MULTIPLIER + GREEDY_SHRINK_MULTIPLIER); const EHistogramType Type; - const TQualityFunction CalcQuality; - + const TQualityFunction CalcQuality; + size_t Intervals; size_t ShrinkSize; size_t PrevSize; @@ -48,7 +48,7 @@ namespace NKiwiAggr { TVector<TWeightedValue> Bins; public: - TBlockHistogram(EHistogramType type, TQualityFunction calcQuality, + TBlockHistogram(EHistogramType type, TQualityFunction calcQuality, size_t intervals, ui64 id = 0, size_t shrinkSize = DEFAULT_SHRINK_SIZE); virtual ~TBlockHistogram() { @@ -59,7 +59,7 @@ namespace NKiwiAggr { virtual void Add(double value, double weight); virtual void Add(const THistoRec& histoRec); - virtual void Merge(const THistogram& histo, double multiplier); + virtual void Merge(const THistogram& histo, double multiplier); virtual void Merge(const TVector<THistogram>& histogramsToMerge); virtual void Merge(TVector<IHistogramPtr> histogramsToMerge); // not implemented @@ -86,63 +86,63 @@ namespace NKiwiAggr { private: void SortBins(); - void UniquifyBins(); + void UniquifyBins(); void CorrectShrinkSize(); void SortAndShrink(size_t intervals, bool final = false); - + void SlowShrink(size_t intervals); - virtual void FastGreedyShrink(size_t intervals) = 0; + virtual void FastGreedyShrink(size_t intervals) = 0; }; - ///////////////////////// - // TBlockWeightHistogram - ///////////////////////// - + ///////////////////////// + // TBlockWeightHistogram + ///////////////////////// + class TBlockWeightHistogram: public TBlockHistogram { - public: + public: TBlockWeightHistogram(size_t intervals, ui64 id = 0, size_t shrinkSize = DEFAULT_SHRINK_SIZE); - + virtual ~TBlockWeightHistogram() { } - - private: - virtual void FastGreedyShrink(size_t intervals) final; - }; - - /////////////////////// - // TBlockWardHistogram - /////////////////////// - + + private: + virtual void FastGreedyShrink(size_t intervals) final; + }; + + /////////////////////// + // TBlockWardHistogram + /////////////////////// + class TBlockWardHistogram: public TBlockHistogram { - public: + public: TBlockWardHistogram(size_t intervals, ui64 id = 0, size_t shrinkSize = DEFAULT_SHRINK_SIZE); - + virtual ~TBlockWardHistogram() { } - - private: + + private: using TCumulative = std::pair<double, double>; // cumulative sum of (weights, weighted centers) using TCumulatives = TVector<TCumulative>; - - struct TSplitInfo { - double profit; - - TCumulatives::const_iterator beg; - TCumulatives::const_iterator mid; - TCumulatives::const_iterator end; - - bool operator<(const TSplitInfo& other) const { - return profit < other.profit; - } - }; - - private: - virtual void FastGreedyShrink(size_t intervals) final; - - static bool CalcSplitInfo(const TCumulatives::const_iterator beg, - const TCumulatives::const_iterator end, - TSplitInfo& splitInfo); - }; - + + struct TSplitInfo { + double profit; + + TCumulatives::const_iterator beg; + TCumulatives::const_iterator mid; + TCumulatives::const_iterator end; + + bool operator<(const TSplitInfo& other) const { + return profit < other.profit; + } + }; + + private: + virtual void FastGreedyShrink(size_t intervals) final; + + static bool CalcSplitInfo(const TCumulatives::const_iterator beg, + const TCumulatives::const_iterator end, + TSplitInfo& splitInfo); + }; + } |