diff options
| author | aprudaev <[email protected]> | 2022-02-10 16:49:58 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:49:58 +0300 | 
| commit | 042dfdfd1388209ce3aa06dca56bd52ec63d40b2 (patch) | |
| tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/histogram | |
| parent | 29e66fe2ab37743577f88e6ab770defc4e6c1002 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/histogram')
| -rw-r--r-- | library/cpp/histogram/adaptive/block_histogram.cpp | 506 | ||||
| -rw-r--r-- | library/cpp/histogram/adaptive/block_histogram.h | 104 | ||||
| -rw-r--r-- | library/cpp/histogram/adaptive/histogram.h | 4 | 
3 files changed, 307 insertions, 307 deletions
diff --git a/library/cpp/histogram/adaptive/block_histogram.cpp b/library/cpp/histogram/adaptive/block_histogram.cpp index b5c32cf7ffa..6586d13ff62 100644 --- a/library/cpp/histogram/adaptive/block_histogram.cpp +++ b/library/cpp/histogram/adaptive/block_histogram.cpp @@ -1,141 +1,141 @@  #include "block_histogram.h" -  +  #include <library/cpp/histogram/adaptive/protos/histo.pb.h> -  +  #include <util/generic/algorithm.h> -#include <util/generic/yexception.h>  -#include <util/generic/intrlist.h>  -#include <util/generic/ptr.h>  +#include <util/generic/yexception.h> +#include <util/generic/intrlist.h> +#include <util/generic/ptr.h>  #include <util/generic/queue.h>  #include <util/generic/ymath.h>  #include <util/string/printf.h> -  -namespace {  -    struct TEmpty {  -    };  -  -    class TSmartHeap {  -    private:  + +namespace { +    struct TEmpty { +    }; + +    class TSmartHeap { +    private:          TVector<ui32> A;          TVector<ui32> Pos;          const TVector<double>& Weights; -  -    public:  + +    public:          TSmartHeap(const TVector<double>& weights) -            : A(weights.size())  -            , Pos(weights.size())  -            , Weights(weights)  -        {  -            for (ui32 i = 0; i < weights.size(); ++i) {  -                A[i] = i;  -                Pos[i] = i;  -            }  -            for (ui32 i = weights.size() / 2; i > 0; --i) {  -                Down(i - 1);  -            }  -        }  -  -        ui32 IdOfMin() {  -            return A[0];  -        }  -  -        void Pop() {  -            A[0] = A.back();  -            Pos[A[0]] = 0;  -            A.pop_back();  -            Down(0);  -        }  -  -        void DownElement(ui32 id) {  -            Down(Pos[id]);  -        }  -  -    private:  -        void SwapPositions(ui32 x, ui32 y) {  +            : A(weights.size()) +            , Pos(weights.size()) +            , Weights(weights) +        { +            for (ui32 i = 0; i < weights.size(); ++i) { +                A[i] = i; +                Pos[i] = i; +            } +            for (ui32 i = weights.size() / 2; i > 0; --i) { +                Down(i - 1); +            } +        } + +        ui32 IdOfMin() { +            return A[0]; +        } + +        void Pop() { +            A[0] = A.back(); +            Pos[A[0]] = 0; +            A.pop_back(); +            Down(0); +        } + +        void DownElement(ui32 id) { +            Down(Pos[id]); +        } + +    private: +        void SwapPositions(ui32 x, ui32 y) {              std::swap(A[x], A[y]); -            Pos[A[x]] = x;  -            Pos[A[y]] = y;  -        }  -  -        void Down(ui32 pos) {  -            while (1) {  -                ui32 left = pos * 2 + 1;  -                ui32 right = pos * 2 + 2;  -                ui32 min = pos;  -                if (left < A.size() && Weights[A[min]] > Weights[A[left]])  -                    min = left;  -                if (right < A.size() && Weights[A[min]] > Weights[A[right]])  -                    min = right;  -                if (pos == min)  -                    break;  -                SwapPositions(min, pos);  -                pos = min;  -            }  -        }  -    };  -  -}  -  -namespace NKiwiAggr {  +            Pos[A[x]] = x; +            Pos[A[y]] = y; +        } + +        void Down(ui32 pos) { +            while (1) { +                ui32 left = pos * 2 + 1; +                ui32 right = pos * 2 + 2; +                ui32 min = pos; +                if (left < A.size() && Weights[A[min]] > Weights[A[left]]) +                    min = left; +                if (right < A.size() && Weights[A[min]] > Weights[A[right]]) +                    min = right; +                if (pos == min) +                    break; +                SwapPositions(min, pos); +                pos = min; +            } +        } +    }; + +} + +namespace NKiwiAggr {      ///////////////////      // TBlockHistogram      /////////////////// -  +      TBlockHistogram::TBlockHistogram(EHistogramType type, TQualityFunction calcQuality,                                       size_t intervals, ui64 id, size_t shrinkSize)          : Type(type)          , CalcQuality(calcQuality)          , Intervals(intervals) -        , ShrinkSize(shrinkSize)  -        , PrevSize(0)  -        , Id(id)  -        , Sum(0)  -        , MinValue(0)  -        , MaxValue(0)  -    {  -        CorrectShrinkSize();  -    }  -  +        , ShrinkSize(shrinkSize) +        , PrevSize(0) +        , Id(id) +        , Sum(0) +        , MinValue(0) +        , MaxValue(0) +    { +        CorrectShrinkSize(); +    } +      void TBlockHistogram::Clear() {          PrevSize = 0;          Sum = 0.0;          MinValue = 0.0;          MaxValue = 0.0;          Bins.clear(); -    }  -  +    } +      void TBlockHistogram::Add(const THistoRec& rec) { -        if (!rec.HasId() || rec.GetId() == Id) {  -            Add(rec.GetValue(), rec.GetWeight());  -        }  -    }  -  +        if (!rec.HasId() || rec.GetId() == Id) { +            Add(rec.GetValue(), rec.GetWeight()); +        } +    } +      void TBlockHistogram::Add(double value, double weight) {          if (!IsValidFloat(value) || !IsValidFloat(weight)) {              ythrow yexception() << Sprintf("Histogram id %lu: bad value %f weight %f", Id, value, weight);          } -        if (weight <= 0.0) {  -            return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden  -        }  -  -        if (Bins.empty()) {  -            MinValue = value;  -            MaxValue = value;  +        if (weight <= 0.0) { +            return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden +        } + +        if (Bins.empty()) { +            MinValue = value; +            MaxValue = value;          } else {              MinValue = Min(MinValue, value);              MaxValue = Max(MaxValue, value); -        }  -  -        Sum += weight;  -  +        } + +        Sum += weight; +          if (Bins.size() > ShrinkSize) { -            SortAndShrink(Intervals * SHRINK_MULTIPLIER);  +            SortAndShrink(Intervals * SHRINK_MULTIPLIER);          } -  -        Bins.push_back(TWeightedValue(value, weight));  -    }  -  + +        Bins.push_back(TWeightedValue(value, weight)); +    } +      void TBlockHistogram::Merge(const THistogram& histo, double multiplier) {          if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {              fprintf(stderr, "Merging in histogram id %lu: skip bad histo with minvalue %f maxvalue %f\n", Id, histo.GetMinValue(), histo.GetMaxValue()); @@ -214,18 +214,18 @@ namespace NKiwiAggr {                  break; // ok              default:   // not ok                  ythrow yexception() << "Attempt to parse TBlockHistogram from THistogram protobuf record of type = " << (ui32)histo.GetType(); -        }  +        } -        if (histo.FreqSize() != histo.PositionSize()) {  +        if (histo.FreqSize() != histo.PositionSize()) {              ythrow yexception() << "Attempt to parse TBlockHistogram from THistogram protobuf record where FreqSize != PositionSize. FreqSize == " << (ui32)histo.FreqSize() << ", PositionSize == " << (ui32)histo.PositionSize(); -        }  -        Id = histo.GetId();  -        Sum = 0;  -        Intervals = Max(Intervals, histo.FreqSize());  -        CorrectShrinkSize();  -        Bins.resize(histo.FreqSize());  -        PrevSize = Bins.size();  -        for (size_t i = 0; i < histo.FreqSize(); ++i) {  +        } +        Id = histo.GetId(); +        Sum = 0; +        Intervals = Max(Intervals, histo.FreqSize()); +        CorrectShrinkSize(); +        Bins.resize(histo.FreqSize()); +        PrevSize = Bins.size(); +        for (size_t i = 0; i < histo.FreqSize(); ++i) {              double value = histo.GetPosition(i);              double weight = histo.GetFreq(i);              if (!IsValidFloat(value) || !IsValidFloat(weight)) { @@ -234,92 +234,92 @@ namespace NKiwiAggr {              }              Bins[i].first = value;              Bins[i].second = weight; -            Sum += Bins[i].second;  -        }  +            Sum += Bins[i].second; +        }          if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {              ythrow yexception() << Sprintf("FromProto in histogram id %lu: skip bad histo with minvalue %f maxvalue %f", Id, histo.GetMinValue(), histo.GetMaxValue());          } -        MinValue = histo.GetMinValue();  -        MaxValue = histo.GetMaxValue();  -    }  -  +        MinValue = histo.GetMinValue(); +        MaxValue = histo.GetMaxValue(); +    } +      void TBlockHistogram::ToProto(THistogram& histo) { -        histo.Clear();  +        histo.Clear();          histo.SetType(Type); -        histo.SetId(Id);  -        if (Empty()) {  -            return;  -        }  -  -        SortAndShrink(Intervals, true);  -        histo.SetMinValue(MinValue);  -        histo.SetMaxValue(MaxValue);  +        histo.SetId(Id); +        if (Empty()) { +            return; +        } + +        SortAndShrink(Intervals, true); +        histo.SetMinValue(MinValue); +        histo.SetMaxValue(MaxValue);          for (TVector<TWeightedValue>::const_iterator it = Bins.begin(); it != Bins.end(); ++it) { -            histo.AddFreq(it->second);  -            histo.AddPosition(it->first);  -        }  -    }  -  +            histo.AddFreq(it->second); +            histo.AddPosition(it->first); +        } +    } +      void TBlockHistogram::SetId(ui64 id) {          Id = id;      }      ui64 TBlockHistogram::GetId() { -        return Id;  -    }  -  +        return Id; +    } +      bool TBlockHistogram::Empty() { -        return Bins.empty();  -    }  -  +        return Bins.empty(); +    } +      double TBlockHistogram::GetMinValue() {          return MinValue; -    }  -  +    } +      double TBlockHistogram::GetMaxValue() {          return MaxValue; -    }  -  +    } +      double TBlockHistogram::GetSum() { -        return Sum;  -    }  -  +        return Sum; +    } +      void TBlockHistogram::SortAndShrink(size_t intervals, bool final) {          Y_VERIFY(intervals > 0); -  +          if (Bins.size() <= intervals) { -            return;  +            return;          } -  -        if (Bins.size() >= Intervals * GREEDY_SHRINK_MULTIPLIER) {  -            SortBins();  + +        if (Bins.size() >= Intervals * GREEDY_SHRINK_MULTIPLIER) { +            SortBins();              UniquifyBins(); -            FastGreedyShrink(intervals);  -  -            if (final) {  -                SlowShrink(intervals);  -            }  -  +            FastGreedyShrink(intervals); + +            if (final) { +                SlowShrink(intervals); +            } +          } else { -            SortBins();  +            SortBins();              UniquifyBins(); -            SlowShrink(intervals);  -        }  -    }  -  +            SlowShrink(intervals); +        } +    } +      void TBlockHistogram::SortBins() { -        Sort(Bins.begin() + PrevSize, Bins.end());  -        if (PrevSize != 0) {  +        Sort(Bins.begin() + PrevSize, Bins.end()); +        if (PrevSize != 0) {              TVector<TWeightedValue> temp(Bins.begin(), Bins.begin() + PrevSize);              std::merge(temp.begin(), temp.end(), Bins.begin() + PrevSize, Bins.end(), Bins.begin()); -        }  -    }  -  +        } +    } +      void TBlockHistogram::UniquifyBins() {          if (Bins.empty()) -            return;  -  +            return; +          auto it1 = Bins.begin();          auto it2 = Bins.begin();          while (++it2 != Bins.end()) { @@ -327,124 +327,124 @@ namespace NKiwiAggr {                  it1->second += it2->second;              } else {                  *(++it1) = *it2; -            }  -        }  -  +            } +        } +          Bins.erase(++it1, Bins.end()); -    }  -  +    } +      void TBlockHistogram::CorrectShrinkSize() {          ShrinkSize = Max(ShrinkSize, Intervals * (SHRINK_MULTIPLIER + GREEDY_SHRINK_MULTIPLIER));      }      void TBlockHistogram::SlowShrink(size_t intervals) { -        {  -            size_t pos = 0;  -            for (size_t i = 1; i < Bins.size(); ++i)  -                if (Bins[i].first - Bins[pos].first < 1e-9) {  -                    Bins[pos].second += Bins[i].second;  -                } else {  -                    ++pos;  -                    Bins[pos] = Bins[i];  -                }  -            Bins.resize(pos + 1);  -            PrevSize = pos + 1;  -        }  -  +        { +            size_t pos = 0; +            for (size_t i = 1; i < Bins.size(); ++i) +                if (Bins[i].first - Bins[pos].first < 1e-9) { +                    Bins[pos].second += Bins[i].second; +                } else { +                    ++pos; +                    Bins[pos] = Bins[i]; +                } +            Bins.resize(pos + 1); +            PrevSize = pos + 1; +        } +          if (Bins.size() <= intervals) { -            return;  -        } -  -        typedef TIntrusiveListItem<TEmpty> TListItem;  -  -        ui32 n = Bins.size() - 1;  -        const ui32 end = (ui32)Bins.size();  -  -        TArrayHolder<TListItem> listElementsHolder(new TListItem[end + 1]);  -        TListItem* const bins = listElementsHolder.Get();  -  +            return; +        } + +        typedef TIntrusiveListItem<TEmpty> TListItem; + +        ui32 n = Bins.size() - 1; +        const ui32 end = (ui32)Bins.size(); + +        TArrayHolder<TListItem> listElementsHolder(new TListItem[end + 1]); +        TListItem* const bins = listElementsHolder.Get(); +          for (ui32 i = 1; i <= end; ++i) { -            bins[i].LinkAfter(&bins[i - 1]);  +            bins[i].LinkAfter(&bins[i - 1]);          } -  +          TVector<double> pairWeights(n); -  +          for (ui32 i = 0; i < n; ++i) {              pairWeights[i] = CalcQuality(Bins[i], Bins[i + 1]).first;          } -  -        TSmartHeap heap(pairWeights);  -  -        while (n + 1 > intervals) {  -            ui32 b = heap.IdOfMin();  -            heap.Pop();  -  -            ui32 a = (ui32)(bins[b].Prev() - bins);  -            ui32 c = (ui32)(bins[b].Next() - bins);  -            ui32 d = (ui32)(bins[b].Next()->Next() - bins);  + +        TSmartHeap heap(pairWeights); + +        while (n + 1 > intervals) { +            ui32 b = heap.IdOfMin(); +            heap.Pop(); + +            ui32 a = (ui32)(bins[b].Prev() - bins); +            ui32 c = (ui32)(bins[b].Next() - bins); +            ui32 d = (ui32)(bins[b].Next()->Next() - bins);              Y_VERIFY(Bins[c].second != -1); -  -            double mass = Bins[b].second + Bins[c].second;  -            Bins[c].first = (Bins[b].first * Bins[b].second + Bins[c].first * Bins[c].second) / mass;  -            Bins[c].second = mass;  -  -            bins[b].Unlink();  -            Bins[b].second = -1;  -  -            if (a != end) {  + +            double mass = Bins[b].second + Bins[c].second; +            Bins[c].first = (Bins[b].first * Bins[b].second + Bins[c].first * Bins[c].second) / mass; +            Bins[c].second = mass; + +            bins[b].Unlink(); +            Bins[b].second = -1; + +            if (a != end) {                  pairWeights[a] = CalcQuality(Bins[a], Bins[c]).first; -                heap.DownElement(a);  -            }  -  -            if (d != end && c + 1 != Bins.size()) {  +                heap.DownElement(a); +            } + +            if (d != end && c + 1 != Bins.size()) {                  pairWeights[c] = CalcQuality(Bins[c], Bins[d]).first; -                heap.DownElement(c);  -            }  -  -            --n;  -        }  -  -        size_t pos = 0;  -        for (TListItem* it = bins[end].Next(); it != &bins[end]; it = it->Next()) {  -            Bins[pos++] = Bins[it - bins];  -        }  -  -        Bins.resize(pos);  -        PrevSize = pos;  +                heap.DownElement(c); +            } + +            --n; +        } + +        size_t pos = 0; +        for (TListItem* it = bins[end].Next(); it != &bins[end]; it = it->Next()) { +            Bins[pos++] = Bins[it - bins]; +        } + +        Bins.resize(pos); +        PrevSize = pos;          Y_VERIFY(pos == intervals); -    }  -  +    } +      double TBlockHistogram::GetSumInRange(double leftBound, double rightBound) {          Y_UNUSED(leftBound);          Y_UNUSED(rightBound);          ythrow yexception() << "Method is not implemented for TBlockHistogram"; -        return 0;  -    }  -  +        return 0; +    } +      double TBlockHistogram::GetSumAboveBound(double bound) {          Y_UNUSED(bound);          ythrow yexception() << "Method is not implemented for TBlockHistogram"; -        return 0;  -    }  -  +        return 0; +    } +      double TBlockHistogram::GetSumBelowBound(double bound) {          Y_UNUSED(bound);          ythrow yexception() << "Method is not implemented for TBlockHistogram"; -        return 0;  -    }  -  +        return 0; +    } +      double TBlockHistogram::CalcUpperBound(double sum) {          Y_UNUSED(sum);          ythrow yexception() << "Method is not implemented for TBlockHistogram"; -        return 0;  -    }  -  +        return 0; +    } +      double TBlockHistogram::CalcLowerBound(double sum) {          Y_UNUSED(sum);          ythrow yexception() << "Method is not implemented for TBlockHistogram"; -        return 0;  -    }  -  +        return 0; +    } +      double TBlockHistogram::CalcUpperBoundSafe(double sum) {          Y_UNUSED(sum);          ythrow yexception() << "Method is not implemented for TBlockHistogram"; diff --git a/library/cpp/histogram/adaptive/block_histogram.h b/library/cpp/histogram/adaptive/block_histogram.h index dda8623c63e..266bb2f2b2f 100644 --- a/library/cpp/histogram/adaptive/block_histogram.h +++ b/library/cpp/histogram/adaptive/block_histogram.h @@ -1,19 +1,19 @@ -#pragma once  -  -#include "histogram.h"  +#pragma once + +#include "histogram.h"  #include "common.h" -  +  #include <library/cpp/histogram/adaptive/protos/histo.pb.h> -  -#include <util/generic/ptr.h>  -#include <util/generic/vector.h>  + +#include <util/generic/ptr.h> +#include <util/generic/vector.h>  #include <utility> -  -namespace NKiwiAggr {  + +namespace NKiwiAggr {      ///////////////////      // TBlockHistogram      /////////////////// -  +      /**       * Contrary to adaptive histogram, block histogram doesn't rebuild bins       * after the addition of each point. Instead, it accumulates points and in case the amount @@ -28,73 +28,73 @@ namespace NKiwiAggr {      class TBlockHistogram: private TNonCopyable, public IHistogram {      protected:          static const size_t SHRINK_MULTIPLIER = 2; -        static const size_t GREEDY_SHRINK_MULTIPLIER = 4;  +        static const size_t GREEDY_SHRINK_MULTIPLIER = 4;          static const size_t DEFAULT_INTERVALS = 100;          static const size_t DEFAULT_SHRINK_SIZE = DEFAULT_INTERVALS * (SHRINK_MULTIPLIER + GREEDY_SHRINK_MULTIPLIER); -  +          const EHistogramType Type;          const TQualityFunction CalcQuality; -        size_t Intervals;  -        size_t ShrinkSize;  -        size_t PrevSize;  -  +        size_t Intervals; +        size_t ShrinkSize; +        size_t PrevSize; +          ui64 Id; -  -        double Sum;  -        double MinValue;  -        double MaxValue;  -  + +        double Sum; +        double MinValue; +        double MaxValue; +          TVector<TWeightedValue> Bins; -  -    public:  + +    public:          TBlockHistogram(EHistogramType type, TQualityFunction calcQuality,                          size_t intervals, ui64 id = 0, size_t shrinkSize = DEFAULT_SHRINK_SIZE); -  +          virtual ~TBlockHistogram() { -        }  -  +        } +          virtual void Clear(); -        virtual void Add(double value, double weight);  -        virtual void Add(const THistoRec& histoRec);  -  +        virtual void Add(double value, double weight); +        virtual void Add(const THistoRec& histoRec); +          virtual void Merge(const THistogram& histo, double multiplier);          virtual void Merge(const TVector<THistogram>& histogramsToMerge);          virtual void Merge(TVector<IHistogramPtr> histogramsToMerge); // not implemented          virtual void Multiply(double factor); -        virtual void FromProto(const THistogram& histo);  -        virtual void ToProto(THistogram& histo);  -  +        virtual void FromProto(const THistogram& histo); +        virtual void ToProto(THistogram& histo); +          virtual void SetId(ui64 id);          virtual ui64 GetId(); -        virtual bool Empty();  -        virtual double GetMinValue();  -        virtual double GetMaxValue();  -        virtual double GetSum();  -  -        // methods below are not implemented  -        virtual double GetSumInRange(double leftBound, double rightBound);  -        virtual double GetSumAboveBound(double bound);  -        virtual double GetSumBelowBound(double bound);  -        virtual double CalcUpperBound(double sum);  -        virtual double CalcLowerBound(double sum);  +        virtual bool Empty(); +        virtual double GetMinValue(); +        virtual double GetMaxValue(); +        virtual double GetSum(); + +        // methods below are not implemented +        virtual double GetSumInRange(double leftBound, double rightBound); +        virtual double GetSumAboveBound(double bound); +        virtual double GetSumBelowBound(double bound); +        virtual double CalcUpperBound(double sum); +        virtual double CalcLowerBound(double sum);          virtual double CalcUpperBoundSafe(double sum);          virtual double CalcLowerBoundSafe(double sum); -  -    private:  -        void SortBins();  + +    private: +        void SortBins();          void UniquifyBins(); -        void CorrectShrinkSize();  -  -        void SortAndShrink(size_t intervals, bool final = false);  +        void CorrectShrinkSize(); + +        void SortAndShrink(size_t intervals, bool final = false); -        void SlowShrink(size_t intervals);  +        void SlowShrink(size_t intervals);          virtual void FastGreedyShrink(size_t intervals) = 0; -    };  -  +    }; +      /////////////////////////      // TBlockWeightHistogram      ///////////////////////// diff --git a/library/cpp/histogram/adaptive/histogram.h b/library/cpp/histogram/adaptive/histogram.h index f4d6f69b6bc..360fd9a6936 100644 --- a/library/cpp/histogram/adaptive/histogram.h +++ b/library/cpp/histogram/adaptive/histogram.h @@ -4,8 +4,8 @@  #include <util/generic/vector.h>  namespace NKiwiAggr { -    class THistogram;  -    class THistoRec;  +    class THistogram; +    class THistoRec;      class IHistogram;      typedef TAtomicSharedPtr<IHistogram> IHistogramPtr;  | 
