diff options
author | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
---|---|---|
committer | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/histogram | |
parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) | |
download | ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz |
fix ya.make
Diffstat (limited to 'library/cpp/histogram')
-rw-r--r-- | library/cpp/histogram/adaptive/merger.h | 68 | ||||
-rw-r--r-- | library/cpp/histogram/adaptive/multi_histogram.h | 143 | ||||
-rw-r--r-- | library/cpp/histogram/simple/histogram.cpp | 1 | ||||
-rw-r--r-- | library/cpp/histogram/simple/histogram.h | 164 |
4 files changed, 165 insertions, 211 deletions
diff --git a/library/cpp/histogram/adaptive/merger.h b/library/cpp/histogram/adaptive/merger.h
deleted file mode 100644
index fc9a6b6a4f9..00000000000
--- a/library/cpp/histogram/adaptive/merger.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#pragma once
-
-#include <util/generic/buffer.h>
-
-namespace NKiwiAggr {
-    class IMerger {
-    private:
-        bool IsMerged;
-        ui32 AutoMergeInterval; // Call Merge() after each AutoMergeInterval calls of Add(); zero means no autoMerge
-        ui32 NotMergedCount;
-
-    public:
-        IMerger(ui32 autoMergeInterval = 0)
-            : IsMerged(true)
-            , AutoMergeInterval(autoMergeInterval)
-            , NotMergedCount(0)
-        {
-        }
-
-        virtual ~IMerger() {
-        }
-
-        // returns true if something is added
-        virtual bool Add(const void* data, size_t size) {
-            if (AddImpl(data, size)) {
-                AutoMerge();
-                return true;
-            }
-            return false;
-        }
-
-        virtual void Merge() {
-            if (!IsMerged) {
-                MergeImpl();
-                IsMerged = true;
-            }
-        }
-
-        virtual void Reset() {
-            ResetImpl();
-            IsMerged = true;
-        }
-
-        // You can add some more result-getters if you want.
-        // Do not forget to call Merge() in the beginning of each merger.
-        virtual void GetResult(TBuffer& buffer) = 0;
-
-    protected:
-        // AutoMerge() is called in Add() after each AddImpl()
-        void AutoMerge() {
-            IsMerged = false;
-            if (AutoMergeInterval) {
-                ++NotMergedCount;
-                if (NotMergedCount >= AutoMergeInterval) {
-                    MergeImpl();
-                    IsMerged = true;
-                    NotMergedCount = 0;
-                }
-            }
-        }
-
-        // Implementation of merger: define it in derivatives
-        virtual bool AddImpl(const void* data, size_t size) = 0; // returns true if something is added
-        virtual void MergeImpl() = 0;
-        virtual void ResetImpl() = 0;
-    };
-
-}
diff --git a/library/cpp/histogram/adaptive/multi_histogram.h b/library/cpp/histogram/adaptive/multi_histogram.h
deleted file mode 100644
index 41caac5ba68..00000000000
--- a/library/cpp/histogram/adaptive/multi_histogram.h
+++ /dev/null
@@ -1,143 +0,0 @@
-#pragma once
-
-#include "histogram.h"
-#include "auto_histogram.h"
-
-#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
-
-#include <util/generic/hash.h>
-#include <util/generic/ptr.h>
-#include <utility>
-
-namespace NKiwiAggr {
-    template <class TMyHistogram>
-    class TMultiHistogram {
-    private:
-        static const size_t DEFAULT_INTERVALS = 100;
-
-        typedef THashMap<ui64, IHistogramPtr> THistogramsMap;
-        THistogramsMap Histograms;
-        size_t Intervals;
-
-    public:
-        TMultiHistogram(size_t intervals = DEFAULT_INTERVALS)
-            : Intervals(intervals)
-        {
-        }
-
-        TMultiHistogram(const THistograms& histograms, size_t defaultIntervals = DEFAULT_INTERVALS)
-            : Intervals(defaultIntervals)
-        {
-            FromProto(histograms);
-        }
-
-        virtual ~TMultiHistogram() {
-        }
-
-        void Clear() {
-            Histograms.clear();
-        }
-
-        void Add(const THistoRecs& histoRecs) {
-            for (size_t i = 0; i < histoRecs.HistoRecsSize(); ++i) {
-                Add(histoRecs.GetHistoRecs(i).GetId(), histoRecs.GetHistoRecs(i).GetValue(), histoRecs.GetHistoRecs(i).GetWeight());
-            }
-        }
-
-        void Add(const THistoRec& histoRec) {
-            Add(histoRec.GetId(), histoRec.GetValue(), histoRec.GetWeight());
-        }
-
-        void Add(ui64 id, double value, double weight) {
-            THistogramsMap::const_iterator it = Histograms.find(id);
-            if (it == Histograms.end()) {
-                it = Histograms.insert(std::make_pair(id, IHistogramPtr(new TMyHistogram(Intervals, id)))).first;
-            }
-            it->second->Add(value, weight);
-        }
-
-        void Multiply(double factor) {
-            for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
-                it->second->Multiply(factor);
-            }
-        }
-
-        TVector<ui64> GetIds() const {
-            TVector<ui64> result(0);
-            for (THistogramsMap::const_iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
-                result.push_back(it->first);
-            }
-            return result;
-        }
-
-        IHistogramPtr GetHistogram(ui64 id) const {
-            THistogramsMap::const_iterator it = Histograms.find(id);
-            if (it != Histograms.end()) {
-                return it->second;
-            }
-            return IHistogramPtr();
-        }
-
-        double GetMaxHistoSum() const {
-            double sum = 0.0;
-            for (THistogramsMap::const_iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
-                sum = std::max(sum, it->second->GetSum());
-            }
-            return sum;
-        }
-
-        bool Empty() {
-            for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
-                if (!it->second->Empty()) {
-                    return false;
-                }
-            }
-            return true;
-        }
-
-        virtual double OverallSum() {
-            double sum = 0.0;
-            for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
-                sum += it->second->GetSum();
-            }
-            return sum;
-        }
-
-        void FromProto(const THistograms& histograms) {
-            for (size_t i = 0; i < histograms.HistoRecsSize(); ++i) {
-                IHistogramPtr newHisto(new TMyHistogram(histograms.GetHistoRecs(i), Intervals));
-                if (!newHisto->Empty()) {
-                    Histograms[newHisto->GetId()] = newHisto;
-                }
-            }
-        }
-
-        void ToProto(THistograms& histograms) {
-            histograms.Clear();
-            for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
-                THistogram* histo = histograms.AddHistoRecs();
-                it->second->ToProto(*histo);
-            }
-        }
-
-        void PrecomputePartialSums() {
-            for (auto& it : Histograms) {
-                it.second->PrecomputePartialSums();
-            }
-        }
-    };
-
-    template <class TMerger, class TSomeMultiHistogram>
-    static void MergeToMultiHistogram(const void* data, size_t size, TSomeMultiHistogram& multiHistogram, ui32 intervals = 300) {
-        TMerger merger(intervals);
-        merger.Add(data, size);
-        THistograms histograms;
-        merger.GetResult(histograms);
-        multiHistogram.FromProto(histograms);
-    }
-
-    // Good for parsing from THistograms protobuf
-    typedef TMultiHistogram<TAutoHistogram> TAutoMultiHistogram;
-    typedef TAtomicSharedPtr<TAutoMultiHistogram> TAutoMultiHistogramPtr;
-
-}
diff --git a/library/cpp/histogram/simple/histogram.cpp b/library/cpp/histogram/simple/histogram.cpp
new file mode 100644
index 00000000000..35247627409
--- /dev/null
+++ b/library/cpp/histogram/simple/histogram.cpp
@@ -0,0 +1 @@
+#include "histogram.h"
diff --git a/library/cpp/histogram/simple/histogram.h b/library/cpp/histogram/simple/histogram.h
new file mode 100644
index 00000000000..ceb09efc5c4
--- /dev/null
+++ b/library/cpp/histogram/simple/histogram.h
@@ -0,0 +1,164 @@
+#pragma once
+
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/json/writer/json.h>
+#include <library/cpp/threading/future/async.h>
+
+#include <util/generic/algorithm.h>
+#include <util/generic/hash.h>
+#include <util/generic/utility.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/stream/format.h>
+#include <util/string/builder.h>
+#include <util/thread/pool.h>
+#include <util/system/mutex.h>
+
+namespace NSimpleHistogram {
+    // Read-only holder of recorded values that answers percentile queries.
+    // The constructor does not sort: values are expected to be in ascending order
+    // already (THistogramCalcer::Calc() sorts before constructing).
+    template <typename T>
+    class THistogram {
+    public:
+        explicit THistogram(TVector<T>&& values)
+            : Values_(std::move(values))
+        {
+        }
+
+        // Number of values the histogram holds.
+        size_t TotalCount() const {
+            return Values_.size();
+        }
+
+        // Returns the value at index floor(percentile * TotalCount()), clamped to the last element.
+        // percentile is a fraction in [0.0, 1.0], not a percentage.
+        // Throws yexception if the histogram is empty.
+        T ValueAtPercentile(double percentile) const {
+            // Checked unconditionally: with only a debug assert, an empty histogram would
+            // make size() - 1 wrap around and index out of bounds in release builds.
+            Y_ENSURE(!Values_.empty(), "ValueAtPercentile() called on an empty histogram");
+            Y_ASSERT(percentile >= 0.0 && percentile <= 1.0);
+
+            const size_t index = static_cast<size_t>(percentile * Values_.size());
+            return Values_[Min(Values_.size() - 1, index)];
+        }
+
+    private:
+        TVector<T> Values_;
+    };
+
+    // Accumulates raw values and turns them into a THistogram.
+    template <typename T>
+    class THistogramCalcer {
+    public:
+        // Number of values currently stored.
+        size_t TotalCount() const {
+            return Values_.size();
+        }
+
+        void RecordValue(T value) {
+            Values_.push_back(value);
+        }
+
+        // Sorts the stored values (if needed) and moves them into the returned histogram.
+        // The calcer's storage is moved from, so call this once after recording is finished.
+        THistogram<T> Calc() {
+            if (!IsSorted(Values_.begin(), Values_.end())) {
+                Sort(Values_.begin(), Values_.end());
+            }
+            return THistogram<T>(std::move(Values_));
+        }
+
+    private:
+        TVector<T> Values_;
+    };
+
+    // Keeps one THistogramCalcer per name.
+    template <typename T>
+    class TMultiHistogramCalcer {
+    public:
+        // Appends value to the calcer stored under name, creating it on first use.
+        void RecordValue(TStringBuf name, T value) {
+            Calcers_[name].RecordValue(value);
+        }
+
+        // Builds a histogram for every name; moves the recorded values out of the calcers.
+        THashMap<TString, THistogram<T>> Calc() {
+            THashMap<TString, THistogram<T>> result;
+
+            for (auto& calcer : Calcers_) {
+                result.emplace(calcer.first, calcer.second.Calc());
+            }
+
+            return result;
+        }
+
+    private:
+        THashMap<TString, THistogramCalcer<T>> Calcers_;
+    };
+
+    // TMultiHistogramCalcer whose public methods are serialized by a mutex.
+    template <typename T>
+    class TThreadSafeMultiHistogramCalcer {
+    public:
+        void RecordValue(TStringBuf name, T value) {
+            TGuard<TMutex> guard(Mutex_);
+            Calcer_.RecordValue(name, value);
+        }
+
+        THashMap<TString, THistogram<T>> Calc() {
+            // Calc() sorts and moves the underlying vectors, so it must not overlap
+            // with a concurrent RecordValue().
+            TGuard<TMutex> guard(Mutex_);
+            return Calcer_.Calc();
+        }
+
+    private:
+        TMutex Mutex_;
+        TMultiHistogramCalcer<T> Calcer_;
+    };
+
+    // Serializes one histogram as {"Q<p * 100>": value, ..., "RecordCount": count}
+    // with one "Q" entry for every p in percentiles (fractions in [0.0, 1.0]).
+    template <typename T>
+    NJson::TJsonValue ToJson(const THistogram<T>& hist, const TVector<double>& percentiles) {
+        NJson::TJsonValue json;
+
+        for (double percentile : percentiles) {
+            TStringBuilder name;
+            name << "Q" << Prec(percentile * 100, PREC_POINT_DIGITS_STRIP_ZEROES, 2);
+            json[name] = hist.ValueAtPercentile(percentile);
+        }
+
+        json["RecordCount"] = hist.TotalCount();
+
+        return json;
+    }
+
+    // Serializes a name -> histogram map as a JSON object keyed by name.
+    template <typename T>
+    NJson::TJsonValue ToJson(const THashMap<TString, THistogram<T>>& hists, const TVector<double>& percentiles) {
+        NJson::TJsonValue json;
+
+        for (const auto& p : hists) {
+            json[p.first] = ToJson(p.second, percentiles);
+        }
+
+        return json;
+    }
+
+    // Renders the map of histograms as a JSON string; format = true indents with 4 spaces.
+    template <typename T>
+    TString ToJsonStr(const THashMap<TString, THistogram<T>>& hists, const TVector<double>& percentiles, bool format = true) {
+        NJson::TJsonValue json = ToJson(hists, percentiles);
+
+        NJsonWriter::TBuf buf;
+        if (format) {
+            buf.SetIndentSpaces(4);
+        }
+
+        return buf.WriteJsonValue(&json, true).Str();
+    }
+
+}