about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/histogram
diff options
context:
space:
mode:
author monster <monster@ydb.tech> 2022-07-07 14:41:37 +0300
committer monster <monster@ydb.tech> 2022-07-07 14:41:37 +0300
commit 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch)
tree 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/histogram
parent 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff)
download ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz
fix ya.make
Diffstat (limited to 'library/cpp/histogram')
-rw-r--r-- library/cpp/histogram/adaptive/merger.h 68
-rw-r--r-- library/cpp/histogram/adaptive/multi_histogram.h 143
-rw-r--r-- library/cpp/histogram/simple/histogram.cpp 1
-rw-r--r-- library/cpp/histogram/simple/histogram.h 140
4 files changed, 141 insertions, 211 deletions
diff --git a/library/cpp/histogram/adaptive/merger.h b/library/cpp/histogram/adaptive/merger.h
deleted file mode 100644
index fc9a6b6a4f9..00000000000
--- a/library/cpp/histogram/adaptive/merger.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#pragma once
-
-#include <util/generic/buffer.h>
-
-namespace NKiwiAggr {
- class IMerger {
- private:
- bool IsMerged;
- ui32 AutoMergeInterval; // Call Merge() after each AutoMergeInterval calls of Add(); zero means no autoMerge
- ui32 NotMergedCount;
-
- public:
- IMerger(ui32 autoMergeInterval = 0)
- : IsMerged(true)
- , AutoMergeInterval(autoMergeInterval)
- , NotMergedCount(0)
- {
- }
-
- virtual ~IMerger() {
- }
-
- // returns true if something is added
- virtual bool Add(const void* data, size_t size) {
- if (AddImpl(data, size)) {
- AutoMerge();
- return true;
- }
- return false;
- }
-
- virtual void Merge() {
- if (!IsMerged) {
- MergeImpl();
- IsMerged = true;
- }
- }
-
- virtual void Reset() {
- ResetImpl();
- IsMerged = true;
- }
-
- // You can add some more result-getters if you want.
- // Do not forget to call Merge() in the beginning of each merger.
- virtual void GetResult(TBuffer& buffer) = 0;
-
- protected:
- // AutoMerge() is called in Add() after each AddImpl()
- void AutoMerge() {
- IsMerged = false;
- if (AutoMergeInterval) {
- ++NotMergedCount;
- if (NotMergedCount >= AutoMergeInterval) {
- MergeImpl();
- IsMerged = true;
- NotMergedCount = 0;
- }
- }
- }
-
- // Implementation of merger: define it in derivatives
- virtual bool AddImpl(const void* data, size_t size) = 0; // returns true if something is added
- virtual void MergeImpl() = 0;
- virtual void ResetImpl() = 0;
- };
-
-}
diff --git a/library/cpp/histogram/adaptive/multi_histogram.h b/library/cpp/histogram/adaptive/multi_histogram.h
deleted file mode 100644
index 41caac5ba68..00000000000
--- a/library/cpp/histogram/adaptive/multi_histogram.h
+++ /dev/null
@@ -1,143 +0,0 @@
-#pragma once
-
-#include "histogram.h"
-#include "auto_histogram.h"
-
-#include <library/cpp/histogram/adaptive/protos/histo.pb.h>
-
-#include <util/generic/hash.h>
-#include <util/generic/ptr.h>
-#include <utility>
-
-namespace NKiwiAggr {
- template <class TMyHistogram>
- class TMultiHistogram {
- private:
- static const size_t DEFAULT_INTERVALS = 100;
-
- typedef THashMap<ui64, IHistogramPtr> THistogramsMap;
- THistogramsMap Histograms;
- size_t Intervals;
-
- public:
- TMultiHistogram(size_t intervals = DEFAULT_INTERVALS)
- : Intervals(intervals)
- {
- }
-
- TMultiHistogram(const THistograms& histograms, size_t defaultIntervals = DEFAULT_INTERVALS)
- : Intervals(defaultIntervals)
- {
- FromProto(histograms);
- }
-
- virtual ~TMultiHistogram() {
- }
-
- void Clear() {
- Histograms.clear();
- }
-
- void Add(const THistoRecs& histoRecs) {
- for (size_t i = 0; i < histoRecs.HistoRecsSize(); ++i) {
- Add(histoRecs.GetHistoRecs(i).GetId(), histoRecs.GetHistoRecs(i).GetValue(), histoRecs.GetHistoRecs(i).GetWeight());
- }
- }
-
- void Add(const THistoRec& histoRec) {
- Add(histoRec.GetId(), histoRec.GetValue(), histoRec.GetWeight());
- }
-
- void Add(ui64 id, double value, double weight) {
- THistogramsMap::const_iterator it = Histograms.find(id);
- if (it == Histograms.end()) {
- it = Histograms.insert(std::make_pair(id, IHistogramPtr(new TMyHistogram(Intervals, id)))).first;
- }
- it->second->Add(value, weight);
- }
-
- void Multiply(double factor) {
- for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- it->second->Multiply(factor);
- }
- }
-
- TVector<ui64> GetIds() const {
- TVector<ui64> result(0);
- for (THistogramsMap::const_iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- result.push_back(it->first);
- }
- return result;
- }
-
- IHistogramPtr GetHistogram(ui64 id) const {
- THistogramsMap::const_iterator it = Histograms.find(id);
- if (it != Histograms.end()) {
- return it->second;
- }
- return IHistogramPtr();
- }
-
- double GetMaxHistoSum() const {
- double sum = 0.0;
- for (THistogramsMap::const_iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- sum = std::max(sum, it->second->GetSum());
- }
- return sum;
- }
-
- bool Empty() {
- for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- if (!it->second->Empty()) {
- return false;
- }
- }
- return true;
- }
-
- virtual double OverallSum() {
- double sum = 0.0;
- for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- sum += it->second->GetSum();
- }
- return sum;
- }
-
- void FromProto(const THistograms& histograms) {
- for (size_t i = 0; i < histograms.HistoRecsSize(); ++i) {
- IHistogramPtr newHisto(new TMyHistogram(histograms.GetHistoRecs(i), Intervals));
- if (!newHisto->Empty()) {
- Histograms[newHisto->GetId()] = newHisto;
- }
- }
- }
-
- void ToProto(THistograms& histograms) {
- histograms.Clear();
- for (THistogramsMap::iterator it = Histograms.begin(); it != Histograms.end(); ++it) {
- THistogram* histo = histograms.AddHistoRecs();
- it->second->ToProto(*histo);
- }
- }
-
- void PrecomputePartialSums() {
- for (auto& it : Histograms) {
- it.second->PrecomputePartialSums();
- }
- }
- };
-
- template <class TMerger, class TSomeMultiHistogram>
- static void MergeToMultiHistogram(const void* data, size_t size, TSomeMultiHistogram& multiHistogram, ui32 intervals = 300) {
- TMerger merger(intervals);
- merger.Add(data, size);
- THistograms histograms;
- merger.GetResult(histograms);
- multiHistogram.FromProto(histograms);
- }
-
- // Good for parsing from THistograms protobuf
- typedef TMultiHistogram<TAutoHistogram> TAutoMultiHistogram;
- typedef TAtomicSharedPtr<TAutoMultiHistogram> TAutoMultiHistogramPtr;
-
-}
diff --git a/library/cpp/histogram/simple/histogram.cpp b/library/cpp/histogram/simple/histogram.cpp
new file mode 100644
index 00000000000..35247627409
--- /dev/null
+++ b/library/cpp/histogram/simple/histogram.cpp
@@ -0,0 +1 @@
+#include "histogram.h"
diff --git a/library/cpp/histogram/simple/histogram.h b/library/cpp/histogram/simple/histogram.h
new file mode 100644
index 00000000000..ceb09efc5c4
--- /dev/null
+++ b/library/cpp/histogram/simple/histogram.h
@@ -0,0 +1,140 @@
+#pragma once
+
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/json/writer/json.h>
+#include <library/cpp/threading/future/async.h>
+
+#include <util/generic/algorithm.h>
+#include <util/generic/hash.h>
+#include <util/generic/utility.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/stream/format.h>
+#include <util/string/builder.h>
+#include <util/thread/pool.h>
+#include <util/system/mutex.h>
+
+namespace NSimpleHistogram {
+    /// Read-only set of recorded samples that answers percentile queries.
+    ///
+    /// The constructor stores the samples exactly as given and never reorders
+    /// them; ValueAtPercentile() indexes straight into that storage, so the
+    /// samples have to be in ascending order for the answers to be meaningful
+    /// (THistogramCalcer::Calc() sorts before constructing a THistogram).
+    template <typename T>
+    class THistogram {
+    public:
+        /// Takes over `values` by moving them into the histogram.
+        explicit THistogram(TVector<T>&& values)
+            : Values_(std::move(values))
+        {
+        }
+
+        /// Number of samples held.
+        size_t TotalCount() const {
+            return Values_.size();
+        }
+
+        /// Returns the sample at index `percentile * TotalCount()` (truncated),
+        /// clamped to the last sample so that `percentile == 1.0` stays in range.
+        ///
+        /// `percentile` is a fraction in [0.0, 1.0], not a number in [0, 100].
+        /// Both preconditions (non-empty histogram, fraction in range) are
+        /// checked with Y_ASSERT only.
+        T ValueAtPercentile(double percentile) const {
+            Y_ASSERT(!Values_.empty());
+            Y_ASSERT(percentile >= 0.0 && percentile <= 1.0);
+
+            const size_t sampleCount = Values_.size();
+            const size_t position = static_cast<size_t>(percentile * sampleCount);
+            return Values_[Min(sampleCount - 1, position)];
+        }
+
+    private:
+        TVector<T> Values_;
+    };
+
+    /// Buffers raw samples and turns them into a THistogram on demand.
+    template <typename T>
+    class THistogramCalcer {
+    public:
+        /// Number of samples currently buffered.
+        size_t TotalCount() const {
+            return Values_.size();
+        }
+
+        /// Appends one sample to the buffer.
+        void RecordValue(T value) {
+            Values_.push_back(value);
+        }
+
+        /// Sorts the buffered samples (skipping the sort when they are already
+        /// in order) and hands them over to a new THistogram.
+        ///
+        /// The buffer is moved into the result rather than copied, so this
+        /// object no longer holds the recorded samples after the call.
+        THistogram<T> Calc() {
+            const auto first = Values_.begin();
+            const auto last = Values_.end();
+
+            const bool alreadyOrdered = IsSorted(first, last);
+            if (!alreadyOrdered) {
+                Sort(first, last);
+            }
+
+            return THistogram<T>(std::move(Values_));
+        }
+
+    private:
+        TVector<T> Values_;
+    };
+
+    /// Keeps one THistogramCalcer per name.
+    template <typename T>
+    class TMultiHistogramCalcer {
+    public:
+        /// Appends `value` to the calcer stored under `name`.
+        /// The lookup goes through the map's operator[].
+        void RecordValue(TStringBuf name, T value) {
+            auto& calcer = Calcers_[name];
+            calcer.RecordValue(value);
+        }
+
+        /// Calls Calc() on every stored calcer and returns the resulting
+        /// histograms keyed by the same names. Each Calc() moves the samples
+        /// out of its calcer.
+        THashMap<TString, THistogram<T>> Calc() {
+            THashMap<TString, THistogram<T>> histograms;
+
+            for (auto& [name, calcer] : Calcers_) {
+                histograms.emplace(name, calcer.Calc());
+            }
+
+            return histograms;
+        }
+
+    private:
+        THashMap<TString, THistogramCalcer<T>> Calcers_;
+    };
+
+    /// Wrapper around TMultiHistogramCalcer that serializes every access to
+    /// the wrapped calcer with a mutex.
+    template <typename T>
+    class TThreadSafeMultiHistogramCalcer {
+    public:
+        /// Records `value` under `name` while holding the mutex.
+        void RecordValue(TStringBuf name, T value) {
+            TGuard<TMutex> guard(Mutex_);
+            Calcer_.RecordValue(name, value);
+        }
+
+        /// Builds the per-name histograms while holding the mutex.
+        ///
+        /// The wrapped Calc() walks the calcer map and moves the samples out of
+        /// it, so it must not overlap with a RecordValue() that is modifying
+        /// the same map; taking the guard here closes that race.
+        THashMap<TString, THistogram<T>> Calc() {
+            TGuard<TMutex> guard(Mutex_);
+            return Calcer_.Calc();
+        }
+
+    private:
+        TMutex Mutex_;
+        TMultiHistogramCalcer<T> Calcer_;
+    };
+
+    /// Renders one histogram as JSON: a "Q<percent>" entry for every requested
+    /// percentile plus a "RecordCount" entry with the number of samples.
+    ///
+    /// Each element of `percentiles` is passed unchanged to
+    /// ValueAtPercentile() and multiplied by 100 to build its key
+    /// (formatted with Prec(..., PREC_POINT_DIGITS_STRIP_ZEROES, 2)).
+    template <typename T>
+    NJson::TJsonValue ToJson(const THistogram<T>& hist, const TVector<double>& percentiles) {
+        NJson::TJsonValue result;
+
+        for (const double fraction : percentiles) {
+            const TString key = TStringBuilder() << "Q" << Prec(fraction * 100, PREC_POINT_DIGITS_STRIP_ZEROES, 2);
+            result[key] = hist.ValueAtPercentile(fraction);
+        }
+
+        result["RecordCount"] = hist.TotalCount();
+        return result;
+    }
+
+    /// Renders a name -> histogram map as JSON, with one entry per name whose
+    /// value is the single-histogram ToJson() output for the same `percentiles`.
+    ///
+    /// When `hists` is empty the loop never runs and the default-constructed
+    /// TJsonValue is returned as is.
+    template <typename T>
+    NJson::TJsonValue ToJson(const THashMap<TString, THistogram<T>>& hists, const TVector<double>& percentiles) {
+        NJson::TJsonValue result;
+
+        for (const auto& [name, histogram] : hists) {
+            result[name] = ToJson(histogram, percentiles);
+        }
+
+        return result;
+    }
+
+    /// Serializes a name -> histogram map to a JSON string.
+    ///
+    /// With `format` set (the default) the writer is configured with a
+    /// 4-space indent; otherwise the writer's default layout is used.
+    template <typename T>
+    TString ToJsonStr(const THashMap<TString, THistogram<T>>& hists, const TVector<double>& percentiles, bool format = true) {
+        NJson::TJsonValue tree = ToJson(hists, percentiles);
+
+        NJsonWriter::TBuf writer;
+        if (format) {
+            writer.SetIndentSpaces(4);
+        }
+
+        // NOTE(review): the second argument is presumably the writer's
+        // "sort keys" flag - confirm against NJsonWriter::TBuf::WriteJsonValue.
+        return writer.WriteJsonValue(&tree, true).Str();
+    }
+
+}