aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/lwtrace/mon/analytics
diff options
context:
space:
mode:
author: Devtools Arcadia <arcadia-devtools@yandex-team.ru>, 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>, 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/lwtrace/mon/analytics
download: ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/lwtrace/mon/analytics')
-rw-r--r-- library/cpp/lwtrace/mon/analytics/all.h | 8
-rw-r--r-- library/cpp/lwtrace/mon/analytics/analytics.cpp | 5
-rw-r--r-- library/cpp/lwtrace/mon/analytics/csv_output.h | 52
-rw-r--r-- library/cpp/lwtrace/mon/analytics/data.h | 108
-rw-r--r-- library/cpp/lwtrace/mon/analytics/html_output.h | 86
-rw-r--r-- library/cpp/lwtrace/mon/analytics/json_output.h | 98
-rw-r--r-- library/cpp/lwtrace/mon/analytics/transform.h | 204
-rw-r--r-- library/cpp/lwtrace/mon/analytics/util.h | 122
-rw-r--r-- library/cpp/lwtrace/mon/analytics/ya.make | 15
9 files changed, 698 insertions, 0 deletions
diff --git a/library/cpp/lwtrace/mon/analytics/all.h b/library/cpp/lwtrace/mon/analytics/all.h
new file mode 100644
index 0000000000..02ddfb83f2
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/all.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "csv_output.h"
+#include "data.h"
+#include "html_output.h"
+#include "json_output.h"
+#include "transform.h"
+#include "util.h"
diff --git a/library/cpp/lwtrace/mon/analytics/analytics.cpp b/library/cpp/lwtrace/mon/analytics/analytics.cpp
new file mode 100644
index 0000000000..1b25263386
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/analytics.cpp
@@ -0,0 +1,5 @@
+#include "all.h"
+
+// This translation unit defines nothing inside NAnalytics; it only includes all.h.
+// NOTE(review): presumably it exists so the library has a source file that
+// compiles the headers -- confirm against the build setup.
+namespace NAnalytics {
+
+}
diff --git a/library/cpp/lwtrace/mon/analytics/csv_output.h b/library/cpp/lwtrace/mon/analytics/csv_output.h
new file mode 100644
index 0000000000..90ded32f5d
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/csv_output.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <util/string/printf.h>
+#include <util/stream/str.h>
+#include <util/generic/set.h>
+#include "data.h"
+
+namespace NAnalytics {
+
+// Renders the table as separator-delimited text, one line per row.
+//
+// The column set is the sorted union of the keys of all rows. A leading
+// "Name" column is emitted when at least one row has a non-empty Name.
+// Cells a row has no value for are written as "-". Names and values are
+// written verbatim: the separator is not quoted or escaped.
+//
+// in   - table to render
+// sep  - text placed between adjacent cells
+// head - whether to emit the header line with column names
+inline TString ToCsv(const TTable& in, TString sep = TString("\t"), bool head = true)
+{
+    TSet<TString> columns;
+    bool anyNamed = false;
+    for (const TRow& r : in) {
+        if (!r.Name.empty()) {
+            anyNamed = true;
+        }
+        for (const auto& cell : r) {
+            columns.insert(cell.first);
+        }
+    }
+
+    TStringStream out;
+    if (head) {
+        bool needSep = false;
+        if (anyNamed) {
+            out << "Name";
+            needSep = true;
+        }
+        for (const TString& col : columns) {
+            if (needSep) {
+                out << sep;
+            }
+            out << col;
+            needSep = true;
+        }
+        out << Endl;
+    }
+
+    for (const TRow& r : in) {
+        bool needSep = false;
+        if (anyNamed) {
+            out << r.Name;
+            needSep = true;
+        }
+        for (const TString& col : columns) {
+            if (needSep) {
+                out << sep;
+            }
+            needSep = true;
+            TString text;
+            if (r.GetAsString(col, text)) {
+                out << text;
+            } else {
+                out << TString("-");
+            }
+        }
+        out << Endl;
+    }
+    return out.Str();
+}
+
+}
diff --git a/library/cpp/lwtrace/mon/analytics/data.h b/library/cpp/lwtrace/mon/analytics/data.h
new file mode 100644
index 0000000000..4b643fe20b
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/data.h
@@ -0,0 +1,108 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/hash.h>
+#include <util/generic/vector.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+
+#include <variant>
+
+namespace NAnalytics {
+
+using TRowValue = std::variant<i64, ui64, double, TString>;
+
+// Formats whichever alternative |val| currently holds by streaming it into a
+// TStringBuilder.
+//
+// Declared inline: a non-inline function defined in a header yields
+// duplicate-symbol link errors (a one-definition-rule violation) as soon as
+// two translation units include the header.
+inline TString ToString(const TRowValue& val) {
+    TStringBuilder builder;
+    std::visit([&builder] (const auto& arg) {
+        builder << arg;
+    }, val);
+    return builder;
+}
+
+// A table row: a map from column name to a typed value, plus an optional
+// row name.
+struct TRow : public THashMap<TString, TRowValue> {
+    TString Name;
+
+    // Reads column |name| into |value|.
+    //
+    // Returns true only when the column exists and currently holds exactly
+    // type T; no conversion between alternatives is attempted. |value| is left
+    // untouched when false is returned.
+    // For T = double the pseudo-column "_count" always reads as 1.0, without
+    // consulting the map.
+    template<typename T>
+    bool Get(const TString& name, T& value) const {
+        if constexpr (std::is_same_v<double, T>) {
+            if (name == "_count") { // Special values
+                value = 1.0;
+                return true;
+            }
+        }
+        const auto iter = find(name);
+        if (iter == end()) {
+            return false;
+        }
+        // get_if signals a type mismatch with nullptr, so a mismatch costs no
+        // throw and no exception has to be swallowed.
+        if (const T* held = std::get_if<T>(&iter->second)) {
+            value = *held;
+            return true;
+        }
+        return false;
+    }
+
+    // Returns the value of column |name| when Get<T> succeeds, |dflt| otherwise.
+    // Const-qualified because it only reads the row.
+    template<typename T = double>
+    T GetOrDefault(const TString& name, T dflt = T()) const {
+        Get(name, dflt);
+        return dflt;
+    }
+
+    // Writes the string form of column |name| into |value|.
+    // Returns false (leaving |value| untouched) when the column is absent.
+    bool GetAsString(const TString& name, TString& value) const {
+        const auto iter = find(name);
+        if (iter == end()) {
+            return false;
+        }
+        value = ToString(iter->second);
+        return true;
+    }
+};
+
+// String-to-string attribute map.
+using TAttributes = THashMap<TString, TString>;
+
+// A table: an ordered sequence of rows (inherited from TVector<TRow>) plus
+// table-level attributes.
+struct TTable : public TVector<TRow> {
+ TAttributes Attributes;
+};
+
+// Dense Rows x Cols matrix of doubles kept in one contiguous vector; the
+// element (row, col) lives at index row * Cols + col.
+struct TMatrix : public TVector<double> {
+    size_t Rows;
+    size_t Cols;
+
+    // Creates a rows x cols matrix with value-initialized (zero) cells.
+    explicit TMatrix(size_t rows = 0, size_t cols = 0)
+        : TVector<double>(rows * cols)
+        , Rows(rows)
+        , Cols(cols)
+    {}
+
+    // Changes the dimensions and sets every cell to zero.
+    void Reset(size_t rows, size_t cols)
+    {
+        Rows = rows;
+        Cols = cols;
+        assign(rows * cols, 0.0);
+    }
+
+    // Mutable access to one cell; both indices are checked with Y_VERIFY.
+    double& Cell(size_t row, size_t col)
+    {
+        Y_VERIFY(row < Rows);
+        Y_VERIFY(col < Cols);
+        const size_t offset = row * Cols + col;
+        return (*this)[offset];
+    }
+
+    // Read access to one cell; both indices are checked with Y_VERIFY.
+    double Cell(size_t row, size_t col) const
+    {
+        Y_VERIFY(row < Rows);
+        Y_VERIFY(col < Cols);
+        const size_t offset = row * Cols + col;
+        return (*this)[offset];
+    }
+
+    // Sum of all cells, accumulated in storage order.
+    double CellSum() const
+    {
+        double total = 0.0;
+        for (auto it = begin(); it != end(); ++it) {
+            total += *it;
+        }
+        return total;
+    }
+};
+
+}
diff --git a/library/cpp/lwtrace/mon/analytics/html_output.h b/library/cpp/lwtrace/mon/analytics/html_output.h
new file mode 100644
index 0000000000..f775f216b9
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/html_output.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include <util/string/printf.h>
+#include <util/stream/str.h>
+#include <util/generic/set.h>
+#include "data.h"
+
+namespace NAnalytics {
+
+// Renders the table as an HTML <table>: a header row with the column names
+// followed by one body row per table row.
+//
+// Columns are the sorted union of the keys of all rows; a leading "Name"
+// column appears when at least one row has a non-empty Name. Missing cells
+// are rendered as "-".
+// NOTE(review): names and values are written as-is, without HTML escaping.
+inline TString ToHtml(const TTable& in)
+{
+    TSet<TString> columns;
+    bool anyNamed = false;
+    for (const TRow& r : in) {
+        if (!r.Name.empty()) {
+            anyNamed = true;
+        }
+        for (const auto& cell : r) {
+            columns.insert(cell.first);
+        }
+    }
+
+    TStringStream html;
+    html << "<table>" << "<thead><tr>";
+    if (anyNamed) {
+        html << "<th>Name</th>";
+    }
+    for (const TString& col : columns) {
+        html << "<th>" << col << "</th>";
+    }
+    html << "</tr></thead><tbody>";
+
+    for (const TRow& r : in) {
+        html << "<tr>";
+        if (anyNamed) {
+            html << "<th>" << r.Name << "</th>";
+        }
+        for (const TString& col : columns) {
+            TString text;
+            if (!r.GetAsString(col, text)) {
+                text = TString("-");
+            }
+            html << "<td>" << text << "</td>";
+        }
+        html << "</tr>";
+    }
+    html << "</tbody></table>";
+
+    return html.Str();
+}
+
+// Renders the table as a transposed HTML <table>: one body row per column
+// name, one cell per table row.
+//
+// The header row (row names) is emitted only when at least one row has a
+// non-empty Name. Columns are the sorted union of the keys of all rows;
+// missing cells are rendered as "-".
+// NOTE(review): names and values are written as-is, without HTML escaping.
+inline TString ToTransposedHtml(const TTable& in)
+{
+    TSet<TString> columns;
+    bool anyNamed = false;
+    for (const TRow& r : in) {
+        if (!r.Name.empty()) {
+            anyNamed = true;
+        }
+        for (const auto& cell : r) {
+            columns.insert(cell.first);
+        }
+    }
+
+    TStringStream html;
+    html << "<table><thead>";
+    if (anyNamed) {
+        html << "<tr>" << "<th>Name</th>";
+        for (const TRow& r : in) {
+            html << "<th>" << r.Name << "</th>";
+        }
+        html << "</tr>";
+    }
+    html << "</thead><tbody>";
+
+    for (const TString& col : columns) {
+        html << "<tr>" << "<th>" << col << "</th>";
+        for (const TRow& r : in) {
+            TString text;
+            if (!r.GetAsString(col, text)) {
+                text = TString("-");
+            }
+            html << "<td>" << text << "</td>";
+        }
+        html << "</tr>";
+    }
+    html << "</tbody></table>";
+
+    return html.Str();
+}
+
+}
diff --git a/library/cpp/lwtrace/mon/analytics/json_output.h b/library/cpp/lwtrace/mon/analytics/json_output.h
new file mode 100644
index 0000000000..189f9802d3
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/json_output.h
@@ -0,0 +1,98 @@
+#pragma once
+
+#include <util/string/printf.h>
+#include <util/stream/str.h>
+#include <util/string/vector.h>
+#include <util/generic/set.h>
+#include <util/generic/hash_set.h>
+#include "data.h"
+#include "util.h"
+
+namespace NAnalytics {
+
+// Builds a JSON array of series objects, one per entry of |ynos|.
+//
+// |xno| and every element of |ynos| are "name-opt-opt..." strings parsed by
+// ParseNameAndOpts; the only option inspected here is "stack". Each series
+// holds the rows for which both the x and y columns read as double, emitted
+// as [x, y, rowName, realX, realY]:
+//  - without "stack", points are sorted by (x, y, name) and plotted at their
+//    real values;
+//  - with "stack" on x, points are sorted by (name, x, y) and x is the
+//    running sum of the preceding real x values (likewise for y with "stack"
+//    on that series);
+//  - when any stacking is active and the series is non-empty, a closing
+//    point with an empty label is appended at the accumulated totals.
+// |opts|, when non-empty, is inserted verbatim at the start of every series
+// object. Plotted coordinates pass through Finitize; the "real value" fields
+// do not. Labels are written without JSON escaping.
+inline TString ToJsonFlot(const TTable& in, const TString& xno, const TVector<TString>& ynos, const TString& opts = TString())
+{
+    using TPt = std::tuple<double, double, TString>; // (x, y, row name)
+
+    TString xName;
+    THashSet<TString> xOptions;
+    ParseNameAndOpts(xno, xName, xOptions);
+    const bool stackX = xOptions.contains("stack");
+
+    TStringStream json;
+    json << "[ ";
+    bool firstSeries = true;
+    for (const TString& yno : ynos) {
+        TString yName;
+        THashSet<TString> yOptions;
+        ParseNameAndOpts(yno, yName, yOptions);
+        const bool stackY = yOptions.contains("stack");
+
+        json << (firstSeries? "": ",\n ") << "{ " << opts << (opts? ", ": "") << "\"label\": \"" << yName << "\", \"data\": [ ";
+
+        // Collect the rows that have both coordinates
+        std::vector<TPt> points;
+        for (const TRow& row : in) {
+            double px, py;
+            if (row.Get(xName, px) && row.Get(yName, py)) {
+                points.emplace_back(px, py, row.Name);
+            }
+        }
+
+        if (stackX) {
+            // Order by name first, then by x, then by y
+            std::sort(points.begin(), points.end(), [] (const TPt& lhs, const TPt& rhs) {
+                return std::make_tuple(std::get<2>(lhs), std::get<0>(lhs), std::get<1>(lhs)) <
+                       std::make_tuple(std::get<2>(rhs), std::get<0>(rhs), std::get<1>(rhs));
+            });
+        } else {
+            std::sort(points.begin(), points.end());
+        }
+
+        double plotX = 0.0, accX = 0.0;
+        double plotY = 0.0, accY = 0.0;
+        bool firstPoint = true;
+        for (const TPt& pt : points) {
+            const double realX = std::get<0>(pt);
+            const double realY = std::get<1>(pt);
+
+            if (stackX) {
+                plotX = accX;
+                accX += realX;
+            } else {
+                plotX = realX;
+            }
+
+            if (stackY) {
+                plotY = accY;
+                accY += realY;
+            } else {
+                plotY = realY;
+            }
+
+            if (!firstPoint) {
+                json << ", ";
+            }
+            json << "["
+                 << Sprintf("%.6lf", Finitize(plotX)) << ", " // x coordinate
+                 << Sprintf("%.6lf", Finitize(plotY)) << ", " // y coordinate
+                 << "\"" << std::get<2>(pt) << "\", "         // label
+                 << Sprintf("%.6lf", realX) << ", "           // x label (real value)
+                 << Sprintf("%.6lf", realY)                   // y label (real value)
+                 << "]";
+            firstPoint = false;
+        }
+
+        // Closing point at the accumulated totals (only for non-empty stacked series)
+        if (!firstPoint && (stackX || stackY)) {
+            if (stackX) {
+                plotX = accX;
+            }
+            if (stackY) {
+                plotY = accY;
+            }
+            json << ", " << "["
+                 << Sprintf("%.6lf", Finitize(plotX)) << ", " // x coordinate
+                 << Sprintf("%.6lf", Finitize(plotY)) << ", " // y coordinate
+                 << "\"\", "
+                 << Sprintf("%.6lf", plotX) << ", "           // x label (real value)
+                 << Sprintf("%.6lf", plotY)                   // y label (real value)
+                 << "]";
+        }
+        json << " ] }";
+        firstSeries = false;
+    }
+    json << "\n]";
+    return json.Str();
+}
+
+}
diff --git a/library/cpp/lwtrace/mon/analytics/transform.h b/library/cpp/lwtrace/mon/analytics/transform.h
new file mode 100644
index 0000000000..f7dc9adb5b
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/transform.h
@@ -0,0 +1,204 @@
+#pragma once
+
+#include "data.h"
+
+#include <util/generic/set.h>
+
+#include <cmath>
+#include <limits>
+
+namespace NAnalytics {
+
+// Builds a histogram of y over x with buckets of width |dx| covering [x1; x2].
+//
+// skip          - predicate; rows for which it returns true are ignored
+// xn_out, x_in  - output column for the bucket midpoint / functor giving a row's x
+// yn_out, y_in  - prefix of the output columns / functor giving a row's y
+//
+// Each output row is named after its interval and carries the bucket
+// midpoint, "<yn_out>_sum" (sum of y of the rows in the bucket) and, when the
+// total is non-zero, "<yn_out>_share" (bucket sum divided by the total).
+// The total includes rows whose x falls outside every bucket.
+// A row with x == x2 is counted in the bucket below its computed index so
+// that the right edge is inclusive.
+//
+// Degenerate ranges: dx == 0 yields a single bucket that receives every row
+// (same convention as HistogramAll); a range that yields a negative or
+// non-finite bucket count yields an empty table.
+template <class TSkip, class TX, class TY>
+inline TTable Histogram(const TTable& in, TSkip skip,
+                        const TString& xn_out, TX x_in,
+                        const TString& yn_out, TY y_in,
+                        double x1, double x2, double dx)
+{
+    // Validate the bucket count in floating point before narrowing it: a
+    // negative, NaN or out-of-range value must never reach resize().
+    const double span = (dx == 0.0 ? 1.0 : (x2 - x1) / dx);
+    const double maxBuckets = static_cast<double>(std::numeric_limits<long>::max());
+    const long buckets = (span >= 1.0 && span < maxBuckets) ? static_cast<long>(span) : 0;
+
+    TTable out;
+    TString yn_sum = yn_out + "_sum";
+    TString yn_share = yn_out + "_share";
+    double ysum = 0.0;
+    out.resize(buckets);
+    for (size_t i = 0; i < out.size(); i++) {
+        double lb = x1 + dx*i;
+        double ub = lb + dx;
+        out[i].Name = "[" + ToString(lb) + ";" + ToString(ub) + (ub==x2? "]": ")");
+        out[i][xn_out] = (lb + ub) / 2;
+        out[i][yn_sum] = 0.0;
+    }
+    for (const auto& row : in) {
+        if (skip(row)) {
+            continue;
+        }
+        double x = x_in(row);
+        // floor, not truncation: a value just below x1 must map to index -1
+        // (out of range) instead of being rounded up into bucket 0.
+        double pos = 0.0;
+        if (dx != 0.0) {
+            pos = std::floor((x - x1) / dx);
+            if (x == x2) { // Special hack to include right edge
+                pos -= 1.0;
+            }
+        }
+        double y = y_in(row);
+        ysum += y;
+        // The comparison is false for NaN, so rows with a NaN position are skipped
+        if (pos >= 0.0 && pos < static_cast<double>(buckets)) {
+            TRow& bucket = out[static_cast<size_t>(pos)];
+            bucket[yn_sum] = y + bucket.GetOrDefault(yn_sum, 0.0);
+        }
+    }
+    if (ysum != 0.0) {
+        for (TRow& row : out) {
+            row[yn_share] = row.GetOrDefault(yn_sum, 0.0) / ysum;
+        }
+    }
+    return out;
+}
+
+// Builds a histogram over column |xn| for every other column of the table.
+//
+// Buckets of width |dx| cover [x1; x2]; dx == 0 yields a single bucket that
+// receives every row. A range that yields a negative or non-finite bucket
+// count yields an empty table. Rows without a double-valued |xn| are ignored.
+//
+// Each output row is named after its interval, stores the bucket midpoint
+// under |xn| and, per source column C read as double:
+//   C_cnt, C_sum, C_min, C_max - over the rows that fell into the bucket
+//   C_avg                      - C_sum / C_cnt, when C_cnt is non-zero
+//   C_share                    - C_sum divided by the sum of C over all
+//                                accepted rows (including out-of-range
+//                                ones), when that sum is non-zero
+// The pseudo-column "_count" contributes "_count_sum" (rows per bucket) and
+// "_count_share".
+// With dx > 0 a row with x == x2 is counted in the bucket below its computed
+// index so that the right edge is inclusive.
+inline TTable HistogramAll(const TTable& in, const TString& xn, double x1, double x2, double dx)
+{
+    // Validate the bucket count in floating point before narrowing it: a
+    // negative, NaN or out-of-range value must never reach resize().
+    const double span = (dx == 0.0 ? 1.0 : (x2 - x1) / dx);
+    const double maxBuckets = static_cast<double>(std::numeric_limits<long>::max());
+    const long buckets = (span >= 1.0 && span < maxBuckets) ? static_cast<long>(span) : 0;
+
+    TTable out;
+    THashMap<TString, double> colSum;
+    out.resize(buckets);
+
+    TSet<TString> cols;
+    for (const TRow& row : in) {
+        for (const auto& kv : row) {
+            cols.insert(kv.first);
+        }
+    }
+    cols.insert("_count");
+    cols.erase(xn);
+
+    for (const TString& col : cols) {
+        colSum[col] = 0.0;
+    }
+
+    for (size_t i = 0; i < out.size(); i++) {
+        double lb = x1 + dx*i;
+        double ub = lb + dx;
+        TRow& row = out[i];
+        row.Name = "[" + ToString(lb) + ";" + ToString(ub) + (ub==x2? "]": ")");
+        row[xn] = (lb + ub) / 2;
+        for (const TString& col : cols) {
+            row[col + "_sum"] = 0.0;
+        }
+    }
+    for (const TRow& row_in : in) {
+        double x;
+        if (!row_in.Get(xn, x)) {
+            continue;
+        }
+        // floor, not truncation: a value just below x1 must map to index -1
+        // (out of range) instead of being rounded up into bucket 0.
+        double pos = 0.0;
+        if (dx != 0.0) {
+            pos = std::floor((x - x1) / dx);
+            if (x == x2 && dx > 0.0) { // Special hack to include right edge
+                pos -= 1.0;
+            }
+        }
+        // The comparison is false for NaN, so such rows only affect the totals
+        TRow* bucket = nullptr;
+        if (pos >= 0.0 && pos < static_cast<double>(buckets)) {
+            bucket = &out[static_cast<size_t>(pos)];
+        }
+        for (const auto& kv : row_in) {
+            const TString& yn = kv.first;
+            if (yn == xn) {
+                continue;
+            }
+            double y;
+            if (!row_in.Get(yn, y)) {
+                continue;
+            }
+            colSum[yn] += y;
+            if (bucket) {
+                const TString cntKey = yn + "_cnt";
+                const TString sumKey = yn + "_sum";
+                const TString minKey = yn + "_min";
+                const TString maxKey = yn + "_max";
+                (*bucket)[cntKey] = bucket->GetOrDefault(cntKey) + 1;
+                (*bucket)[sumKey] = bucket->GetOrDefault(sumKey) + y;
+                if (bucket->contains(minKey)) {
+                    (*bucket)[minKey] = Min(y, bucket->GetOrDefault(minKey));
+                } else {
+                    (*bucket)[minKey] = y;
+                }
+                if (bucket->contains(maxKey)) {
+                    (*bucket)[maxKey] = Max(y, bucket->GetOrDefault(maxKey));
+                } else {
+                    (*bucket)[maxKey] = y;
+                }
+            }
+        }
+        colSum["_count"]++;
+        if (bucket) {
+            (*bucket)["_count_sum"] = bucket->GetOrDefault("_count_sum") + 1;
+        }
+    }
+    for (TRow& row : out) {
+        for (const TString& col : cols) {
+            double ysum = colSum[col];
+            if (col != "_count") {
+                if (row.GetOrDefault(col + "_cnt") != 0.0) {
+                    row[col + "_avg"] = row.GetOrDefault(col + "_sum") / row.GetOrDefault(col + "_cnt");
+                }
+            }
+            if (ysum != 0.0) {
+                row[col + "_share"] = row.GetOrDefault(col + "_sum") / ysum;
+            }
+        }
+    }
+    return out;
+}
+
+// Computes the pairwise covariance of all columns of the table.
+//
+// Columns are indexed in sorted name order; cell (i, j) of the result is the
+// mean of (x - mean_i) * (y - mean_j) over the rows in which both columns
+// read as double (division by the number of such rows). A column's mean is
+// taken over all rows in which that column reads as double. Cells with no
+// contributing row are the result of 0.0 / 0.0.
+inline TMatrix CovarianceMatrix(const TTable& in)
+{
+    struct TAggregate {
+        size_t Idx = 0;
+        double Sum = 0;
+        size_t Count = 0;
+        double Mean = 0;
+    };
+
+    // Sorted union of column names
+    TSet<TString> names;
+    for (const TRow& row : in) {
+        for (const auto& cell : row) {
+            names.insert(cell.first);
+        }
+    }
+
+    // Assign matrix indices in sorted order
+    THashMap<TString, TAggregate> stats;
+    size_t nextIdx = 0;
+    for (const TString& name : names) {
+        stats[name].Idx = nextIdx++;
+    }
+
+    // First pass: per-column sums and counts
+    for (const TRow& row : in) {
+        for (const auto& cell : row) {
+            double v;
+            if (row.Get(cell.first, v)) {
+                TAggregate& s = stats[cell.first];
+                s.Sum += v;
+                s.Count++;
+            }
+        }
+    }
+
+    for (auto& entry : stats) {
+        TAggregate& s = entry.second;
+        s.Mean = s.Sum / s.Count;
+    }
+
+    // Second pass: accumulate products of deviations for every column pair
+    const size_t dim = names.size();
+    TMatrix pairCount(dim, dim);
+    TMatrix cov(dim, dim);
+    for (const TRow& row : in) {
+        for (const auto& a : row) {
+            double x;
+            if (!row.Get(a.first, x)) {
+                continue;
+            }
+            TAggregate& xs = stats[a.first];
+            for (const auto& b : row) {
+                double y;
+                if (row.Get(b.first, y)) {
+                    TAggregate& ys = stats[b.first];
+                    pairCount.Cell(xs.Idx, ys.Idx)++;
+                    cov.Cell(xs.Idx, ys.Idx) += (x - xs.Mean) * (y - ys.Mean);
+                }
+            }
+        }
+    }
+
+    for (size_t k = 0; k < cov.size(); ++k) {
+        cov[k] /= pairCount[k];
+    }
+
+    return cov;
+}
+
+}
diff --git a/library/cpp/lwtrace/mon/analytics/util.h b/library/cpp/lwtrace/mon/analytics/util.h
new file mode 100644
index 0000000000..e07d06cc43
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/util.h
@@ -0,0 +1,122 @@
+#pragma once
+
+#include "data.h"
+#include <util/generic/algorithm.h>
+#include <util/generic/hash_set.h>
+#include <util/string/vector.h>
+
+namespace NAnalytics {
+
+// Returns x when it is finite; NaN and +/-infinity are replaced by
+// notFiniteValue.
+inline double Finitize(double x, double notFiniteValue = 0.0)
+{
+    if (isfinite(x)) {
+        return x;
+    }
+    return notFiniteValue;
+}
+
+// Splits |nameAndOpts| on "-" with SplitString: the first piece becomes
+// |name|, every following piece is added to |opts|. Both outputs are cleared
+// first, so they stay empty when the split yields no pieces.
+inline void ParseNameAndOpts(const TString& nameAndOpts, TString& name, THashSet<TString>& opts)
+{
+    name.clear();
+    opts.clear();
+    const auto pieces = SplitString(nameAndOpts, "-");
+    for (size_t i = 0; i < pieces.size(); ++i) {
+        if (i == 0) {
+            name = pieces[i];
+        } else {
+            opts.insert(pieces[i]);
+        }
+    }
+}
+
+// Returns the first piece of |nameAndOpts| split on "-" with SplitString, or
+// an empty string when the split yields no pieces.
+inline TString ParseName(const TString& nameAndOpts)
+{
+    const auto pieces = SplitString(nameAndOpts, "-");
+    if (!pieces.empty()) {
+        return pieces[0];
+    }
+    return TString();
+}
+
+// Folds column |name| over the table: starting from |r|, applies
+// r = t(r, value) for every row in which the column reads as double, in table
+// order, and returns the final accumulator.
+template <class R, class T>
+inline R AccumulateIfExist(const TString& name, const TTable& table, R r, T t)
+{
+    const T& combine = t; // invoked through a const reference
+    for (auto it = table.begin(); it != table.end(); ++it) {
+        const TRow& row = *it;
+        double value;
+        if (row.Get(name, value)) {
+            r = combine(r, value);
+        }
+    }
+    return r;
+}
+
+// Lower bound of a column for axis scaling.
+//
+// |nameAndOpts| is parsed with ParseNameAndOpts. With the "stack" option the
+// result is 0.0; otherwise it is the minimum of the column over the rows in
+// which it reads as double, or +inf when there is no such row.
+inline double MinValue(const TString& nameAndOpts, const TTable& table)
+{
+    TString column;
+    THashSet<TString> options;
+    ParseNameAndOpts(nameAndOpts, column, options);
+    if (options.contains("stack")) {
+        return 0.0;
+    }
+
+    auto zero = 0.0;
+    const double plusInf = 1.0 / zero; // +inf
+    return AccumulateIfExist(column, table, plusInf, [] (double acc, double v) {
+        return Min(acc, v);
+    });
+}
+
+// Upper bound of a column for axis scaling.
+//
+// |nameAndOpts| is parsed with ParseNameAndOpts. With the "stack" option the
+// result is the sum of the column over the rows in which it reads as double
+// (0.0 when there is none); otherwise it is the maximum over those rows, or
+// -inf when there is none.
+inline double MaxValue(const TString& nameAndOpts, const TTable& table)
+{
+    TString column;
+    THashSet<TString> options;
+    ParseNameAndOpts(nameAndOpts, column, options);
+    if (options.contains("stack")) {
+        return AccumulateIfExist(column, table, 0.0, [] (double acc, double v) {
+            return acc + v;
+        });
+    }
+
+    auto zero = 0.0;
+    const double minusInf = -1.0 / zero; // -inf
+    return AccumulateIfExist(column, table, minusInf, [] (double acc, double v) {
+        return Max(acc, v);
+    });
+}
+
+// Sets column |rname| of every row to t(row), visiting the rows in table
+// order. The column is created when a row does not have it yet.
+template <class T>
+inline void Map(TTable& table, const TString& rname, T t)
+{
+    const T& compute = t; // invoked through a const reference
+    for (auto it = table.begin(); it != table.end(); ++it) {
+        TRow& row = *it;
+        row[rname] = compute(row);
+    }
+}
+
+// Returns a predicate that is true for rows in which column |name| does not
+// read as double. The column name is copied into the returned functor.
+inline std::function<bool(const TRow&)> HasNoValueFor(TString name)
+{
+    return [name] (const TRow& row) -> bool {
+        double ignored;
+        const bool present = row.Get(name, ignored);
+        return !present;
+    };
+}
+
+
+// Returns a functor that yields column |name| of a row when it reads as
+// double and |defVal| otherwise. Both arguments are copied into the functor.
+inline std::function<double(const TRow&)> GetValueFor(TString name, double defVal = 0.0)
+{
+    return [name, defVal] (const TRow& row) -> double {
+        double value;
+        if (row.Get(name, value)) {
+            return value;
+        }
+        return defVal;
+    };
+}
+
+// Returns a functor that ignores its row argument and always yields |defVal|.
+inline std::function<double(const TRow&)> Const(double defVal = 0.0)
+{
+    return [defVal] (const TRow&) {
+        return defVal;
+    };
+}
+
+}
diff --git a/library/cpp/lwtrace/mon/analytics/ya.make b/library/cpp/lwtrace/mon/analytics/ya.make
new file mode 100644
index 0000000000..c18e23c8e1
--- /dev/null
+++ b/library/cpp/lwtrace/mon/analytics/ya.make
@@ -0,0 +1,15 @@
+LIBRARY()
+
+OWNER(serxa g:kikimr)
+
+PEERDIR(
+)
+
+SRCS(
+ analytics.cpp
+)
+
+END()
+
+RECURSE(
+)