diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/lwtrace/mon/analytics | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/lwtrace/mon/analytics')
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/all.h | 8 | ||||
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/analytics.cpp | 5 | ||||
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/csv_output.h | 52 | ||||
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/data.h | 108 | ||||
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/html_output.h | 86 | ||||
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/json_output.h | 98 | ||||
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/transform.h | 204 | ||||
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/util.h | 122 | ||||
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/ya.make | 15 |
9 files changed, 698 insertions, 0 deletions
diff --git a/library/cpp/lwtrace/mon/analytics/all.h b/library/cpp/lwtrace/mon/analytics/all.h new file mode 100644 index 0000000000..02ddfb83f2 --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/all.h @@ -0,0 +1,8 @@ +#pragma once + +#include "csv_output.h" +#include "data.h" +#include "html_output.h" +#include "json_output.h" +#include "transform.h" +#include "util.h" diff --git a/library/cpp/lwtrace/mon/analytics/analytics.cpp b/library/cpp/lwtrace/mon/analytics/analytics.cpp new file mode 100644 index 0000000000..1b25263386 --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/analytics.cpp @@ -0,0 +1,5 @@ +#include "all.h" + +namespace NAnalytics { + +} diff --git a/library/cpp/lwtrace/mon/analytics/csv_output.h b/library/cpp/lwtrace/mon/analytics/csv_output.h new file mode 100644 index 0000000000..90ded32f5d --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/csv_output.h @@ -0,0 +1,52 @@ +#pragma once + +#include <util/string/printf.h> +#include <util/stream/str.h> +#include <util/generic/set.h> +#include "data.h" + +namespace NAnalytics { + +inline TString ToCsv(const TTable& in, TString sep = TString("\t"), bool head = true) +{ + TSet<TString> cols; + bool hasName = false; + for (const TRow& row : in) { + hasName = hasName || !row.Name.empty(); + for (const auto& kv : row) { + cols.insert(kv.first); + } + } + + TStringStream ss; + if (head) { + bool first = true; + if (hasName) { + ss << (first? TString(): sep) << "Name"; + first = false; + } + for (const TString& c : cols) { + ss << (first? TString(): sep) << c; + first = false; + } + ss << Endl; + } + + for (const TRow& row : in) { + bool first = true; + if (hasName) { + ss << (first? TString(): sep) << row.Name; + first = false; + } + for (const TString& c : cols) { + ss << (first? TString(): sep); + first = false; + TString value; + ss << (row.GetAsString(c, value) ? value : TString("-")); + } + ss << Endl; + } + return ss.Str(); +} + +} diff --git a/library/cpp/lwtrace/mon/analytics/data.h b/library/cpp/lwtrace/mon/analytics/data.h new file mode 100644 index 0000000000..4b643fe20b --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/data.h @@ -0,0 +1,108 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/hash.h> +#include <util/generic/vector.h> +#include <util/string/builder.h> +#include <util/string/cast.h> + +#include <variant> + +namespace NAnalytics { + +using TRowValue = std::variant<i64, ui64, double, TString>; + +TString ToString(const TRowValue& val) { + TStringBuilder builder; + std::visit([&builder] (auto&& arg) { + builder << arg; + }, val); + return builder; +} + +struct TRow : public THashMap<TString, TRowValue> { + TString Name; + + template<typename T> + bool Get(const TString& name, T& value) const { + if constexpr (std::is_same_v<double, T>) { + if (name == "_count") { // Special values + value = 1.0; + return true; + } + } + auto iter = find(name); + if (iter != end()) { + try { + value = std::get<T>(iter->second); + return true; + } catch (...) {} + } + return false; + } + + template<typename T = double> + T GetOrDefault(const TString& name, T dflt = T()) { + Get(name, dflt); + return dflt; + } + + bool GetAsString(const TString& name, TString& value) const { + auto iter = find(name); + if (iter != end()) { + value = ToString(iter->second); + return true; + } + return false; + } +}; + +using TAttributes = THashMap<TString, TString>; + +struct TTable : public TVector<TRow> { + TAttributes Attributes; +}; + +struct TMatrix : public TVector<double> { + size_t Rows; + size_t Cols; + + explicit TMatrix(size_t rows = 0, size_t cols = 0) + : TVector<double>(rows * cols) + , Rows(rows) + , Cols(cols) + {} + + void Reset(size_t rows, size_t cols) + { + Rows = rows; + Cols = cols; + clear(); + resize(rows * cols); + } + + double& Cell(size_t row, size_t col) + { + Y_VERIFY(row < Rows); + Y_VERIFY(col < Cols); + return operator[](row * Cols + col); + } + + double Cell(size_t row, size_t col) const + { + Y_VERIFY(row < Rows); + Y_VERIFY(col < Cols); + return operator[](row * Cols + col); + } + + double CellSum() const + { + double sum = 0.0; + for (double x : *this) { + sum += x; + } + return sum; + } +}; + +} diff --git a/library/cpp/lwtrace/mon/analytics/html_output.h b/library/cpp/lwtrace/mon/analytics/html_output.h new file mode 100644 index 0000000000..f775f216b9 --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/html_output.h @@ -0,0 +1,86 @@ +#pragma once + +#include <util/string/printf.h> +#include <util/stream/str.h> +#include <util/generic/set.h> +#include "data.h" + +namespace NAnalytics { + +inline TString ToHtml(const TTable& in) +{ + TSet<TString> cols; + bool hasName = false; + for (const TRow& row : in) { + hasName = hasName || !row.Name.empty(); + for (const auto& kv : row) { + cols.insert(kv.first); + } + } + + TStringStream ss; + ss << "<table>"; + ss << "<thead><tr>"; + if (hasName) { + ss << "<th>Name</th>"; + } + for (const TString& c : cols) { + ss << "<th>" << c << "</th>"; + } + ss << "</tr></thead><tbody>"; + + for (const TRow& row : in) { + ss << "<tr>"; + if (hasName) { + ss << "<th>" << row.Name << "</th>"; + } + for (const TString& c : cols) { + TString value; + ss << "<td>" << (row.GetAsString(c, value) ? value : TString("-")) << "</td>"; + } + ss << "</tr>"; + } + ss << "</tbody></table>"; + + return ss.Str(); +} + +inline TString ToTransposedHtml(const TTable& in) +{ + TSet<TString> cols; + bool hasName = false; + for (const TRow& row : in) { + hasName = hasName || !row.Name.empty(); + for (const auto& kv : row) { + cols.insert(kv.first); + } + } + + TStringStream ss; + ss << "<table><thead>"; + if (hasName) { + ss << "<tr>"; + ss << "<th>Name</th>"; + for (const TRow& row : in) { + ss << "<th>" << row.Name << "</th>"; + } + ss << "</tr>"; + } + + ss << "</thead><tbody>"; + + for (const TString& c : cols) { + ss << "<tr>"; + ss << "<th>" << c << "</th>"; + for (const TRow& row : in) { + TString value; + ss << "<td>" << (row.GetAsString(c, value) ? value : TString("-")) << "</td>"; + } + ss << "</tr>"; + } + ss << "</tbody></table>"; + + return ss.Str(); +} + +} diff --git a/library/cpp/lwtrace/mon/analytics/json_output.h b/library/cpp/lwtrace/mon/analytics/json_output.h new file mode 100644 index 0000000000..189f9802d3 --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/json_output.h @@ -0,0 +1,98 @@ +#pragma once + +#include <util/string/printf.h> +#include <util/stream/str.h> +#include <util/string/vector.h> +#include <util/generic/set.h> +#include <util/generic/hash_set.h> +#include "data.h" +#include "util.h" + +namespace NAnalytics { + +inline TString ToJsonFlot(const TTable& in, const TString& xno, const TVector<TString>& ynos, const TString& opts = TString()) +{ + TStringStream ss; + ss << "[ "; + bool first = true; + + TString xn; + THashSet<TString> xopts; + ParseNameAndOpts(xno, xn, xopts); + bool xstack = xopts.contains("stack"); + + for (const TString& yno : ynos) { + TString yn; + THashSet<TString> yopts; + ParseNameAndOpts(yno, yn, yopts); + bool ystackOpt = yopts.contains("stack"); + + ss << (first? "": ",\n ") << "{ " << opts << (opts? ", ": "") << "\"label\": \"" << yn << "\", \"data\": [ "; + bool first2 = true; + using TPt = std::tuple<double, double, TString>; + std::vector<TPt> pts; + for (const TRow& row : in) { + double x, y; + if (row.Get(xn, x) && row.Get(yn, y)) { + pts.emplace_back(x, y, row.Name); + } + } + + if (xstack) { + std::sort(pts.begin(), pts.end(), [] (const TPt& a, const TPt& b) { + // At first sort by Name, then by x, then by y + return std::make_tuple(std::get<2>(a), std::get<0>(a), std::get<1>(a)) < + std::make_tuple(std::get<2>(b), std::get<0>(b), std::get<1>(b)); + }); + } else { + std::sort(pts.begin(), pts.end()); + } + + double x = 0.0, xsum = 0.0; + double y = 0.0, ysum = 0.0; + for (auto& pt : pts) { + if (xstack) { + x = xsum; + xsum += std::get<0>(pt); + } else { + x = std::get<0>(pt); + } + + if (ystackOpt) { + y = ysum; + ysum += std::get<1>(pt); + } else { + y = std::get<1>(pt); + } + + ss << (first2? "": ", ") << "[" + << Sprintf("%.6lf", Finitize(x)) << ", " // x coordinate + << Sprintf("%.6lf", Finitize(y)) << ", " // y coordinate + << "\"" << std::get<2>(pt) << "\", " // label + << Sprintf("%.6lf", std::get<0>(pt)) << ", " // x label (real value) + << Sprintf("%.6lf", std::get<1>(pt)) // y label (real value) + << "]"; + first2 = false; + } + // Add final point + if (!first2 && (xstack || ystackOpt)) { + if (xstack) + x = xsum; + if (ystackOpt) + y = ysum; + ss << (first2? "": ", ") << "[" + << Sprintf("%.6lf", Finitize(x)) << ", " // x coordinate + << Sprintf("%.6lf", Finitize(y)) << ", " // y coordinate + << "\"\", " + << Sprintf("%.6lf", x) << ", " // x label (real value) + << Sprintf("%.6lf", y) // y label (real value) + << "]"; + } + ss << " ] }"; + first = false; + } + ss << "\n]"; + return ss.Str(); +} + +} diff --git a/library/cpp/lwtrace/mon/analytics/transform.h b/library/cpp/lwtrace/mon/analytics/transform.h new file mode 100644 index 0000000000..f7dc9adb5b --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/transform.h @@ -0,0 +1,204 @@ +#pragma once + +#include "data.h" + +namespace NAnalytics { + +template <class TSkip, class TX, class TY> +inline TTable Histogram(const TTable& in, TSkip skip, + const TString& xn_out, TX x_in, + const TString& yn_out, TY y_in, + double x1, double x2, double dx) +{ + long buckets = (x2 - x1) / dx; + TTable out; + TString yn_sum = yn_out + "_sum"; + TString yn_share = yn_out + "_share"; + double ysum = 0.0; + out.resize(buckets); + for (size_t i = 0; i < out.size(); i++) { + double lb = x1 + dx*i; + double ub = lb + dx; + out[i].Name = "[" + ToString(lb) + ";" + ToString(ub) + (ub==x2? "]": ")"); + out[i][xn_out] = (lb + ub) / 2; + out[i][yn_sum] = 0.0; + } + for (const auto& row : in) { + if (skip(row)) { + continue; + } + double x = x_in(row); + long i = (x - x1) / dx; + if (x == x2) { // Special hack to include right edge + i--; + } + double y = y_in(row); + ysum += y; + if (i >= 0 && i < buckets) { + out[i][yn_sum] = y + out[i].GetOrDefault(yn_sum, 0.0); + } + } + for (TRow& row : out) { + if (ysum != 0.0) { + row[yn_share] = row.GetOrDefault(yn_sum, 0.0) / ysum; + } + } + return out; +} + +inline TTable HistogramAll(const TTable& in, const TString& xn, double x1, double x2, double dx) +{ + long buckets = (dx == 0.0? 1: (x2 - x1) / dx); + TTable out; + THashMap<TString, double> colSum; + out.resize(buckets); + + TSet<TString> cols; + for (auto& row : in) { + for (auto& kv : row) { + cols.insert(kv.first); + } + } + cols.insert("_count"); + cols.erase(xn); + + for (const TString& col : cols) { + colSum[col] = 0.0; + } + + for (size_t i = 0; i < out.size(); i++) { + double lb = x1 + dx*i; + double ub = lb + dx; + TRow& row = out[i]; + row.Name = "[" + ToString(lb) + ";" + ToString(ub) + (ub==x2? "]": ")"); + row[xn] = (lb + ub) / 2; + for (const TString& col : cols) { + row[col + "_sum"] = 0.0; + } + } + for (const TRow& row_in : in) { + double x; + if (!row_in.Get(xn, x)) { + continue; + } + long i = (dx == 0.0? 0: (x - x1) / dx); + if (x == x2 && dx > 0.0) { // Special hack to include right edge + i--; + } + for (const auto& kv : row_in) { + const TString& yn = kv.first; + if (yn == xn) { + continue; + } + double y; + if (!row_in.Get(yn, y)) { + continue; + } + colSum[yn] += y; + if (i >= 0 && i < buckets) { + out[i][yn + "_cnt"] = out[i].GetOrDefault(yn + "_cnt") + 1; + out[i][yn + "_sum"] = out[i].GetOrDefault(yn + "_sum") + y; + if (out[i].contains(yn + "_min")) { + out[i][yn + "_min"] = Min(y, out[i].GetOrDefault(yn + "_min")); + } else { + out[i][yn + "_min"] = y; + } + if (out[i].contains(yn + "_max")) { + out[i][yn + "_max"] = Max(y, out[i].GetOrDefault(yn + "_max")); + } else { + out[i][yn + "_max"] = y; + } + } + } + colSum["_count"]++; + if (i >= 0 && i < buckets) { + out[i]["_count_sum"] = out[i].GetOrDefault("_count_sum") + 1; + } + } + for (TRow& row : out) { + for (const TString& col : cols) { + double ysum = colSum[col]; + if (col != "_count") { + if (row.GetOrDefault(col + "_cnt") != 0.0) { + row[col + "_avg"] = row.GetOrDefault(col + "_sum") / row.GetOrDefault(col + "_cnt"); + } + } + if (ysum != 0.0) { + row[col + "_share"] = row.GetOrDefault(col + "_sum") / ysum; + } + } + } + return out; +} + +inline TMatrix CovarianceMatrix(const TTable& in) +{ + TSet<TString> cols; + for (auto& row : in) { + for (auto& kv : row) { + cols.insert(kv.first); + } + } + + struct TAggregate { + size_t Idx = 0; + double Sum = 0; + size_t Count = 0; + double Mean = 0; + }; + + THashMap<TString, TAggregate> colAggr; + + size_t colCount = 0; + for (const TString& col : cols) { + TAggregate& aggr = colAggr[col]; + aggr.Idx = colCount++; + } + + for (const TRow& row : in) { + for (const auto& kv : row) { + const TString& xn = kv.first; + double x; + if (!row.Get(xn, x)) { + continue; + } + TAggregate& aggr = colAggr[xn]; + aggr.Sum += x; + aggr.Count++; + } + } + + for (auto& kv : colAggr) { + TAggregate& aggr = kv.second; + aggr.Mean = aggr.Sum / aggr.Count; + } + + TMatrix covCount(cols.size(), cols.size()); + TMatrix cov(cols.size(), cols.size()); + for (const TRow& row : in) { + for (const auto& kv1 : row) { + double x; + if (!row.Get(kv1.first, x)) { + continue; + } + TAggregate& xaggr = colAggr[kv1.first]; + for (const auto& kv2 : row) { + double y; + if (!row.Get(kv2.first, y)) { + continue; + } + TAggregate& yaggr = colAggr[kv2.first]; + covCount.Cell(xaggr.Idx, yaggr.Idx)++; + cov.Cell(xaggr.Idx, yaggr.Idx) += (x - xaggr.Mean) * (y - yaggr.Mean); + } + } + } + + for (size_t idx = 0; idx < cov.size(); idx++) { + cov[idx] /= covCount[idx]; + } + + return cov; +} + +} diff --git a/library/cpp/lwtrace/mon/analytics/util.h b/library/cpp/lwtrace/mon/analytics/util.h new file mode 100644 index 0000000000..e07d06cc43 --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/util.h @@ -0,0 +1,122 @@ +#pragma once + +#include "data.h" +#include <util/generic/algorithm.h> +#include <util/generic/hash_set.h> +#include <util/string/vector.h> + +namespace NAnalytics { + +// Get rid of NaNs and INFs +inline double Finitize(double x, double notFiniteValue = 0.0) +{ + return isfinite(x)? x: notFiniteValue; +} + +inline void ParseNameAndOpts(const TString& nameAndOpts, TString& name, THashSet<TString>& opts) +{ + name.clear(); + opts.clear(); + bool first = true; + auto vs = SplitString(nameAndOpts, "-"); + for (const auto& s : vs) { + if (first) { + name = s; + first = false; + } else { + opts.insert(s); + } + } +} + +inline TString ParseName(const TString& nameAndOpts) +{ + auto vs = SplitString(nameAndOpts, "-"); + if (vs.empty()) { + return TString(); + } else { + return vs[0]; + } +} + +template <class R, class T> +inline R AccumulateIfExist(const TString& name, const TTable& table, R r, T t) +{ + ForEach(table.begin(), table.end(), [=,&r] (const TRow& row) { + double value; + if (row.Get(name, value)) { + r = t(r, value); + } + }); + return r; +} + +inline double MinValue(const TString& nameAndOpts, const TTable& table) +{ + TString name; + THashSet<TString> opts; + ParseNameAndOpts(nameAndOpts, name, opts); + bool stack = opts.contains("stack"); + if (stack) { + return 0.0; + } else { + auto zero = 0.0; + + return AccumulateIfExist(name, table, 1.0 / zero /*+inf*/, [] (double x, double y) { + return Min(x, y); + }); + } +} + +inline double MaxValue(const TString& nameAndOpts, const TTable& table) +{ + TString name; + THashSet<TString> opts; + ParseNameAndOpts(nameAndOpts, name, opts); + bool stack = opts.contains("stack"); + if (stack) { + return AccumulateIfExist(name, table, 0.0, [] (double x, double y) { + return x + y; + }); + } else { + auto zero = 0.0; + + return AccumulateIfExist(name, table, -1.0 / zero /*-inf*/, [] (double x, double y) { + return Max(x, y); + }); + } +} + +template <class T> +inline void Map(TTable& table, const TString& rname, T t) +{ + ForEach(table.begin(), table.end(), [=] (TRow& row) { + row[rname] = t(row); + }); +} + +inline std::function<bool(const TRow&)> HasNoValueFor(TString name) +{ + return [=] (const TRow& row) -> bool { + double value; + return !row.Get(name, value); + }; +} + + +inline std::function<double(const TRow&)> GetValueFor(TString name, double defVal = 0.0) +{ + return [=] (const TRow& row) -> double { + double value; + return row.Get(name, value)? value: defVal; + }; +} + +inline std::function<double(const TRow&)> Const(double defVal = 0.0) +{ + return [=] (const TRow&) { + return defVal; + }; +} + +} diff --git a/library/cpp/lwtrace/mon/analytics/ya.make b/library/cpp/lwtrace/mon/analytics/ya.make new file mode 100644 index 0000000000..c18e23c8e1 --- /dev/null +++ b/library/cpp/lwtrace/mon/analytics/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +OWNER(serxa g:kikimr) + +PEERDIR( +) + +SRCS( + analytics.cpp +) + +END() + +RECURSE( +) |