diff options
author | serxa <serxa@yandex-team.ru> | 2022-02-10 16:49:08 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:08 +0300 |
commit | d6d7db348c2cc64e71243cab9940ee6778f4317d (patch) | |
tree | bac67f42a02f9368eb4d329f5d79b77d0a6adc18 /library/cpp/lwtrace/mon/analytics/transform.h | |
parent | 8d57b69dee81198a59c39e64704f7dc9f04b4fbf (diff) | |
download | ydb-d6d7db348c2cc64e71243cab9940ee6778f4317d.tar.gz |
Restoring authorship annotation for <serxa@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/lwtrace/mon/analytics/transform.h')
-rw-r--r-- | library/cpp/lwtrace/mon/analytics/transform.h | 324 |
1 files changed, 162 insertions, 162 deletions
diff --git a/library/cpp/lwtrace/mon/analytics/transform.h b/library/cpp/lwtrace/mon/analytics/transform.h index f7dc9adb5b..bd80d49d35 100644 --- a/library/cpp/lwtrace/mon/analytics/transform.h +++ b/library/cpp/lwtrace/mon/analytics/transform.h @@ -1,204 +1,204 @@ -#pragma once - +#pragma once + #include "data.h" - -namespace NAnalytics { - -template <class TSkip, class TX, class TY> -inline TTable Histogram(const TTable& in, TSkip skip, + +namespace NAnalytics { + +template <class TSkip, class TX, class TY> +inline TTable Histogram(const TTable& in, TSkip skip, const TString& xn_out, TX x_in, const TString& yn_out, TY y_in, - double x1, double x2, double dx) -{ - long buckets = (x2 - x1) / dx; - TTable out; + double x1, double x2, double dx) +{ + long buckets = (x2 - x1) / dx; + TTable out; TString yn_sum = yn_out + "_sum"; TString yn_share = yn_out + "_share"; - double ysum = 0.0; - out.resize(buckets); - for (size_t i = 0; i < out.size(); i++) { - double lb = x1 + dx*i; - double ub = lb + dx; - out[i].Name = "[" + ToString(lb) + ";" + ToString(ub) + (ub==x2? "]": ")"); - out[i][xn_out] = (lb + ub) / 2; - out[i][yn_sum] = 0.0; - } - for (const auto& row : in) { - if (skip(row)) { - continue; - } - double x = x_in(row); - long i = (x - x1) / dx; - if (x == x2) { // Special hack to include right edge - i--; - } - double y = y_in(row); - ysum += y; - if (i >= 0 && i < buckets) { + double ysum = 0.0; + out.resize(buckets); + for (size_t i = 0; i < out.size(); i++) { + double lb = x1 + dx*i; + double ub = lb + dx; + out[i].Name = "[" + ToString(lb) + ";" + ToString(ub) + (ub==x2? "]": ")"); + out[i][xn_out] = (lb + ub) / 2; + out[i][yn_sum] = 0.0; + } + for (const auto& row : in) { + if (skip(row)) { + continue; + } + double x = x_in(row); + long i = (x - x1) / dx; + if (x == x2) { // Special hack to include right edge + i--; + } + double y = y_in(row); + ysum += y; + if (i >= 0 && i < buckets) { out[i][yn_sum] = y + out[i].GetOrDefault(yn_sum, 0.0); - } - } - for (TRow& row : out) { - if (ysum != 0.0) { + } + } + for (TRow& row : out) { + if (ysum != 0.0) { row[yn_share] = row.GetOrDefault(yn_sum, 0.0) / ysum; - } - } - return out; -} - + } + } + return out; +} + inline TTable HistogramAll(const TTable& in, const TString& xn, double x1, double x2, double dx) -{ - long buckets = (dx == 0.0? 1: (x2 - x1) / dx); - TTable out; +{ + long buckets = (dx == 0.0? 1: (x2 - x1) / dx); + TTable out; THashMap<TString, double> colSum; - out.resize(buckets); - + out.resize(buckets); + TSet<TString> cols; - for (auto& row : in) { - for (auto& kv : row) { - cols.insert(kv.first); - } - } - cols.insert("_count"); - cols.erase(xn); - + for (auto& row : in) { + for (auto& kv : row) { + cols.insert(kv.first); + } + } + cols.insert("_count"); + cols.erase(xn); + for (const TString& col : cols) { - colSum[col] = 0.0; - } - - for (size_t i = 0; i < out.size(); i++) { - double lb = x1 + dx*i; - double ub = lb + dx; - TRow& row = out[i]; - row.Name = "[" + ToString(lb) + ";" + ToString(ub) + (ub==x2? "]": ")"); - row[xn] = (lb + ub) / 2; + colSum[col] = 0.0; + } + + for (size_t i = 0; i < out.size(); i++) { + double lb = x1 + dx*i; + double ub = lb + dx; + TRow& row = out[i]; + row.Name = "[" + ToString(lb) + ";" + ToString(ub) + (ub==x2? "]": ")"); + row[xn] = (lb + ub) / 2; for (const TString& col : cols) { - row[col + "_sum"] = 0.0; - } - } - for (const TRow& row_in : in) { - double x; - if (!row_in.Get(xn, x)) { - continue; - } - long i = (dx == 0.0? 0: (x - x1) / dx); - if (x == x2 && dx > 0.0) { // Special hack to include right edge - i--; - } - for (const auto& kv : row_in) { + row[col + "_sum"] = 0.0; + } + } + for (const TRow& row_in : in) { + double x; + if (!row_in.Get(xn, x)) { + continue; + } + long i = (dx == 0.0? 0: (x - x1) / dx); + if (x == x2 && dx > 0.0) { // Special hack to include right edge + i--; + } + for (const auto& kv : row_in) { const TString& yn = kv.first; - if (yn == xn) { - continue; - } + if (yn == xn) { + continue; + } double y; if (!row_in.Get(yn, y)) { continue; } - colSum[yn] += y; - if (i >= 0 && i < buckets) { + colSum[yn] += y; + if (i >= 0 && i < buckets) { out[i][yn + "_cnt"] = out[i].GetOrDefault(yn + "_cnt") + 1; out[i][yn + "_sum"] = out[i].GetOrDefault(yn + "_sum") + y; if (out[i].contains(yn + "_min")) { out[i][yn + "_min"] = Min(y, out[i].GetOrDefault(yn + "_min")); - } else { - out[i][yn + "_min"] = y; - } + } else { + out[i][yn + "_min"] = y; + } if (out[i].contains(yn + "_max")) { out[i][yn + "_max"] = Max(y, out[i].GetOrDefault(yn + "_max")); - } else { - out[i][yn + "_max"] = y; - } - } - } - colSum["_count"]++; - if (i >= 0 && i < buckets) { + } else { + out[i][yn + "_max"] = y; + } + } + } + colSum["_count"]++; + if (i >= 0 && i < buckets) { out[i]["_count_sum"] = out[i].GetOrDefault("_count_sum") + 1; - } - } - for (TRow& row : out) { + } + } + for (TRow& row : out) { for (const TString& col : cols) { - double ysum = colSum[col]; - if (col != "_count") { + double ysum = colSum[col]; + if (col != "_count") { if (row.GetOrDefault(col + "_cnt") != 0.0) { row[col + "_avg"] = row.GetOrDefault(col + "_sum") / row.GetOrDefault(col + "_cnt"); - } - } - if (ysum != 0.0) { + } + } + if (ysum != 0.0) { row[col + "_share"] = row.GetOrDefault(col + "_sum") / ysum; - } - } - } - return out; -} - -inline TMatrix CovarianceMatrix(const TTable& in) -{ + } + } + } + return out; +} + +inline TMatrix CovarianceMatrix(const TTable& in) +{ TSet<TString> cols; - for (auto& row : in) { - for (auto& kv : row) { - cols.insert(kv.first); - } - } - - struct TAggregate { - size_t Idx = 0; - double Sum = 0; - size_t Count = 0; - double Mean = 0; - }; - + for (auto& row : in) { + for (auto& kv : row) { + cols.insert(kv.first); + } + } + + struct TAggregate { + size_t Idx = 0; + double Sum = 0; + size_t Count = 0; + double Mean = 0; + }; + THashMap<TString, TAggregate> colAggr; - - size_t colCount = 0; - for (const TString& col : cols) { - TAggregate& aggr = colAggr[col]; - aggr.Idx = colCount++; - } - - for (const TRow& row : in) { - for (const auto& kv : row) { - const TString& xn = kv.first; + + size_t colCount = 0; + for (const TString& col : cols) { + TAggregate& aggr = colAggr[col]; + aggr.Idx = colCount++; + } + + for (const TRow& row : in) { + for (const auto& kv : row) { + const TString& xn = kv.first; double x; if (!row.Get(xn, x)) { continue; } - TAggregate& aggr = colAggr[xn]; - aggr.Sum += x; - aggr.Count++; - } - } - - for (auto& kv : colAggr) { - TAggregate& aggr = kv.second; - aggr.Mean = aggr.Sum / aggr.Count; - } - - TMatrix covCount(cols.size(), cols.size()); - TMatrix cov(cols.size(), cols.size()); - for (const TRow& row : in) { - for (const auto& kv1 : row) { + TAggregate& aggr = colAggr[xn]; + aggr.Sum += x; + aggr.Count++; + } + } + + for (auto& kv : colAggr) { + TAggregate& aggr = kv.second; + aggr.Mean = aggr.Sum / aggr.Count; + } + + TMatrix covCount(cols.size(), cols.size()); + TMatrix cov(cols.size(), cols.size()); + for (const TRow& row : in) { + for (const auto& kv1 : row) { double x; if (!row.Get(kv1.first, x)) { continue; } - TAggregate& xaggr = colAggr[kv1.first]; - for (const auto& kv2 : row) { + TAggregate& xaggr = colAggr[kv1.first]; + for (const auto& kv2 : row) { double y; if (!row.Get(kv2.first, y)) { continue; } - TAggregate& yaggr = colAggr[kv2.first]; - covCount.Cell(xaggr.Idx, yaggr.Idx)++; - cov.Cell(xaggr.Idx, yaggr.Idx) += (x - xaggr.Mean) * (y - yaggr.Mean); - } - } - } - - for (size_t idx = 0; idx < cov.size(); idx++) { - cov[idx] /= covCount[idx]; - } - - return cov; -} - -} + TAggregate& yaggr = colAggr[kv2.first]; + covCount.Cell(xaggr.Idx, yaggr.Idx)++; + cov.Cell(xaggr.Idx, yaggr.Idx) += (x - xaggr.Mean) * (y - yaggr.Mean); + } + } + } + + for (size_t idx = 0; idx < cov.size(); idx++) { + cov[idx] /= covCount[idx]; + } + + return cov; +} + +} |