diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
commit | 8cbc307de0221f84c80c42dcbe07d40727537e2c (patch) | |
tree | 625d5a673015d1df891e051033e9fcde5c7be4e5 /library/cpp/accurate_accumulate/benchmark | |
parent | 30d1ef3941e0dc835be7609de5ebee66958f215a (diff) | |
download | ydb-8cbc307de0221f84c80c42dcbe07d40727537e2c.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/accurate_accumulate/benchmark')
4 files changed, 119 insertions, 119 deletions
diff --git a/library/cpp/accurate_accumulate/benchmark/main.cpp b/library/cpp/accurate_accumulate/benchmark/main.cpp index 3c5e6e775d..6dcd8a9635 100644 --- a/library/cpp/accurate_accumulate/benchmark/main.cpp +++ b/library/cpp/accurate_accumulate/benchmark/main.cpp @@ -1,97 +1,97 @@ #include <library/cpp/accurate_accumulate/accurate_accumulate.h> #include <library/cpp/testing/benchmark/bench.h> - -#include <util/generic/algorithm.h> -#include <util/generic/singleton.h> -#include <util/generic/vector.h> -#include <util/generic/xrange.h> -#include <util/random/fast.h> - -namespace { - template <typename T, size_t N> - struct TNormalizedExamplesHolder { + +#include <util/generic/algorithm.h> +#include <util/generic/singleton.h> +#include <util/generic/vector.h> +#include <util/generic/xrange.h> +#include <util/random/fast.h> + +namespace { + template <typename T, size_t N> + struct TNormalizedExamplesHolder { TVector<T> Examples; - TNormalizedExamplesHolder() - : Examples(N) - { - TFastRng<ui64> prng{sizeof(T) * N * 42u}; - for (auto& x : Examples) { - x = prng.GenRandReal4(); - } - } - }; - - template <typename T, size_t N> - struct TExamplesHolder { + TNormalizedExamplesHolder() + : Examples(N) + { + TFastRng<ui64> prng{sizeof(T) * N * 42u}; + for (auto& x : Examples) { + x = prng.GenRandReal4(); + } + } + }; + + template <typename T, size_t N> + struct TExamplesHolder { TVector<T> Examples; - TExamplesHolder() - : Examples(N) - { - TFastRng<ui64> prng{sizeof(T) * N * 42u + 100500u}; - for (auto& x : Examples) { - // operations with non-normalized floating point numbers are rumored to work slower - x = prng.GenRandReal4() + prng.Uniform(1024u); - } - } - }; -} - -#define DEFINE_BENCHMARK(type, count) \ - Y_CPU_BENCHMARK(SimpleNorm_##type##_##count, iface) { \ - const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(KahanNorm_##type##_##count, iface) { \ - const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(Simple_##type##_##count, iface) { \ - const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(Kahan_##type##_##count, iface) { \ - const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ - } \ - } - -DEFINE_BENCHMARK(float, 2) -DEFINE_BENCHMARK(float, 4) -DEFINE_BENCHMARK(float, 8) -DEFINE_BENCHMARK(float, 16) -DEFINE_BENCHMARK(float, 32) -DEFINE_BENCHMARK(float, 64) -DEFINE_BENCHMARK(float, 128) -DEFINE_BENCHMARK(float, 256) -DEFINE_BENCHMARK(float, 512) -DEFINE_BENCHMARK(float, 1024) -DEFINE_BENCHMARK(double, 2) -DEFINE_BENCHMARK(double, 4) -DEFINE_BENCHMARK(double, 8) -DEFINE_BENCHMARK(double, 16) -DEFINE_BENCHMARK(double, 32) -DEFINE_BENCHMARK(double, 64) -DEFINE_BENCHMARK(double, 128) -DEFINE_BENCHMARK(double, 256) -DEFINE_BENCHMARK(double, 512) -DEFINE_BENCHMARK(double, 1024) - -#undef DEFINE_BENCHMARK + TExamplesHolder() + : Examples(N) + { + TFastRng<ui64> prng{sizeof(T) * N * 42u + 100500u}; + for (auto& x : Examples) { + // operations with non-normalized floating point numbers are rumored to work slower + x = prng.GenRandReal4() + prng.Uniform(1024u); + } + } + }; +} + +#define DEFINE_BENCHMARK(type, count) \ + Y_CPU_BENCHMARK(SimpleNorm_##type##_##count, iface) { \ + const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(KahanNorm_##type##_##count, iface) { \ + const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(Simple_##type##_##count, iface) { \ + const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(Kahan_##type##_##count, iface) { \ + const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ + } \ + } + +DEFINE_BENCHMARK(float, 2) +DEFINE_BENCHMARK(float, 4) +DEFINE_BENCHMARK(float, 8) +DEFINE_BENCHMARK(float, 16) +DEFINE_BENCHMARK(float, 32) +DEFINE_BENCHMARK(float, 64) +DEFINE_BENCHMARK(float, 128) +DEFINE_BENCHMARK(float, 256) +DEFINE_BENCHMARK(float, 512) +DEFINE_BENCHMARK(float, 1024) +DEFINE_BENCHMARK(double, 2) +DEFINE_BENCHMARK(double, 4) +DEFINE_BENCHMARK(double, 8) +DEFINE_BENCHMARK(double, 16) +DEFINE_BENCHMARK(double, 32) +DEFINE_BENCHMARK(double, 64) +DEFINE_BENCHMARK(double, 128) +DEFINE_BENCHMARK(double, 256) +DEFINE_BENCHMARK(double, 512) +DEFINE_BENCHMARK(double, 1024) + +#undef DEFINE_BENCHMARK diff --git a/library/cpp/accurate_accumulate/benchmark/metrics/main.py b/library/cpp/accurate_accumulate/benchmark/metrics/main.py index 311fc219ce..dc90060625 100644 --- a/library/cpp/accurate_accumulate/benchmark/metrics/main.py +++ b/library/cpp/accurate_accumulate/benchmark/metrics/main.py @@ -1,7 +1,7 @@ -import yatest.common as yc - - -def test_export_metrics(metrics): - metrics.set_benchmark(yc.execute_benchmark( +import yatest.common as yc + + +def test_export_metrics(metrics): + metrics.set_benchmark(yc.execute_benchmark( 'library/cpp/accurate_accumulate/benchmark/benchmark', - threads=8)) + threads=8)) diff --git a/library/cpp/accurate_accumulate/benchmark/metrics/ya.make b/library/cpp/accurate_accumulate/benchmark/metrics/ya.make index 5d532e1479..45ef7a464e 100644 --- a/library/cpp/accurate_accumulate/benchmark/metrics/ya.make +++ b/library/cpp/accurate_accumulate/benchmark/metrics/ya.make @@ -1,17 +1,17 @@ OWNER(yazevnul) - + PY2TEST() - + SIZE(LARGE) - -TAG( + +TAG( ya:force_sandbox - sb:intel_e5_2660v1 + sb:intel_e5_2660v1 ya:fat -) - +) + TEST_SRCS(main.py) - + DEPENDS(library/cpp/accurate_accumulate/benchmark) - -END() + +END() diff --git a/library/cpp/accurate_accumulate/benchmark/ya.make b/library/cpp/accurate_accumulate/benchmark/ya.make index 20fd877389..48b8486966 100644 --- a/library/cpp/accurate_accumulate/benchmark/ya.make +++ b/library/cpp/accurate_accumulate/benchmark/ya.make @@ -1,13 +1,13 @@ OWNER(yazevnul) - + Y_BENCHMARK() - -SRCS( - main.cpp -) - -PEERDIR( + +SRCS( + main.cpp +) + +PEERDIR( library/cpp/accurate_accumulate -) - -END() +) + +END() |