diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
commit | 8cbc307de0221f84c80c42dcbe07d40727537e2c (patch) | |
tree | 625d5a673015d1df891e051033e9fcde5c7be4e5 /library/cpp/accurate_accumulate/benchmark/main.cpp | |
parent | 30d1ef3941e0dc835be7609de5ebee66958f215a (diff) | |
download | ydb-8cbc307de0221f84c80c42dcbe07d40727537e2c.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/accurate_accumulate/benchmark/main.cpp')
-rw-r--r-- | library/cpp/accurate_accumulate/benchmark/main.cpp | 186 |
1 files changed, 93 insertions, 93 deletions
diff --git a/library/cpp/accurate_accumulate/benchmark/main.cpp b/library/cpp/accurate_accumulate/benchmark/main.cpp index 3c5e6e775d..6dcd8a9635 100644 --- a/library/cpp/accurate_accumulate/benchmark/main.cpp +++ b/library/cpp/accurate_accumulate/benchmark/main.cpp @@ -1,97 +1,97 @@ #include <library/cpp/accurate_accumulate/accurate_accumulate.h> #include <library/cpp/testing/benchmark/bench.h> - -#include <util/generic/algorithm.h> -#include <util/generic/singleton.h> -#include <util/generic/vector.h> -#include <util/generic/xrange.h> -#include <util/random/fast.h> - -namespace { - template <typename T, size_t N> - struct TNormalizedExamplesHolder { + +#include <util/generic/algorithm.h> +#include <util/generic/singleton.h> +#include <util/generic/vector.h> +#include <util/generic/xrange.h> +#include <util/random/fast.h> + +namespace { + template <typename T, size_t N> + struct TNormalizedExamplesHolder { TVector<T> Examples; - TNormalizedExamplesHolder() - : Examples(N) - { - TFastRng<ui64> prng{sizeof(T) * N * 42u}; - for (auto& x : Examples) { - x = prng.GenRandReal4(); - } - } - }; - - template <typename T, size_t N> - struct TExamplesHolder { + TNormalizedExamplesHolder() + : Examples(N) + { + TFastRng<ui64> prng{sizeof(T) * N * 42u}; + for (auto& x : Examples) { + x = prng.GenRandReal4(); + } + } + }; + + template <typename T, size_t N> + struct TExamplesHolder { TVector<T> Examples; - TExamplesHolder() - : Examples(N) - { - TFastRng<ui64> prng{sizeof(T) * N * 42u + 100500u}; - for (auto& x : Examples) { - // operations with non-normalized floating point numbers are rumored to work slower - x = prng.GenRandReal4() + prng.Uniform(1024u); - } - } - }; -} - -#define DEFINE_BENCHMARK(type, count) \ - Y_CPU_BENCHMARK(SimpleNorm_##type##_##count, iface) { \ - const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(KahanNorm_##type##_##count, iface) { \ - const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(Simple_##type##_##count, iface) { \ - const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(Kahan_##type##_##count, iface) { \ - const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ - } \ - } - -DEFINE_BENCHMARK(float, 2) -DEFINE_BENCHMARK(float, 4) -DEFINE_BENCHMARK(float, 8) -DEFINE_BENCHMARK(float, 16) -DEFINE_BENCHMARK(float, 32) -DEFINE_BENCHMARK(float, 64) -DEFINE_BENCHMARK(float, 128) -DEFINE_BENCHMARK(float, 256) -DEFINE_BENCHMARK(float, 512) -DEFINE_BENCHMARK(float, 1024) -DEFINE_BENCHMARK(double, 2) -DEFINE_BENCHMARK(double, 4) -DEFINE_BENCHMARK(double, 8) -DEFINE_BENCHMARK(double, 16) -DEFINE_BENCHMARK(double, 32) -DEFINE_BENCHMARK(double, 64) -DEFINE_BENCHMARK(double, 128) -DEFINE_BENCHMARK(double, 256) -DEFINE_BENCHMARK(double, 512) -DEFINE_BENCHMARK(double, 1024) - -#undef DEFINE_BENCHMARK + TExamplesHolder() + : Examples(N) + { + TFastRng<ui64> prng{sizeof(T) * N * 42u + 100500u}; + for (auto& x : Examples) { + // operations with non-normalized floating point numbers are rumored to work slower + x = prng.GenRandReal4() + prng.Uniform(1024u); + } + } + }; +} + +#define DEFINE_BENCHMARK(type, count) \ + Y_CPU_BENCHMARK(SimpleNorm_##type##_##count, iface) { \ + const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(KahanNorm_##type##_##count, iface) { \ + const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(Simple_##type##_##count, iface) { \ + const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(Kahan_##type##_##count, iface) { \ + const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ + } \ + } + +DEFINE_BENCHMARK(float, 2) +DEFINE_BENCHMARK(float, 4) +DEFINE_BENCHMARK(float, 8) +DEFINE_BENCHMARK(float, 16) +DEFINE_BENCHMARK(float, 32) +DEFINE_BENCHMARK(float, 64) +DEFINE_BENCHMARK(float, 128) +DEFINE_BENCHMARK(float, 256) +DEFINE_BENCHMARK(float, 512) +DEFINE_BENCHMARK(float, 1024) +DEFINE_BENCHMARK(double, 2) +DEFINE_BENCHMARK(double, 4) +DEFINE_BENCHMARK(double, 8) +DEFINE_BENCHMARK(double, 16) +DEFINE_BENCHMARK(double, 32) +DEFINE_BENCHMARK(double, 64) +DEFINE_BENCHMARK(double, 128) +DEFINE_BENCHMARK(double, 256) +DEFINE_BENCHMARK(double, 512) +DEFINE_BENCHMARK(double, 1024) + +#undef DEFINE_BENCHMARK |