diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
commit | 9abfb1a53b7f7b791444d1378e645d8fad9b06ed (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /library/cpp/accurate_accumulate | |
parent | 8cbc307de0221f84c80c42dcbe07d40727537e2c (diff) | |
download | ydb-9abfb1a53b7f7b791444d1378e645d8fad9b06ed.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/accurate_accumulate')
5 files changed, 167 insertions, 167 deletions
diff --git a/library/cpp/accurate_accumulate/accurate_accumulate.h b/library/cpp/accurate_accumulate/accurate_accumulate.h index 04e362019a..dacced17e9 100644 --- a/library/cpp/accurate_accumulate/accurate_accumulate.h +++ b/library/cpp/accurate_accumulate/accurate_accumulate.h @@ -1,37 +1,37 @@ #pragma once -#include <util/ysaveload.h> +#include <util/ysaveload.h> #include <util/generic/vector.h> #include <util/system/yassert.h> -//! See more details here http://en.wikipedia.org/wiki/Kahan_summation_algorithm +//! See more details here http://en.wikipedia.org/wiki/Kahan_summation_algorithm template <typename TAccumulateType> class TKahanAccumulator { public: using TValueType = TAccumulateType; - template <typename TFloatType> + template <typename TFloatType> explicit TKahanAccumulator(const TFloatType x) - : Sum_(x) - , Compensation_() - { - } - + : Sum_(x) + , Compensation_() + { + } + TKahanAccumulator() - : Sum_() - , Compensation_() + : Sum_() + , Compensation_() { } template <typename TFloatType> - TKahanAccumulator& operator=(const TFloatType& rhs) { - Sum_ = TValueType(rhs); - Compensation_ = TValueType(); + TKahanAccumulator& operator=(const TFloatType& rhs) { + Sum_ = TValueType(rhs); + Compensation_ = TValueType(); return *this; } TValueType Get() const { - return Sum_ + Compensation_; + return Sum_ + Compensation_; } template <typename TFloatType> @@ -40,77 +40,77 @@ public: } template <typename TFloatType> - inline bool operator<(const TKahanAccumulator<TFloatType>& other) const { + inline bool operator<(const TKahanAccumulator<TFloatType>& other) const { return Get() < other.Get(); } template <typename TFloatType> - inline bool operator<=(const TKahanAccumulator<TFloatType>& other) const { + inline bool operator<=(const TKahanAccumulator<TFloatType>& other) const { return !(other < *this); } template <typename TFloatType> - inline bool operator>(const TKahanAccumulator<TFloatType>& other) const { + inline bool operator>(const TKahanAccumulator<TFloatType>& other) const { return other < *this; } template <typename TFloatType> - inline bool operator>=(const TKahanAccumulator<TFloatType>& other) const { + inline bool operator>=(const TKahanAccumulator<TFloatType>& other) const { return !(*this < other); } template <typename TFloatType> - inline TKahanAccumulator& operator+=(const TFloatType x) { - const TValueType y = TValueType(x) - Compensation_; - const TValueType t = Sum_ + y; - Compensation_ = (t - Sum_) - y; - Sum_ = t; + inline TKahanAccumulator& operator+=(const TFloatType x) { + const TValueType y = TValueType(x) - Compensation_; + const TValueType t = Sum_ + y; + Compensation_ = (t - Sum_) - y; + Sum_ = t; return *this; } template <typename TFloatType> - inline TKahanAccumulator& operator-=(const TFloatType x) { - return *this += -TValueType(x); + inline TKahanAccumulator& operator-=(const TFloatType x) { + return *this += -TValueType(x); } template <typename TFloatType> - inline TKahanAccumulator& operator*=(const TFloatType x) { - return *this = TValueType(*this) * TValueType(x); + inline TKahanAccumulator& operator*=(const TFloatType x) { + return *this = TValueType(*this) * TValueType(x); } template <typename TFloatType> - inline TKahanAccumulator& operator/=(const TFloatType x) { - return *this = TValueType(*this) / TValueType(x); + inline TKahanAccumulator& operator/=(const TFloatType x) { + return *this = TValueType(*this) / TValueType(x); } - - Y_SAVELOAD_DEFINE(Sum_, Compensation_) - + + Y_SAVELOAD_DEFINE(Sum_, Compensation_) + private: - TValueType Sum_; - TValueType Compensation_; + TValueType Sum_; + TValueType Compensation_; }; template <typename TAccumulateType, typename TFloatType> inline const TKahanAccumulator<TAccumulateType> -operator+(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { - return lhs += rhs; -} - +operator+(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { + return lhs += rhs; +} + template <typename TAccumulateType, typename TFloatType> inline const TKahanAccumulator<TAccumulateType> -operator-(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { - return lhs -= rhs; -} - +operator-(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { + return lhs -= rhs; +} + template <typename TAccumulateType, typename TFloatType> inline const TKahanAccumulator<TAccumulateType> -operator*(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { +operator*(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { return lhs *= rhs; } template <typename TAccumulateType, typename TFloatType> inline const TKahanAccumulator<TAccumulateType> -operator/(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { +operator/(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { return lhs /= rhs; } @@ -190,7 +190,7 @@ static inline double FastAccumulate(const TVector<T>& sequence) { template <typename It> static inline double FastKahanAccumulate(It begin, It end) { - return TypedFastAccumulate<TKahanAccumulator<double>>(begin, end); + return TypedFastAccumulate<TKahanAccumulator<double>>(begin, end); } template <typename T> @@ -205,17 +205,17 @@ static inline double FastInnerProduct(It1 begin1, It1 end1, It2 begin2) { template <typename T> static inline double FastInnerProduct(const TVector<T>& lhs, const TVector<T>& rhs) { - Y_ASSERT(lhs.size() == rhs.size()); + Y_ASSERT(lhs.size() == rhs.size()); return FastInnerProduct(lhs.begin(), lhs.end(), rhs.begin()); } template <typename It1, typename It2> static inline double FastKahanInnerProduct(It1 begin1, It1 end1, It2 begin2) { - return TypedFastInnerProduct<TKahanAccumulator<double>>(begin1, end1, begin2); + return TypedFastInnerProduct<TKahanAccumulator<double>>(begin1, end1, begin2); } template <typename T> static inline double FastKahanInnerProduct(const TVector<T>& lhs, const TVector<T>& rhs) { - Y_ASSERT(lhs.size() == rhs.size()); + Y_ASSERT(lhs.size() == rhs.size()); return FastKahanInnerProduct(lhs.begin(), lhs.end(), rhs.begin()); } diff --git a/library/cpp/accurate_accumulate/benchmark/main.cpp b/library/cpp/accurate_accumulate/benchmark/main.cpp index 6dcd8a9635..3c5e6e775d 100644 --- a/library/cpp/accurate_accumulate/benchmark/main.cpp +++ b/library/cpp/accurate_accumulate/benchmark/main.cpp @@ -1,97 +1,97 @@ #include <library/cpp/accurate_accumulate/accurate_accumulate.h> #include <library/cpp/testing/benchmark/bench.h> - -#include <util/generic/algorithm.h> -#include <util/generic/singleton.h> -#include <util/generic/vector.h> -#include <util/generic/xrange.h> -#include <util/random/fast.h> - -namespace { - template <typename T, size_t N> - struct TNormalizedExamplesHolder { + +#include <util/generic/algorithm.h> +#include <util/generic/singleton.h> +#include <util/generic/vector.h> +#include <util/generic/xrange.h> +#include <util/random/fast.h> + +namespace { + template <typename T, size_t N> + struct TNormalizedExamplesHolder { TVector<T> Examples; - TNormalizedExamplesHolder() - : Examples(N) - { - TFastRng<ui64> prng{sizeof(T) * N * 42u}; - for (auto& x : Examples) { - x = prng.GenRandReal4(); - } - } - }; - - template <typename T, size_t N> - struct TExamplesHolder { + TNormalizedExamplesHolder() + : Examples(N) + { + TFastRng<ui64> prng{sizeof(T) * N * 42u}; + for (auto& x : Examples) { + x = prng.GenRandReal4(); + } + } + }; + + template <typename T, size_t N> + struct TExamplesHolder { TVector<T> Examples; - TExamplesHolder() - : Examples(N) - { - TFastRng<ui64> prng{sizeof(T) * N * 42u + 100500u}; - for (auto& x : Examples) { - // operations with non-normalized floating point numbers are rumored to work slower - x = prng.GenRandReal4() + prng.Uniform(1024u); - } - } - }; -} - -#define DEFINE_BENCHMARK(type, count) \ - Y_CPU_BENCHMARK(SimpleNorm_##type##_##count, iface) { \ - const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(KahanNorm_##type##_##count, iface) { \ - const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(Simple_##type##_##count, iface) { \ - const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(Kahan_##type##_##count, iface) { \ - const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - Y_DO_NOT_OPTIMIZE_AWAY( \ - (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ - } \ - } - -DEFINE_BENCHMARK(float, 2) -DEFINE_BENCHMARK(float, 4) -DEFINE_BENCHMARK(float, 8) -DEFINE_BENCHMARK(float, 16) -DEFINE_BENCHMARK(float, 32) -DEFINE_BENCHMARK(float, 64) -DEFINE_BENCHMARK(float, 128) -DEFINE_BENCHMARK(float, 256) -DEFINE_BENCHMARK(float, 512) -DEFINE_BENCHMARK(float, 1024) -DEFINE_BENCHMARK(double, 2) -DEFINE_BENCHMARK(double, 4) -DEFINE_BENCHMARK(double, 8) -DEFINE_BENCHMARK(double, 16) -DEFINE_BENCHMARK(double, 32) -DEFINE_BENCHMARK(double, 64) -DEFINE_BENCHMARK(double, 128) -DEFINE_BENCHMARK(double, 256) -DEFINE_BENCHMARK(double, 512) -DEFINE_BENCHMARK(double, 1024) - -#undef DEFINE_BENCHMARK + TExamplesHolder() + : Examples(N) + { + TFastRng<ui64> prng{sizeof(T) * N * 42u + 100500u}; + for (auto& x : Examples) { + // operations with non-normalized floating point numbers are rumored to work slower + x = prng.GenRandReal4() + prng.Uniform(1024u); + } + } + }; +} + +#define DEFINE_BENCHMARK(type, count) \ + Y_CPU_BENCHMARK(SimpleNorm_##type##_##count, iface) { \ + const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(KahanNorm_##type##_##count, iface) { \ + const auto& examples = Default<TNormalizedExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(Simple_##type##_##count, iface) { \ + const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), type{})); \ + } \ + } \ + \ + Y_CPU_BENCHMARK(Kahan_##type##_##count, iface) { \ + const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY( \ + (type)Accumulate(std::cbegin(examples), std::cend(examples), TKahanAccumulator<type>{})); \ + } \ + } + +DEFINE_BENCHMARK(float, 2) +DEFINE_BENCHMARK(float, 4) +DEFINE_BENCHMARK(float, 8) +DEFINE_BENCHMARK(float, 16) +DEFINE_BENCHMARK(float, 32) +DEFINE_BENCHMARK(float, 64) +DEFINE_BENCHMARK(float, 128) +DEFINE_BENCHMARK(float, 256) +DEFINE_BENCHMARK(float, 512) +DEFINE_BENCHMARK(float, 1024) +DEFINE_BENCHMARK(double, 2) +DEFINE_BENCHMARK(double, 4) +DEFINE_BENCHMARK(double, 8) +DEFINE_BENCHMARK(double, 16) +DEFINE_BENCHMARK(double, 32) +DEFINE_BENCHMARK(double, 64) +DEFINE_BENCHMARK(double, 128) +DEFINE_BENCHMARK(double, 256) +DEFINE_BENCHMARK(double, 512) +DEFINE_BENCHMARK(double, 1024) + +#undef DEFINE_BENCHMARK diff --git a/library/cpp/accurate_accumulate/benchmark/metrics/main.py b/library/cpp/accurate_accumulate/benchmark/metrics/main.py index dc90060625..311fc219ce 100644 --- a/library/cpp/accurate_accumulate/benchmark/metrics/main.py +++ b/library/cpp/accurate_accumulate/benchmark/metrics/main.py @@ -1,7 +1,7 @@ -import yatest.common as yc - - -def test_export_metrics(metrics): - metrics.set_benchmark(yc.execute_benchmark( +import yatest.common as yc + + +def test_export_metrics(metrics): + metrics.set_benchmark(yc.execute_benchmark( 'library/cpp/accurate_accumulate/benchmark/benchmark', - threads=8)) + threads=8)) diff --git a/library/cpp/accurate_accumulate/benchmark/metrics/ya.make b/library/cpp/accurate_accumulate/benchmark/metrics/ya.make index 45ef7a464e..5d532e1479 100644 --- a/library/cpp/accurate_accumulate/benchmark/metrics/ya.make +++ b/library/cpp/accurate_accumulate/benchmark/metrics/ya.make @@ -1,17 +1,17 @@ OWNER(yazevnul) - + PY2TEST() - + SIZE(LARGE) - -TAG( + +TAG( ya:force_sandbox - sb:intel_e5_2660v1 + sb:intel_e5_2660v1 ya:fat -) - +) + TEST_SRCS(main.py) - + DEPENDS(library/cpp/accurate_accumulate/benchmark) - -END() + +END() diff --git a/library/cpp/accurate_accumulate/benchmark/ya.make b/library/cpp/accurate_accumulate/benchmark/ya.make index 48b8486966..20fd877389 100644 --- a/library/cpp/accurate_accumulate/benchmark/ya.make +++ b/library/cpp/accurate_accumulate/benchmark/ya.make @@ -1,13 +1,13 @@ OWNER(yazevnul) - + Y_BENCHMARK() - -SRCS( - main.cpp -) - -PEERDIR( + +SRCS( + main.cpp +) + +PEERDIR( library/cpp/accurate_accumulate -) - -END() +) + +END() |