author | thegeorg <thegeorg@yandex-team.com> | 2023-06-14 13:05:42 +0300
---|---|---
committer | thegeorg <thegeorg@yandex-team.com> | 2023-06-14 13:05:42 +0300
commit | 4b972da4fb8c047e6c1b876f3d026f213cade3b7 (patch) |
tree | 4c7453323a0a572846b42a1091859c4867ae68f4 /contrib/restricted |
parent | bb4d56c9f2833ca3622dce9ee4a799910ea1549c (diff) |
download | ydb-4b972da4fb8c047e6c1b876f3d026f213cade3b7.tar.gz |
Update contrib/restricted/google/benchmark to 1.8.0
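Among the API changes in v1.8.0 visible in the hunks below, the `Tp const&` overloads of `benchmark::DoNotOptimize` are deprecated in favor of new rvalue-reference overloads, since the const-ref form can permit undesired compiler optimizations. A minimal migration sketch (the benchmark body here is illustrative, not part of this patch):

```
#include <benchmark/benchmark.h>

static void BM_Sum(benchmark::State& state) {
  for (auto _ : state) {
    int sum = 0;
    for (int i = 0; i < 1000; ++i) sum += i;
    // Pre-1.8.0 code often passed a const reference, which now warns:
    //   benchmark::DoNotOptimize(static_cast<const int&>(sum));
    // Pass a mutable lvalue (or rvalue) instead, so the compiler must
    // assume the value is both read and written:
    benchmark::DoNotOptimize(sum);
  }
}
BENCHMARK(BM_Sum);
BENCHMARK_MAIN();
```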
Diffstat (limited to 'contrib/restricted')
33 files changed, 834 insertions, 407 deletions
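Several `benchmark.h` interfaces in the hunks below also move from `const char*` to `const std::string&` (`Benchmark::SetName`, `State::SetLabel`, `RegisterBenchmark`), and `State` gains a `name()` accessor. A short sketch of how that reads from user code, assuming an ordinary registration-time lambda (benchmark names and sizes are illustrative):

```
#include <benchmark/benchmark.h>
#include <string>
#include <vector>

int main(int argc, char** argv) {
  // RegisterBenchmark accepts std::string in 1.8.0, so names can be
  // built at runtime without .c_str() plumbing.
  for (int size : {1 << 10, 1 << 16}) {
    std::string name = "BM_FillVector/" + std::to_string(size);
    benchmark::RegisterBenchmark(name, [size](benchmark::State& state) {
      for (auto _ : state) {
        std::vector<int> v(static_cast<size_t>(size), 42);
        benchmark::DoNotOptimize(v.data());
      }
      // SetLabel now takes std::string directly; State::name() (also
      // new in this release) returns the benchmark's name.
      state.SetLabel("name=" + state.name());
    });
  }
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}
```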
diff --git a/contrib/restricted/google/benchmark/AUTHORS b/contrib/restricted/google/benchmark/AUTHORS index 98d2d98b05..bafecaddb5 100644 --- a/contrib/restricted/google/benchmark/AUTHORS +++ b/contrib/restricted/google/benchmark/AUTHORS @@ -32,6 +32,7 @@ Federico Ficarelli <federico.ficarelli@gmail.com> Felix Homann <linuxaudio@showlabor.de> Gergő Szitár <szitar.gergo@gmail.com> Google Inc. +Henrique Bucher <hbucher@gmail.com> International Business Machines Corporation Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com> Jern-Kuan Leong <jernkuan@gmail.com> @@ -42,8 +43,10 @@ Jussi Knuuttila <jussi.knuuttila@gmail.com> Kaito Udagawa <umireon@gmail.com> Kishan Kumar <kumar.kishan@outlook.com> Lei Xu <eddyxu@gmail.com> +Marcel Jacobse <mjacobse@uni-bremen.de> Matt Clarkson <mattyclarkson@gmail.com> Maxim Vafin <maxvafin@gmail.com> +Mike Apodaca <gatorfax@gmail.com> MongoDB Inc. Nick Hutchinson <nshutchinson@gmail.com> Norman Heino <norman.heino@gmail.com> diff --git a/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt b/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt index cfad7dc3cc..0e16d88f39 100644 --- a/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt +++ b/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt @@ -13,6 +13,7 @@ target_compile_options(restricted-google-benchmark PUBLIC ) target_compile_options(restricted-google-benchmark PRIVATE -DHAVE_POSIX_REGEX + -DHAVE_PTHREAD_AFFINITY -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> @@ -42,7 +43,6 @@ target_sources(restricted-google-benchmark PRIVATE ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc - ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc diff --git a/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt b/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt index d1f192eee7..a18f8b0963 100644 --- a/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt +++ b/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt @@ -13,8 +13,10 @@ target_compile_options(restricted-google-benchmark PUBLIC ) target_compile_options(restricted-google-benchmark PRIVATE -DHAVE_POSIX_REGEX + -DHAVE_PTHREAD_AFFINITY -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK + -DBENCHMARK_HAS_PTHREAD_AFFINITY $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> ) target_include_directories(restricted-google-benchmark PUBLIC @@ -43,7 +45,6 @@ target_sources(restricted-google-benchmark PRIVATE ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc - ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc diff --git a/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt 
b/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt index d1f192eee7..a18f8b0963 100644 --- a/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt +++ b/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt @@ -13,8 +13,10 @@ target_compile_options(restricted-google-benchmark PUBLIC ) target_compile_options(restricted-google-benchmark PRIVATE -DHAVE_POSIX_REGEX + -DHAVE_PTHREAD_AFFINITY -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK + -DBENCHMARK_HAS_PTHREAD_AFFINITY $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> ) target_include_directories(restricted-google-benchmark PUBLIC @@ -43,7 +45,6 @@ target_sources(restricted-google-benchmark PRIVATE ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc - ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc diff --git a/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt b/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt index cfad7dc3cc..0e16d88f39 100644 --- a/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt +++ b/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt @@ -13,6 +13,7 @@ target_compile_options(restricted-google-benchmark PUBLIC ) target_compile_options(restricted-google-benchmark PRIVATE -DHAVE_POSIX_REGEX + -DHAVE_PTHREAD_AFFINITY -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> @@ -42,7 +43,6 @@ target_sources(restricted-google-benchmark PRIVATE ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc - ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc diff --git a/contrib/restricted/google/benchmark/README.md b/contrib/restricted/google/benchmark/README.md index 205fb008af..b64048b7d3 100644 --- a/contrib/restricted/google/benchmark/README.md +++ b/contrib/restricted/google/benchmark/README.md @@ -5,7 +5,7 @@ [](https://github.com/google/benchmark/actions?query=workflow%3Apylint) [](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings) -[](https://travis-ci.org/google/benchmark) +[](https://travis-ci.org/google/benchmark) [](https://coveralls.io/r/google/benchmark) @@ -33,7 +33,7 @@ To get started, see [Requirements](#requirements) and [Installation](#installation). See [Usage](#usage) for a full example and the [User Guide](docs/user_guide.md) for a more comprehensive feature overview. -It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md) +It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/main/docs/primer.md) as some of the structural aspects of the APIs are similar. ## Resources @@ -139,6 +139,12 @@ cache variables, if autodetection fails. 
If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, `LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables. +To enable sanitizer checks (eg., `asan` and `tsan`), add: +``` + -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all" + -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all " +``` + ### Stable and Experimental Library Versions The main branch contains the latest stable version of the benchmarking library; diff --git a/contrib/restricted/google/benchmark/include/benchmark/benchmark.h b/contrib/restricted/google/benchmark/include/benchmark/benchmark.h index 77dcfbdc2a..4a8be19d86 100644 --- a/contrib/restricted/google/benchmark/include/benchmark/benchmark.h +++ b/contrib/restricted/google/benchmark/include/benchmark/benchmark.h @@ -218,6 +218,18 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_UNUSED #endif +// Used to annotate functions, methods and classes so they +// are not optimized by the compiler. Useful for tests +// where you expect loops to stay in place churning cycles +#if defined(__clang__) +#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone)) +#elif defined(__GNUC__) || defined(__GNUG__) +#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0))) +#else +// MSVC & Intel do not have a no-optimize attribute, only line pragmas +#define BENCHMARK_DONT_OPTIMIZE +#endif + #if defined(__GNUC__) || defined(__clang__) #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) #elif defined(_MSC_VER) && !defined(__clang__) @@ -231,13 +243,20 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) // clang-format off -#if defined(__GNUC__) && !defined(__NVCC__) || defined(__clang__) +#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__) #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) #define BENCHMARK_DISABLE_DEPRECATED_WARNING \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") #define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop") +#elif defined(__NVCOMPILER) +#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) +#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING \ + _Pragma("diagnostic push") \ + _Pragma("diag_suppress deprecated_entity_with_custom_message") +#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop") #else #define BENCHMARK_BUILTIN_EXPECT(x, y) x #define BENCHMARK_DEPRECATED_MSG(msg) @@ -280,6 +299,9 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); namespace benchmark { class BenchmarkReporter; +// Default number of minimum benchmark running time in seconds. +const char kDefaultMinTimeStr[] = "0.5s"; + BENCHMARK_EXPORT void PrintDefaultHelp(); BENCHMARK_EXPORT void Initialize(int* argc, char** argv, @@ -383,13 +405,7 @@ class MemoryManager { virtual void Start() = 0; // Implement this to stop recording and fill out the given Result structure. - BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead") - virtual void Stop(Result* result) = 0; - - // FIXME(vyng): Make this pure virtual once we've migrated current users. 
- BENCHMARK_DISABLE_DEPRECATED_WARNING - virtual void Stop(Result& result) { Stop(&result); } - BENCHMARK_RESTORE_DEPRECATED_WARNING + virtual void Stop(Result& result) = 0; }; // Register a MemoryManager instance that will be used to collect and report @@ -441,12 +457,21 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY #if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER) template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { asm volatile("" : : "r,m"(value) : "memory"); } template <class Tp> -inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize( +#ifdef BENCHMARK_HAS_CXX11 + Tp&& value +#else + Tp& value +#endif +) { #if defined(__clang__) asm volatile("" : "+r,m"(value) : : "memory"); #else @@ -457,6 +482,9 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { // Workaround for a bug with full argument copy overhead with GCC. // See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519 template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE typename std::enable_if<std::is_trivially_copyable<Tp>::value && (sizeof(Tp) <= sizeof(Tp*))>::type @@ -465,6 +493,9 @@ inline BENCHMARK_ALWAYS_INLINE } template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE typename std::enable_if<!std::is_trivially_copyable<Tp>::value || (sizeof(Tp) > sizeof(Tp*))>::type @@ -476,7 +507,7 @@ template <class Tp> inline BENCHMARK_ALWAYS_INLINE typename std::enable_if<std::is_trivially_copyable<Tp>::value && (sizeof(Tp) <= sizeof(Tp*))>::type - DoNotOptimize(Tp& value) { + DoNotOptimize(Tp&& value) { asm volatile("" : "+m,r"(value) : : "memory"); } @@ -484,7 +515,7 @@ template <class Tp> inline BENCHMARK_ALWAYS_INLINE typename std::enable_if<!std::is_trivially_copyable<Tp>::value || (sizeof(Tp) > sizeof(Tp*))>::type - DoNotOptimize(Tp& value) { + DoNotOptimize(Tp&& value) { asm volatile("" : "+m"(value) : : "memory"); } @@ -493,12 +524,21 @@ inline BENCHMARK_ALWAYS_INLINE // to use memory operations instead of operations with registers. // TODO: Remove if GCC < 5 will be unsupported. 
template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { asm volatile("" : : "m"(value) : "memory"); } template <class Tp> -inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize( +#ifdef BENCHMARK_HAS_CXX11 + Tp&& value +#else + Tp& value +#endif +) { asm volatile("" : "+m"(value) : : "memory"); } #endif @@ -510,6 +550,9 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { #endif #elif defined(_MSC_VER) template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); _ReadWriteBarrier(); @@ -520,6 +563,9 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } #endif #else template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); } @@ -640,6 +686,16 @@ enum AggregationReportMode ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly }; +enum Skipped +#if defined(BENCHMARK_HAS_CXX11) + : unsigned +#endif +{ + NotSkipped = 0, + SkippedWithMessage, + SkippedWithError +}; + } // namespace internal // State is passed to a running Benchmark and contains state for the @@ -676,8 +732,8 @@ class BENCHMARK_EXPORT State { // } bool KeepRunningBatch(IterationCount n); - // REQUIRES: timer is running and 'SkipWithError(...)' has not been called - // by the current thread. + // REQUIRES: timer is running and 'SkipWithMessage(...)' or + // 'SkipWithError(...)' has not been called by the current thread. // Stop the benchmark timer. If not called, the timer will be // automatically stopped after the last iteration of the benchmark loop. // @@ -692,8 +748,8 @@ class BENCHMARK_EXPORT State { // within each benchmark iteration, if possible. void PauseTiming(); - // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called - // by the current thread. + // REQUIRES: timer is not running and 'SkipWithMessage(...)' or + // 'SkipWithError(...)' has not been called by the current thread. // Start the benchmark timer. The timer is NOT running on entrance to the // benchmark function. It begins running after control flow enters the // benchmark loop. @@ -703,8 +759,30 @@ class BENCHMARK_EXPORT State { // within each benchmark iteration, if possible. void ResumeTiming(); - // REQUIRES: 'SkipWithError(...)' has not been called previously by the - // current thread. + // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been + // called previously by the current thread. + // Report the benchmark as resulting in being skipped with the specified + // 'msg'. + // After this call the user may explicitly 'return' from the benchmark. + // + // If the ranged-for style of benchmark loop is used, the user must explicitly + // break from the loop, otherwise all future iterations will be run. + // If the 'KeepRunning()' loop is used the current thread will automatically + // exit the loop at the end of the current iteration. 
+ // + // For threaded benchmarks only the current thread stops executing and future + // calls to `KeepRunning()` will block until all threads have completed + // the `KeepRunning()` loop. If multiple threads report being skipped only the + // first skip message is used. + // + // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit + // the current scope immediately. If the function is called from within + // the 'KeepRunning()' loop the current iteration will finish. It is the users + // responsibility to exit the scope as needed. + void SkipWithMessage(const std::string& msg); + + // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been + // called previously by the current thread. // Report the benchmark as resulting in an error with the specified 'msg'. // After this call the user may explicitly 'return' from the benchmark. // @@ -722,10 +800,13 @@ class BENCHMARK_EXPORT State { // the current scope immediately. If the function is called from within // the 'KeepRunning()' loop the current iteration will finish. It is the users // responsibility to exit the scope as needed. - void SkipWithError(const char* msg); + void SkipWithError(const std::string& msg); + + // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called. + bool skipped() const { return internal::NotSkipped != skipped_; } // Returns true if an error has been reported with 'SkipWithError(...)'. - bool error_occurred() const { return error_occurred_; } + bool error_occurred() const { return internal::SkippedWithError == skipped_; } // REQUIRES: called exactly once per iteration of the benchmarking loop. // Set the manually measured time for this benchmark iteration, which @@ -796,11 +877,7 @@ class BENCHMARK_EXPORT State { // BM_Compress 50 50 14115038 compress:27.3% // // REQUIRES: a benchmark has exited its benchmarking loop. - void SetLabel(const char* label); - - void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) { - this->SetLabel(str.c_str()); - } + void SetLabel(const std::string& label); // Range arguments for this run. CHECKs if the argument has been set. BENCHMARK_ALWAYS_INLINE @@ -831,6 +908,9 @@ class BENCHMARK_EXPORT State { return max_iterations - total_iterations_ + batch_leftover_; } + BENCHMARK_ALWAYS_INLINE + std::string name() const { return name_; } + private: // items we expect on the first cache line (ie 64 bytes of the struct) // When total_iterations_ is 0, KeepRunning() and friends will return false. 
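The skip protocol spelled out in the comments above is easiest to see in a ranged-for benchmark: after `SkipWithMessage()` or `SkipWithError()` the user must still `break` out of the loop (or `return` before it starts), because skipping does not exit the current scope by itself. A hedged sketch, with an invented input file standing in for a real precondition:

```
#include <benchmark/benchmark.h>
#include <fstream>

static void BM_ReadConfig(benchmark::State& state) {
  std::ifstream in("/tmp/bench.cfg");  // hypothetical input file
  if (!in) {
    state.SkipWithError("cannot open /tmp/bench.cfg");  // std::string overload
    return;  // nothing ran yet, so returning before the loop is fine
  }
  for (auto _ : state) {
    char buf[256];
    if (!in.read(buf, sizeof(buf))) {
      state.SkipWithMessage("input exhausted");
      break;  // required with ranged-for, or future iterations keep running
    }
    benchmark::DoNotOptimize(buf[0]);
  }
}
BENCHMARK(BM_ReadConfig);
```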
@@ -848,7 +928,7 @@ class BENCHMARK_EXPORT State { private: bool started_; bool finished_; - bool error_occurred_; + internal::Skipped skipped_; // items we don't need on the first cache line std::vector<int64_t> range_; @@ -860,9 +940,9 @@ class BENCHMARK_EXPORT State { UserCounters counters; private: - State(IterationCount max_iters, const std::vector<int64_t>& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager, + State(std::string name, IterationCount max_iters, + const std::vector<int64_t>& ranges, int thread_i, int n_threads, + internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement); void StartKeepRunning(); @@ -871,6 +951,7 @@ class BENCHMARK_EXPORT State { bool KeepRunningInternal(IterationCount n, bool is_batch); void FinishKeepRunning(); + const std::string name_; const int thread_index_; const int threads_; @@ -902,7 +983,7 @@ inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n, } if (!started_) { StartKeepRunning(); - if (!error_occurred_ && total_iterations_ >= n) { + if (!skipped() && total_iterations_ >= n) { total_iterations_ -= n; return true; } @@ -932,7 +1013,7 @@ struct State::StateIterator { BENCHMARK_ALWAYS_INLINE explicit StateIterator(State* st) - : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {} + : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {} public: BENCHMARK_ALWAYS_INLINE @@ -1083,11 +1164,12 @@ class BENCHMARK_EXPORT Benchmark { Benchmark* MinWarmUpTime(double t); // Specify the amount of iterations that should be run by this benchmark. + // This option overrides the `benchmark_min_time` flag. // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark. // // NOTE: This function should only be used when *exact* iteration control is // needed and never to control or limit how long a benchmark runs, where - // `--benchmark_min_time=N` or `MinTime(...)` should be used instead. + // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead. Benchmark* Iterations(IterationCount n); // Specify the amount of times to repeat this benchmark. This option overrides @@ -1175,10 +1257,13 @@ class BENCHMARK_EXPORT Benchmark { TimeUnit GetTimeUnit() const; protected: - explicit Benchmark(const char* name); - void SetName(const char* name); + explicit Benchmark(const std::string& name); + void SetName(const std::string& name); + public: + const char* GetName() const; int ArgsCnt() const; + const char* GetArgName(int arg) const; private: friend class BenchmarkFamilies; @@ -1228,12 +1313,12 @@ class BENCHMARK_EXPORT Benchmark { // the specified functor 'fn'. // // RETURNS: A pointer to the registered benchmark. -internal::Benchmark* RegisterBenchmark(const char* name, +internal::Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn); #if defined(BENCHMARK_HAS_CXX11) template <class Lambda> -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn); +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn); #endif // Remove all registered benchmarks. All pointers to previously registered @@ -1245,10 +1330,10 @@ namespace internal { // (ie those created using the BENCHMARK(...) macros. 
class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark { public: - FunctionBenchmark(const char* name, Function* func) + FunctionBenchmark(const std::string& name, Function* func) : Benchmark(name), func_(func) {} - virtual void Run(State& st) BENCHMARK_OVERRIDE; + void Run(State& st) BENCHMARK_OVERRIDE; private: Function* func_; @@ -1258,24 +1343,24 @@ class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark { template <class Lambda> class LambdaBenchmark : public Benchmark { public: - virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } + void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } private: template <class OLambda> - LambdaBenchmark(const char* name, OLambda&& lam) + LambdaBenchmark(const std::string& name, OLambda&& lam) : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {} LambdaBenchmark(LambdaBenchmark const&) = delete; template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration) - friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&); + friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&); Lambda lambda_; }; #endif } // namespace internal -inline internal::Benchmark* RegisterBenchmark(const char* name, +inline internal::Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn) { return internal::RegisterBenchmarkInternal( ::new internal::FunctionBenchmark(name, fn)); @@ -1283,7 +1368,7 @@ inline internal::Benchmark* RegisterBenchmark(const char* name, #ifdef BENCHMARK_HAS_CXX11 template <class Lambda> -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) { using BenchType = internal::LambdaBenchmark<typename std::decay<Lambda>::type>; return internal::RegisterBenchmarkInternal( @@ -1294,7 +1379,7 @@ internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { #if defined(BENCHMARK_HAS_CXX11) && \ (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409) template <class Lambda, class... Args> -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn, +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn, Args&&... 
args) { return benchmark::RegisterBenchmark( name, [=](benchmark::State& st) { fn(st, args...); }); @@ -1308,7 +1393,7 @@ class Fixture : public internal::Benchmark { public: Fixture() : internal::Benchmark("") {} - virtual void Run(State& st) BENCHMARK_OVERRIDE { + void Run(State& st) BENCHMARK_OVERRIDE { this->SetUp(st); this->BenchmarkCase(st); this->TearDown(st); @@ -1363,7 +1448,7 @@ class Fixture : public internal::Benchmark { BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \ - &__VA_ARGS__))) + __VA_ARGS__))) #else #define BENCHMARK(n) \ BENCHMARK_PRIVATE_DECLARE(n) = \ @@ -1430,37 +1515,37 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) #endif -#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() { \ - this->SetName(#BaseClass "/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ +#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "/" #Method); \ + } \ + \ + protected: \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; -#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ - class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \ - public: \ - BaseClass##_##Method##_Benchmark() { \ - this->SetName(#BaseClass "<" #a ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ +#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ + class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "<" #a ">/" #Method); \ + } \ + \ + protected: \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; -#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ - class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \ - public: \ - BaseClass##_##Method##_Benchmark() { \ - this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ +#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ + class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ + } \ + \ + protected: \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #ifdef BENCHMARK_HAS_CXX11 @@ -1472,7 +1557,7 @@ class Fixture : public internal::Benchmark { } \ \ protected: \ - virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #else #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \ @@ -1630,7 +1715,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { Run() : run_type(RT_Iteration), aggregate_unit(kTime), - error_occurred(false), + skipped(internal::NotSkipped), iterations(1), threads(1), time_unit(GetDefaultTimeUnit()), @@ -1653,12 +1738,11 @@ class BENCHMARK_EXPORT BenchmarkReporter { std::string aggregate_name; StatisticUnit aggregate_unit; std::string report_label; // Empty if not set by 
benchmark. - bool error_occurred; - std::string error_message; + internal::Skipped skipped; + std::string skip_message; // Total iterations across all threads. IterationCount iterations; - int64_t threads; int64_t repetition_index; int64_t repetitions; @@ -1728,6 +1812,12 @@ class BENCHMARK_EXPORT BenchmarkReporter { virtual bool ReportContext(const Context& context) = 0; // Called once for each group of benchmark runs, gives information about + // the configurations of the runs. + virtual void ReportRunsConfig(double /*min_time*/, + bool /*has_explicit_iters*/, + IterationCount /*iters*/) {} + + // Called once for each group of benchmark runs, gives information about // cpu-time and heap memory usage during the benchmark run. If the group // of runs contained more than two entries then 'report' contains additional // elements representing the mean and standard deviation of those runs. @@ -1784,8 +1874,8 @@ class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter { explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) : output_options_(opts_), name_field_width_(0), printed_header_(false) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; protected: virtual void PrintRunData(const Run& report); @@ -1800,9 +1890,9 @@ class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter { class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter { public: JSONReporter() : first_report_(true) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; - virtual void Finalize() BENCHMARK_OVERRIDE; + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; + void Finalize() BENCHMARK_OVERRIDE; private: void PrintRunData(const Run& report); @@ -1815,8 +1905,8 @@ class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG( : public BenchmarkReporter { public: CSVReporter() : printed_header_(false) {} - virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; - virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; private: void PrintRunData(const Run& report); @@ -1855,7 +1945,7 @@ inline double GetTimeUnitMultiplier(TimeUnit unit) { // Creates a list of integer values for the given range and multiplier. // This can be used together with ArgsProduct() to allow multiple ranges -// with different multiplers. +// with different multipliers. // Example: // ArgsProduct({ // CreateRange(0, 1024, /*multi=*/32), diff --git a/contrib/restricted/google/benchmark/src/benchmark.cc b/contrib/restricted/google/benchmark/src/benchmark.cc index ff2864804c..f1633b703f 100644 --- a/contrib/restricted/google/benchmark/src/benchmark.cc +++ b/contrib/restricted/google/benchmark/src/benchmark.cc @@ -65,16 +65,25 @@ BM_DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. BM_DEFINE_string(benchmark_filter, ""); -// Minimum number of seconds we should run benchmark before results are -// considered significant. For cpu-time based tests, this is the lower bound +// Specification of how long to run the benchmark. 
+// +// It can be either an exact number of iterations (specified as `<integer>x`), +// or a minimum number of seconds (specified as `<float>s`). If the latter +// format (ie., min seconds) is used, the system may run the benchmark longer +// until the results are considered significant. +// +// For backward compatibility, the `s` suffix may be omitted, in which case, +// the specified number is interpreted as the number of seconds. +// +// For cpu-time based tests, this is the lower bound // on the total cpu time used by all threads that make up the test. For // real-time based tests, this is the lower bound on the elapsed time of the // benchmark execution, regardless of number of threads. -BM_DEFINE_double(benchmark_min_time, 0.5); +BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); // Minimum number of seconds a benchmark should be run before results should be -// taken into account. This e.g can be neccessary for benchmarks of code which -// needs to fill some form of cache before performance is of interrest. +// taken into account. This e.g can be necessary for benchmarks of code which +// needs to fill some form of cache before performance is of interest. // Note: results gathered within this period are discarded and not used for // reported result. BM_DEFINE_double(benchmark_min_warmup_time, 0.0); @@ -148,18 +157,19 @@ void UseCharPointer(char const volatile*) {} } // namespace internal -State::State(IterationCount max_iters, const std::vector<int64_t>& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager, +State::State(std::string name, IterationCount max_iters, + const std::vector<int64_t>& ranges, int thread_i, int n_threads, + internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement) : total_iterations_(0), batch_leftover_(0), max_iterations(max_iters), started_(false), finished_(false), - error_occurred_(false), + skipped_(internal::NotSkipped), range_(ranges), complexity_n_(0), + name_(std::move(name)), thread_index_(thread_i), threads_(n_threads), timer_(timer), @@ -186,11 +196,14 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges, #pragma nv_diagnostic push #pragma nv_diag_suppress 1427 #endif +#if defined(__NVCOMPILER) +#pragma diagnostic push +#pragma diag_suppress offset_in_non_POD_nonstandard +#endif // Offset tests to ensure commonly accessed data is on the first cache line. 
const int cache_line_size = 64; - static_assert(offsetof(State, error_occurred_) <= - (cache_line_size - sizeof(error_occurred_)), - ""); + static_assert( + offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), ""); #if defined(__INTEL_COMPILER) #pragma warning pop #elif defined(__GNUC__) @@ -199,11 +212,14 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges, #if defined(__NVCC__) #pragma nv_diagnostic pop #endif +#if defined(__NVCOMPILER) +#pragma diagnostic pop +#endif } void State::PauseTiming() { // Add in time accumulated so far - BM_CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !skipped()); timer_->StopTimer(); if (perf_counters_measurement_) { std::vector<std::pair<std::string, double>> measurements; @@ -220,21 +236,33 @@ void State::PauseTiming() { } void State::ResumeTiming() { - BM_CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !skipped()); timer_->StartTimer(); if (perf_counters_measurement_) { perf_counters_measurement_->Start(); } } -void State::SkipWithError(const char* msg) { - BM_CHECK(msg); - error_occurred_ = true; +void State::SkipWithMessage(const std::string& msg) { + skipped_ = internal::SkippedWithMessage; { MutexLock l(manager_->GetBenchmarkMutex()); - if (manager_->results.has_error_ == false) { - manager_->results.error_message_ = msg; - manager_->results.has_error_ = true; + if (internal::NotSkipped == manager_->results.skipped_) { + manager_->results.skip_message_ = msg; + manager_->results.skipped_ = skipped_; + } + } + total_iterations_ = 0; + if (timer_->running()) timer_->StopTimer(); +} + +void State::SkipWithError(const std::string& msg) { + skipped_ = internal::SkippedWithError; + { + MutexLock l(manager_->GetBenchmarkMutex()); + if (internal::NotSkipped == manager_->results.skipped_) { + manager_->results.skip_message_ = msg; + manager_->results.skipped_ = skipped_; } } total_iterations_ = 0; @@ -245,7 +273,7 @@ void State::SetIterationTime(double seconds) { timer_->SetIterationTime(seconds); } -void State::SetLabel(const char* label) { +void State::SetLabel(const std::string& label) { MutexLock l(manager_->GetBenchmarkMutex()); manager_->results.report_label_ = label; } @@ -253,14 +281,14 @@ void State::SetLabel(const char* label) { void State::StartKeepRunning() { BM_CHECK(!started_ && !finished_); started_ = true; - total_iterations_ = error_occurred_ ? 0 : max_iterations; + total_iterations_ = skipped() ? 0 : max_iterations; manager_->StartStopBarrier(); - if (!error_occurred_) ResumeTiming(); + if (!skipped()) ResumeTiming(); } void State::FinishKeepRunning() { - BM_CHECK(started_ && (!finished_ || error_occurred_)); - if (!error_occurred_) { + BM_CHECK(started_ && (!finished_ || skipped())); + if (!skipped()) { PauseTiming(); } // Total iterations has now wrapped around past 0. Fix this. @@ -338,14 +366,26 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, size_t num_repetitions_total = 0; + // This perfcounters object needs to be created before the runners vector + // below so it outlasts their lifetime. + PerfCountersMeasurement perfcounters( + StrSplit(FLAGS_benchmark_perf_counters, ',')); + + // Vector of benchmarks to run std::vector<internal::BenchmarkRunner> runners; runners.reserve(benchmarks.size()); + + // Count the number of benchmarks with threads to warn the user in case + // performance counters are used. 
+ int benchmarks_with_threads = 0; + + // Loop through all benchmarks for (const BenchmarkInstance& benchmark : benchmarks) { BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr; if (benchmark.complexity() != oNone) reports_for_family = &per_family_reports[benchmark.family_index()]; - - runners.emplace_back(benchmark, reports_for_family); + benchmarks_with_threads += (benchmark.threads() > 0); + runners.emplace_back(benchmark, &perfcounters, reports_for_family); int num_repeats_of_this_instance = runners.back().GetNumRepeats(); num_repetitions_total += num_repeats_of_this_instance; if (reports_for_family) @@ -353,6 +393,17 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, } assert(runners.size() == benchmarks.size() && "Unexpected runner count."); + // The use of performance counters with threads would be unintuitive for + // the average user so we need to warn them about this case + if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) { + GetErrorLogInstance() + << "***WARNING*** There are " << benchmarks_with_threads + << " benchmarks with threads and " << perfcounters.num_counters() + << " performance counters were requested. Beware counters will " + "reflect the combined usage across all " + "threads.\n"; + } + std::vector<size_t> repetition_indices; repetition_indices.reserve(num_repetitions_total); for (size_t runner_index = 0, num_runners = runners.size(); @@ -376,6 +427,12 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, if (runner.HasRepeatsRemaining()) continue; // FIXME: report each repetition separately, not all of them in bulk. + display_reporter->ReportRunsConfig( + runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters()); + if (file_reporter) + file_reporter->ReportRunsConfig( + runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters()); + RunResults run_results = runner.GetResults(); // Maybe calculate complexity report @@ -409,14 +466,15 @@ std::unique_ptr<BenchmarkReporter> CreateReporter( typedef std::unique_ptr<BenchmarkReporter> PtrType; if (name == "console") { return PtrType(new ConsoleReporter(output_opts)); - } else if (name == "json") { + } + if (name == "json") { return PtrType(new JSONReporter()); - } else if (name == "csv") { + } + if (name == "csv") { return PtrType(new CSVReporter()); - } else { - std::cerr << "Unexpected format: '" << name << "'\n"; - std::exit(1); } + std::cerr << "Unexpected format: '" << name << "'\n"; + std::exit(1); } BENCHMARK_RESTORE_DEPRECATED_WARNING @@ -585,13 +643,17 @@ void PrintUsageAndExit() { void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) { if (time_unit_flag == "s") { return SetDefaultTimeUnit(kSecond); - } else if (time_unit_flag == "ms") { + } + if (time_unit_flag == "ms") { return SetDefaultTimeUnit(kMillisecond); - } else if (time_unit_flag == "us") { + } + if (time_unit_flag == "us") { return SetDefaultTimeUnit(kMicrosecond); - } else if (time_unit_flag == "ns") { + } + if (time_unit_flag == "ns") { return SetDefaultTimeUnit(kNanosecond); - } else if (!time_unit_flag.empty()) { + } + if (!time_unit_flag.empty()) { PrintUsageAndExit(); } } @@ -604,7 +666,7 @@ void ParseCommandLineFlags(int* argc, char** argv) { if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - ParseDoubleFlag(argv[i], "benchmark_min_time", + ParseStringFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || 
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", &FLAGS_benchmark_min_warmup_time) || @@ -665,7 +727,7 @@ void PrintDefaultHelp() { "benchmark" " [--benchmark_list_tests={true|false}]\n" " [--benchmark_filter=<regex>]\n" - " [--benchmark_min_time=<min_time>]\n" + " [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n" " [--benchmark_min_warmup_time=<min_warmup_time>]\n" " [--benchmark_repetitions=<num_repetitions>]\n" " [--benchmark_enable_random_interleaving={true|false}]\n" @@ -676,6 +738,9 @@ void PrintDefaultHelp() { " [--benchmark_out_format=<json|console|csv>]\n" " [--benchmark_color={auto|true|false}]\n" " [--benchmark_counters_tabular={true|false}]\n" +#if defined HAVE_LIBPFM + " [--benchmark_perf_counters=<counter>,...]\n" +#endif " [--benchmark_context=<key>=<value>,...]\n" " [--benchmark_time_unit={ns|us|ms|s}]\n" " [--v=<verbosity>]\n"); diff --git a/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc b/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc index 963fea22f3..286f986530 100644 --- a/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc +++ b/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc @@ -93,24 +93,24 @@ State BenchmarkInstance::Run( IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement) const { - State st(iters, args_, thread_id, threads_, timer, manager, - perf_counters_measurement); + State st(name_.function_name, iters, args_, thread_id, threads_, timer, + manager, perf_counters_measurement); benchmark_.Run(st); return st; } void BenchmarkInstance::Setup() const { if (setup_) { - State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, - nullptr); + State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_, + nullptr, nullptr, nullptr); setup_(st); } } void BenchmarkInstance::Teardown() const { if (teardown_) { - State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, - nullptr); + State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_, + nullptr, nullptr, nullptr); teardown_(st); } } diff --git a/contrib/restricted/google/benchmark/src/benchmark_register.cc b/contrib/restricted/google/benchmark/src/benchmark_register.cc index eae2c320f6..e447c9a2d3 100644 --- a/contrib/restricted/google/benchmark/src/benchmark_register.cc +++ b/contrib/restricted/google/benchmark/src/benchmark_register.cc @@ -204,7 +204,7 @@ bool FindBenchmarksInternal(const std::string& re, // Benchmark //=============================================================================// -Benchmark::Benchmark(const char* name) +Benchmark::Benchmark(const std::string& name) : name_(name), aggregation_report_mode_(ARM_Unspecified), time_unit_(GetDefaultTimeUnit()), @@ -230,7 +230,7 @@ Benchmark::Benchmark(const char* name) Benchmark::~Benchmark() {} Benchmark* Benchmark::Name(const std::string& name) { - SetName(name.c_str()); + SetName(name); return this; } @@ -468,7 +468,9 @@ Benchmark* Benchmark::ThreadPerCpu() { return this; } -void Benchmark::SetName(const char* name) { name_ = name; } +void Benchmark::SetName(const std::string& name) { name_ = name; } + +const char* Benchmark::GetName() const { return name_.c_str(); } int Benchmark::ArgsCnt() const { if (args_.empty()) { @@ -478,6 +480,12 @@ int Benchmark::ArgsCnt() const { return static_cast<int>(args_.front().size()); } +const char* Benchmark::GetArgName(int arg) const { + BM_CHECK_GE(arg, 0); + 
BM_CHECK_LT(arg, static_cast<int>(arg_names_.size())); + return arg_names_[arg].c_str(); +} + TimeUnit Benchmark::GetTimeUnit() const { return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_; } diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.cc b/contrib/restricted/google/benchmark/src/benchmark_runner.cc index f9ffbc5afb..5f683fe423 100644 --- a/contrib/restricted/google/benchmark/src/benchmark_runner.cc +++ b/contrib/restricted/google/benchmark/src/benchmark_runner.cc @@ -28,11 +28,14 @@ #include <algorithm> #include <atomic> +#include <climits> +#include <cmath> #include <condition_variable> #include <cstdio> #include <cstdlib> #include <fstream> #include <iostream> +#include <limits> #include <memory> #include <string> #include <thread> @@ -62,6 +65,8 @@ MemoryManager* memory_manager = nullptr; namespace { static constexpr IterationCount kMaxIterations = 1000000000; +const double kDefaultMinTime = + std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr); BenchmarkReporter::Run CreateRunReport( const benchmark::internal::BenchmarkInstance& b, @@ -75,8 +80,8 @@ BenchmarkReporter::Run CreateRunReport( report.run_name = b.name(); report.family_index = b.family_index(); report.per_family_instance_index = b.per_family_instance_index(); - report.error_occurred = results.has_error_; - report.error_message = results.error_message_; + report.skipped = results.skipped_; + report.skip_message = results.skip_message_; report.report_label = results.report_label_; // This is the total iterations across all threads. report.iterations = results.iterations; @@ -85,7 +90,7 @@ BenchmarkReporter::Run CreateRunReport( report.repetition_index = repetition_index; report.repetitions = repeats; - if (!report.error_occurred) { + if (!report.skipped) { // This is the total time across all threads. if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; @@ -126,7 +131,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, State st = b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); - BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) + BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { MutexLock l(manager->GetBenchmarkMutex()); @@ -141,27 +146,100 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, manager->NotifyThreadComplete(); } +double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b, + const BenchTimeType& iters_or_time) { + if (!IsZero(b.min_time())) return b.min_time(); + // If the flag was used to specify number of iters, then return the default + // min_time. + if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime; + + return iters_or_time.time; +} + +IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b, + const BenchTimeType& iters_or_time) { + if (b.iterations() != 0) return b.iterations(); + + // We've already concluded that this flag is currently used to pass + // iters but do a check here again anyway. + BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS); + return iters_or_time.iters; +} + } // end namespace +BenchTimeType ParseBenchMinTime(const std::string& value) { + BenchTimeType ret; + + if (value.empty()) { + ret.tag = BenchTimeType::TIME; + ret.time = 0.0; + return ret; + } + + if (value.back() == 'x') { + char* p_end; + // Reset errno before it's changed by strtol. 
+ errno = 0; + IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10); + + // After a valid parse, p_end should have been set to + // point to the 'x' suffix. + BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x') + << "Malformed iters value passed to --benchmark_min_time: `" << value + << "`. Expected --benchmark_min_time=<integer>x."; + + ret.tag = BenchTimeType::ITERS; + ret.iters = num_iters; + return ret; + } + + bool has_suffix = value.back() == 's'; + if (!has_suffix) { + BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. " + "Eg., `30s` for 30-seconds."; + } + + char* p_end; + // Reset errno before it's changed by strtod. + errno = 0; + double min_time = std::strtod(value.c_str(), &p_end); + + // After a successful parse, p_end should point to the suffix 's', + // or the end of the string if the suffix was omitted. + BM_CHECK(errno == 0 && p_end != nullptr && + ((has_suffix && *p_end == 's') || *p_end == '\0')) + << "Malformed seconds value passed to --benchmark_min_time: `" << value + << "`. Expected --benchmark_min_time=<float>x."; + + ret.tag = BenchTimeType::TIME; + ret.time = min_time; + + return ret; +} + BenchmarkRunner::BenchmarkRunner( const benchmark::internal::BenchmarkInstance& b_, + PerfCountersMeasurement* pcm_, BenchmarkReporter::PerFamilyRunReports* reports_for_family_) : b(b_), reports_for_family(reports_for_family_), - min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), + parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), + min_time(ComputeMinTime(b_, parsed_benchtime_flag)), min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) ? b.min_warmup_time() : FLAGS_benchmark_min_warmup_time), warmup_done(!(min_warmup_time > 0.0)), repeats(b.repetitions() != 0 ? b.repetitions() : FLAGS_benchmark_repetitions), - has_explicit_iteration_count(b.iterations() != 0), + has_explicit_iteration_count(b.iterations() != 0 || + parsed_benchtime_flag.tag == + BenchTimeType::ITERS), pool(b.threads() - 1), - iters(has_explicit_iteration_count ? b.iterations() : 1), - perf_counters_measurement(StrSplit(FLAGS_benchmark_perf_counters, ',')), - perf_counters_measurement_ptr(perf_counters_measurement.IsValid() - ? &perf_counters_measurement - : nullptr) { + iters(has_explicit_iteration_count + ? ComputeIters(b_, parsed_benchtime_flag) + : 1), + perf_counters_measurement_ptr(pcm_) { run_results.display_report_aggregates_only = (FLAGS_benchmark_report_aggregates_only || FLAGS_benchmark_display_aggregates_only); @@ -174,7 +252,7 @@ BenchmarkRunner::BenchmarkRunner( run_results.file_report_aggregates_only = (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); BM_CHECK(FLAGS_benchmark_perf_counters.empty() || - perf_counters_measurement.IsValid()) + (perf_counters_measurement_ptr->num_counters() == 0)) << "Perf counters were requested but could not be set up."; } } @@ -263,7 +341,7 @@ bool BenchmarkRunner::ShouldReportIterationResults( // Determine if this run should be reported; // Either it has run for a sufficient amount of time // or because an error was reported. - return i.results.has_error_ || + return i.results.skipped_ || i.iters >= kMaxIterations || // Too many iterations already. i.seconds >= GetMinTimeToApply() || // The elapsed time is large enough. 
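`ParseBenchMinTime` above gives `--benchmark_min_time` a small grammar: `<integer>x` requests an explicit iteration count, while `<float>s` (or a bare float, kept for backward compatibility) requests a minimum duration, e.g. `--benchmark_min_time=1000x` versus `--benchmark_min_time=2.5s`. A standalone sketch of that suffix dispatch, using plain `strtol`/`strtod` and an invented struct rather than the library's internal types:

```
#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <string>

// Re-statement of the suffix dispatch implemented by ParseBenchMinTime:
// "1000x" selects an iteration count, "2.5s" (or bare "2.5") a duration.
struct BenchTime {
  enum { ITERS, TIME } tag;
  long iters;
  double seconds;
};

BenchTime ParseMinTimeSketch(const std::string& value) {
  BenchTime ret{};
  char* end = nullptr;
  errno = 0;  // reset before strtol/strtod so failures are detectable
  if (!value.empty() && value.back() == 'x') {
    ret.tag = BenchTime::ITERS;
    ret.iters = std::strtol(value.c_str(), &end, 10);
    assert(errno == 0 && *end == 'x');  // e.g. "1000x"
  } else {
    ret.tag = BenchTime::TIME;
    ret.seconds = std::strtod(value.c_str(), &end);
    assert(errno == 0 && (*end == 's' || *end == '\0'));  // "2.5s" or "2.5"
  }
  return ret;
}

int main() {
  assert(ParseMinTimeSketch("1000x").iters == 1000);
  assert(ParseMinTimeSketch("2.5s").seconds == 2.5);
  return 0;
}
```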
@@ -389,10 +467,7 @@ void BenchmarkRunner::DoOneRepetition() { manager->WaitForAllThreads(); manager.reset(); b.Teardown(); - - BENCHMARK_DISABLE_DEPRECATED_WARNING - memory_manager->Stop(memory_result); - BENCHMARK_RESTORE_DEPRECATED_WARNING + memory_manager->Stop(*memory_result); } // Ok, now actually report. @@ -402,7 +477,7 @@ void BenchmarkRunner::DoOneRepetition() { if (reports_for_family) { ++reports_for_family->num_runs_done; - if (!report.error_occurred) reports_for_family->Runs.push_back(report); + if (!report.skipped) reports_for_family->Runs.push_back(report); } run_results.non_aggregates.push_back(report); diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.h b/contrib/restricted/google/benchmark/src/benchmark_runner.h index 0174bd3401..db2fa04396 100644 --- a/contrib/restricted/google/benchmark/src/benchmark_runner.h +++ b/contrib/restricted/google/benchmark/src/benchmark_runner.h @@ -25,7 +25,7 @@ namespace benchmark { -BM_DECLARE_double(benchmark_min_time); +BM_DECLARE_string(benchmark_min_time); BM_DECLARE_double(benchmark_min_warmup_time); BM_DECLARE_int32(benchmark_repetitions); BM_DECLARE_bool(benchmark_report_aggregates_only); @@ -44,9 +44,21 @@ struct RunResults { bool file_report_aggregates_only = false; }; +struct BENCHMARK_EXPORT BenchTimeType { + enum { ITERS, TIME } tag; + union { + IterationCount iters; + double time; + }; +}; + +BENCHMARK_EXPORT +BenchTimeType ParseBenchMinTime(const std::string& value); + class BenchmarkRunner { public: BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, + benchmark::internal::PerfCountersMeasurement* pmc_, BenchmarkReporter::PerFamilyRunReports* reports_for_family); int GetNumRepeats() const { return repeats; } @@ -63,12 +75,19 @@ class BenchmarkRunner { return reports_for_family; } + double GetMinTime() const { return min_time; } + + bool HasExplicitIters() const { return has_explicit_iteration_count; } + + IterationCount GetIters() const { return iters; } + private: RunResults run_results; const benchmark::internal::BenchmarkInstance& b; BenchmarkReporter::PerFamilyRunReports* reports_for_family; + BenchTimeType parsed_benchtime_flag; const double min_time; const double min_warmup_time; bool warmup_done; @@ -85,8 +104,7 @@ class BenchmarkRunner { // So only the first repetition has to find/calculate it, // the other repetitions will just use that precomputed iteration count. - PerfCountersMeasurement perf_counters_measurement; - PerfCountersMeasurement* const perf_counters_measurement_ptr; + PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr; struct IterationResults { internal::ThreadManager::Result results; diff --git a/contrib/restricted/google/benchmark/src/colorprint.cc b/contrib/restricted/google/benchmark/src/colorprint.cc index 1a000a0637..9a653c5007 100644 --- a/contrib/restricted/google/benchmark/src/colorprint.cc +++ b/contrib/restricted/google/benchmark/src/colorprint.cc @@ -96,18 +96,18 @@ std::string FormatString(const char* msg, va_list args) { // currently there is no error handling for failure, so this is hack. BM_CHECK(ret >= 0); - if (ret == 0) // handle empty expansion + if (ret == 0) { // handle empty expansion return {}; - else if (static_cast<size_t>(ret) < size) + } + if (static_cast<size_t>(ret) < size) { return local_buff; - else { - // we did not provide a long enough buffer on our first attempt. 
- size = static_cast<size_t>(ret) + 1; // + 1 for the null byte - std::unique_ptr<char[]> buff(new char[size]); - ret = vsnprintf(buff.get(), size, msg, args); - BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size); - return buff.get(); } + // we did not provide a long enough buffer on our first attempt. + size = static_cast<size_t>(ret) + 1; // + 1 for the null byte + std::unique_ptr<char[]> buff(new char[size]); + ret = vsnprintf(buff.get(), size, msg, args); + BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size); + return buff.get(); } std::string FormatString(const char* msg, ...) { @@ -163,7 +163,7 @@ bool IsColorTerminal() { #else // On non-Windows platforms, we rely on the TERM variable. This list of // supported TERM values is copied from Google Test: - // <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>. + // <https://github.com/google/googletest/blob/main/googletest/src/gtest.cc#L2925>. const char* const SUPPORTED_TERM_VALUES[] = { "xterm", "xterm-color", "xterm-256color", "screen", "screen-256color", "tmux", diff --git a/contrib/restricted/google/benchmark/src/commandlineflags.cc b/contrib/restricted/google/benchmark/src/commandlineflags.cc index 1f555b2757..dcb414959d 100644 --- a/contrib/restricted/google/benchmark/src/commandlineflags.cc +++ b/contrib/restricted/google/benchmark/src/commandlineflags.cc @@ -284,14 +284,15 @@ bool IsTruthyFlagValue(const std::string& value) { char v = value[0]; return isalnum(v) && !(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N'); - } else if (!value.empty()) { + } + if (!value.empty()) { std::string value_lower(value); std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(), [](char c) { return static_cast<char>(::tolower(c)); }); return !(value_lower == "false" || value_lower == "no" || value_lower == "off"); - } else - return true; + } + return true; } } // end namespace benchmark diff --git a/contrib/restricted/google/benchmark/src/complexity.h b/contrib/restricted/google/benchmark/src/complexity.h index df29b48d29..0a0679b48b 100644 --- a/contrib/restricted/google/benchmark/src/complexity.h +++ b/contrib/restricted/google/benchmark/src/complexity.h @@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO( const std::vector<BenchmarkReporter::Run>& reports); // This data structure will contain the result returned by MinimalLeastSq -// - coef : Estimated coeficient for the high-order term as +// - coef : Estimated coefficient for the high-order term as // interpolated from data. // - rms : Normalized Root Mean Squared Error. // - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability diff --git a/contrib/restricted/google/benchmark/src/console_reporter.cc b/contrib/restricted/google/benchmark/src/console_reporter.cc index 3950e49814..10e05e133e 100644 --- a/contrib/restricted/google/benchmark/src/console_reporter.cc +++ b/contrib/restricted/google/benchmark/src/console_reporter.cc @@ -115,7 +115,7 @@ static std::string FormatTime(double time) { if (time < 100.0) { return FormatString("%10.1f", time); } - // Assuming the time ist at max 9.9999e+99 and we have 10 digits for the + // Assuming the time is at max 9.9999e+99 and we have 10 digits for the // number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print. 
if (time > 9999999999 /*max 10 digit number*/) { return FormatString("%1.4e", time); @@ -135,9 +135,13 @@ void ConsoleReporter::PrintRunData(const Run& result) { printer(Out, name_color, "%-*s ", name_field_width_, result.benchmark_name().c_str()); - if (result.error_occurred) { + if (internal::SkippedWithError == result.skipped) { printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", - result.error_message.c_str()); + result.skip_message.c_str()); + printer(Out, COLOR_DEFAULT, "\n"); + return; + } else if (internal::SkippedWithMessage == result.skipped) { + printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str()); printer(Out, COLOR_DEFAULT, "\n"); return; } diff --git a/contrib/restricted/google/benchmark/src/csv_reporter.cc b/contrib/restricted/google/benchmark/src/csv_reporter.cc index 83c94573f5..7b56da107e 100644 --- a/contrib/restricted/google/benchmark/src/csv_reporter.cc +++ b/contrib/restricted/google/benchmark/src/csv_reporter.cc @@ -109,10 +109,10 @@ BENCHMARK_EXPORT void CSVReporter::PrintRunData(const Run& run) { std::ostream& Out = GetOutputStream(); Out << CsvEscape(run.benchmark_name()) << ","; - if (run.error_occurred) { + if (run.skipped) { Out << std::string(elements.size() - 3, ','); - Out << "true,"; - Out << CsvEscape(run.error_message) << "\n"; + Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ","; + Out << CsvEscape(run.skip_message) << "\n"; return; } diff --git a/contrib/restricted/google/benchmark/src/cycleclock.h b/contrib/restricted/google/benchmark/src/cycleclock.h index 827f21b927..1295880b2e 100644 --- a/contrib/restricted/google/benchmark/src/cycleclock.h +++ b/contrib/restricted/google/benchmark/src/cycleclock.h @@ -36,7 +36,8 @@ // declarations of some other intrinsics, breaking compilation. // Therefore, we simply declare __rdtsc ourselves. See also // http://connect.microsoft.com/VisualStudio/feedback/details/262047 -#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) +#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \ + !defined(_M_ARM64EC) extern "C" uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) #endif @@ -114,7 +115,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { // when I know it will work. Otherwise, I'll use __rdtsc and hope // the code is being compiled with a non-ancient compiler. _asm rdtsc -#elif defined(COMPILER_MSVC) && defined(_M_ARM64) +#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC)) // See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics // and https://reviews.llvm.org/D53115 int64_t virtual_timer_value; diff --git a/contrib/restricted/google/benchmark/src/internal_macros.h b/contrib/restricted/google/benchmark/src/internal_macros.h index 396a390afb..8dd7d0c650 100644 --- a/contrib/restricted/google/benchmark/src/internal_macros.h +++ b/contrib/restricted/google/benchmark/src/internal_macros.h @@ -42,6 +42,12 @@ #define BENCHMARK_OS_CYGWIN 1 #elif defined(_WIN32) #define BENCHMARK_OS_WINDOWS 1 + // WINAPI_FAMILY_PARTITION is defined in winapifamily.h. + // We include windows.h which implicitly includes winapifamily.h for compatibility. 
+ #ifndef NOMINMAX + #define NOMINMAX + #endif + #include <windows.h> #if defined(WINAPI_FAMILY_PARTITION) #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) #define BENCHMARK_OS_WINDOWS_WIN32 1 diff --git a/contrib/restricted/google/benchmark/src/json_reporter.cc b/contrib/restricted/google/benchmark/src/json_reporter.cc index d55a0e6f0b..6559dfd5e6 100644 --- a/contrib/restricted/google/benchmark/src/json_reporter.cc +++ b/contrib/restricted/google/benchmark/src/json_reporter.cc @@ -254,9 +254,12 @@ void JSONReporter::PrintRunData(Run const& run) { BENCHMARK_UNREACHABLE(); }()) << ",\n"; } - if (run.error_occurred) { - out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; - out << indent << FormatKV("error_message", run.error_message) << ",\n"; + if (internal::SkippedWithError == run.skipped) { + out << indent << FormatKV("error_occurred", true) << ",\n"; + out << indent << FormatKV("error_message", run.skip_message) << ",\n"; + } else if (internal::SkippedWithMessage == run.skipped) { + out << indent << FormatKV("skipped", true) << ",\n"; + out << indent << FormatKV("skip_message", run.skip_message) << ",\n"; } if (!run.report_big_o && !run.report_rms) { out << indent << FormatKV("iterations", run.iterations) << ",\n"; @@ -294,7 +297,8 @@ void JSONReporter::PrintRunData(Run const& run) { out << ",\n" << indent << FormatKV("max_bytes_used", memory_result.max_bytes_used); - auto report_if_present = [&out, &indent](const char* label, int64_t val) { + auto report_if_present = [&out, &indent](const std::string& label, + int64_t val) { if (val != MemoryManager::TombstoneValue) out << ",\n" << indent << FormatKV(label, val); }; diff --git a/contrib/restricted/google/benchmark/src/perf_counters.cc b/contrib/restricted/google/benchmark/src/perf_counters.cc index 8a60088ba7..5f2ac282ab 100644 --- a/contrib/restricted/google/benchmark/src/perf_counters.cc +++ b/contrib/restricted/google/benchmark/src/perf_counters.cc @@ -29,96 +29,215 @@ namespace internal { constexpr size_t PerfCounterValues::kMaxCounters; #if defined HAVE_LIBPFM + +size_t PerfCounterValues::Read(const std::vector<int>& leaders) { + // Create a pointer for multiple reads + const size_t bufsize = values_.size() * sizeof(values_[0]); + char* ptr = reinterpret_cast<char*>(values_.data()); + size_t size = bufsize; + for (int lead : leaders) { + auto read_bytes = ::read(lead, ptr, size); + if (read_bytes >= ssize_t(sizeof(uint64_t))) { + // Actual data bytes are all bytes minus initial padding + std::size_t data_bytes = read_bytes - sizeof(uint64_t); + // This should be very cheap since it's in hot cache + std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes); + // Increment our counters + ptr += data_bytes; + size -= data_bytes; + } else { + int err = errno; + GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err + << " " << ::strerror(err) << "\n"; + return 0; + } + } + return (bufsize - size) / sizeof(uint64_t); +} + const bool PerfCounters::kSupported = true; bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; } +bool PerfCounters::IsCounterSupported(const std::string& name) { + perf_event_attr_t attr; + std::memset(&attr, 0, sizeof(attr)); + pfm_perf_encode_arg_t arg; + std::memset(&arg, 0, sizeof(arg)); + arg.attr = &attr; + const int mode = PFM_PLM3; // user mode only + int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT, + &arg); + return (ret == PFM_SUCCESS); +} + PerfCounters PerfCounters::Create( const std::vector<std::string>& 
                                         counter_names) {
-  if (counter_names.empty()) {
-    return NoCounters();
-  }
-  if (counter_names.size() > PerfCounterValues::kMaxCounters) {
-    GetErrorLogInstance()
-        << counter_names.size()
-        << " counters were requested. The minimum is 1, the maximum is "
-        << PerfCounterValues::kMaxCounters << "\n";
-    return NoCounters();
-  }
-  std::vector<int> counter_ids(counter_names.size());
+  // Valid counters will populate these arrays but we start empty
+  std::vector<std::string> valid_names;
+  std::vector<int> counter_ids;
+  std::vector<int> leader_ids;
 
-  const int mode = PFM_PLM3;  // user mode only
+  // Reserve space up to the maximum possible
+  valid_names.reserve(counter_names.size());
+  counter_ids.reserve(counter_names.size());
+
+  const int kCounterMode = PFM_PLM3;  // user mode only
+
+  // Group leads will be assigned on demand. The idea is that once we cannot
+  // create a counter descriptor, the reason is that this group has maxed out
+  // so we set the group_id again to -1 and retry - giving the algorithm a
+  // chance to create a new group leader to hold the next set of counters.
+  int group_id = -1;
+
+  // Loop through all performance counters
   for (size_t i = 0; i < counter_names.size(); ++i) {
-    const bool is_first = i == 0;
-    struct perf_event_attr attr {};
-    attr.size = sizeof(attr);
-    const int group_id = !is_first ? counter_ids[0] : -1;
+    // We are about to push into the valid names vector;
+    // check that we have not reached the maximum
+    if (valid_names.size() == PerfCounterValues::kMaxCounters) {
+      // Log a message if we maxed out and stop adding
+      GetErrorLogInstance()
+          << counter_names.size() << " counters were requested. The maximum is "
+          << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
+          << " were already added. All remaining counters will be ignored\n";
+      // stop the loop and return what we have already
+      break;
+    }
+
+    // Check if this name is empty
     const auto& name = counter_names[i];
     if (name.empty()) {
-      GetErrorLogInstance() << "A counter name was the empty string\n";
-      return NoCounters();
+      GetErrorLogInstance()
+          << "A performance counter name was the empty string\n";
+      continue;
     }
+
+    // Here first means first in group, i.e. the group leader
+    const bool is_first = (group_id < 0);
+
+    // This struct will be populated by libpfm from the counter string
+    // and then fed into the syscall perf_event_open
+    struct perf_event_attr attr {};
+    attr.size = sizeof(attr);
+
+    // This is the input struct to libpfm.
     pfm_perf_encode_arg_t arg{};
     arg.attr = &attr;
-
-    const int pfm_get =
-        pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
+    const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
+                                                  PFM_OS_PERF_EVENT, &arg);
     if (pfm_get != PFM_SUCCESS) {
-      GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
-      return NoCounters();
+      GetErrorLogInstance()
+          << "Unknown performance counter name: " << name << "\n";
+      continue;
     }
-    attr.disabled = is_first;
-    // Note: the man page for perf_event_create suggests inerit = true and
+
+    // We then proceed to populate the remaining fields in our attribute struct
+    // Note: the man page for perf_event_open suggests inherit = true and
     // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
     // case.
+    attr.disabled = is_first;
     attr.inherit = true;
     attr.pinned = is_first;
     attr.exclude_kernel = true;
     attr.exclude_user = false;
     attr.exclude_hv = true;
-    // Read all counters in one read.
+
+    // Read all counters in a group in one read.
    attr.read_format = PERF_FORMAT_GROUP;
 
     int id = -1;
-    static constexpr size_t kNrOfSyscallRetries = 5;
-    // Retry syscall as it was interrupted often (b/64774091).
-    for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
-         ++num_retries) {
-      id = perf_event_open(&attr, 0, -1, group_id, 0);
-      if (id >= 0 || errno != EINTR) {
-        break;
+    while (id < 0) {
+      static constexpr size_t kNrOfSyscallRetries = 5;
+      // Retry syscall as it was interrupted often (b/64774091).
+      for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
+           ++num_retries) {
+        id = perf_event_open(&attr, 0, -1, group_id, 0);
+        if (id >= 0 || errno != EINTR) {
+          break;
+        }
+      }
+      if (id < 0) {
+        // If the file descriptor is negative we might have reached a limit
+        // in the current group. Set the group_id to -1 and retry.
+        if (group_id >= 0) {
+          // Create a new group
+          group_id = -1;
+        } else {
+          // At this point we have already retried to set a new group id and
+          // failed. We then give up.
+          break;
+        }
       }
     }
+
+    // We failed to get a new file descriptor. We might have reached a hard
+    // hardware limit that cannot be resolved even with group multiplexing.
     if (id < 0) {
-      GetErrorLogInstance()
-          << "Failed to get a file descriptor for " << name << "\n";
-      return NoCounters();
-    }
+      GetErrorLogInstance() << "***WARNING*** Failed to get a file descriptor "
+                               "for performance counter "
+                            << name << ". Ignoring\n";
 
-    counter_ids[i] = id;
+      // We give up on this counter but try to keep going
+      // as the others would be fine
+      continue;
+    }
+    if (group_id < 0) {
+      // This is a leader, store and assign it to the current file descriptor
+      leader_ids.push_back(id);
+      group_id = id;
+    }
+    // This is a valid counter, add it to our descriptor's list
+    counter_ids.push_back(id);
+    valid_names.push_back(name);
   }
-  if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
-    GetErrorLogInstance() << "Failed to start counters\n";
-    return NoCounters();
+
+  // Loop through all group leaders, activating them.
+  // There is another option of starting ALL counters in a process but
+  // that would be far too intrusive. If the user is using PMCs
+  // by themselves then this would have a side effect on them. It is
+  // friendlier to loop through all groups individually.
+  for (int lead : leader_ids) {
+    if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
+      // This should never happen but if it does, we give up on the
+      // entire batch as recovery would be a mess.
+      GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
" + "Claring out all counters.\n"; + + // Close all peformance counters + for (int id : counter_ids) { + ::close(id); + } + + // Return an empty object so our internal state is still good and + // the process can continue normally without impact + return NoCounters(); + } } - return PerfCounters(counter_names, std::move(counter_ids)); + return PerfCounters(std::move(valid_names), std::move(counter_ids), + std::move(leader_ids)); } void PerfCounters::CloseCounters() const { if (counter_ids_.empty()) { return; } - ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE); + for (int lead : leader_ids_) { + ioctl(lead, PERF_EVENT_IOC_DISABLE); + } for (int fd : counter_ids_) { close(fd); } } #else // defined HAVE_LIBPFM +size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; } + const bool PerfCounters::kSupported = false; bool PerfCounters::Initialize() { return false; } +bool PerfCounters::IsCounterSupported(const std::string&) { return false; } + PerfCounters PerfCounters::Create( const std::vector<std::string>& counter_names) { if (!counter_names.empty()) { @@ -130,31 +249,10 @@ PerfCounters PerfCounters::Create( void PerfCounters::CloseCounters() const {} #endif // defined HAVE_LIBPFM -Mutex PerfCountersMeasurement::mutex_; -int PerfCountersMeasurement::ref_count_ = 0; -PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters(); - PerfCountersMeasurement::PerfCountersMeasurement( const std::vector<std::string>& counter_names) : start_values_(counter_names.size()), end_values_(counter_names.size()) { - MutexLock l(mutex_); - if (ref_count_ == 0) { - counters_ = PerfCounters::Create(counter_names); - } - // We chose to increment it even if `counters_` ends up invalid, - // so that we don't keep trying to create, and also since the dtor - // will decrement regardless of `counters_`'s validity - ++ref_count_; - - BM_CHECK(!counters_.IsValid() || counters_.names() == counter_names); -} - -PerfCountersMeasurement::~PerfCountersMeasurement() { - MutexLock l(mutex_); - --ref_count_; - if (ref_count_ == 0) { - counters_ = PerfCounters::NoCounters(); - } + counters_ = PerfCounters::Create(counter_names); } PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept { @@ -162,6 +260,7 @@ PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept { CloseCounters(); counter_ids_ = std::move(other.counter_ids_); + leader_ids_ = std::move(other.leader_ids_); counter_names_ = std::move(other.counter_names_); } return *this; diff --git a/contrib/restricted/google/benchmark/src/perf_counters.h b/contrib/restricted/google/benchmark/src/perf_counters.h index 680555d4b0..152a6f2561 100644 --- a/contrib/restricted/google/benchmark/src/perf_counters.h +++ b/contrib/restricted/google/benchmark/src/perf_counters.h @@ -17,6 +17,7 @@ #include <array> #include <cstdint> +#include <cstring> #include <memory> #include <vector> @@ -44,18 +45,21 @@ namespace internal { // The implementation ensures the storage is inlined, and allows 0-based // indexing into the counter values. // The object is used in conjunction with a PerfCounters object, by passing it -// to Snapshot(). The values are populated such that -// perfCounters->names()[i]'s value is obtained at position i (as given by -// operator[]) of this object. -class PerfCounterValues { +// to Snapshot(). The Read() method relocates individual reads, discarding +// the initial padding from each group leader in the values buffer such that +// all user accesses through the [] operator are correct. 
diff --git a/contrib/restricted/google/benchmark/src/perf_counters.h b/contrib/restricted/google/benchmark/src/perf_counters.h
index 680555d4b0..152a6f2561 100644
--- a/contrib/restricted/google/benchmark/src/perf_counters.h
+++ b/contrib/restricted/google/benchmark/src/perf_counters.h
@@ -17,6 +17,7 @@
 
 #include <array>
 #include <cstdint>
+#include <cstring>
 #include <memory>
 #include <vector>
 
@@ -44,18 +45,21 @@ namespace internal {
 // The implementation ensures the storage is inlined, and allows 0-based
 // indexing into the counter values.
 // The object is used in conjunction with a PerfCounters object, by passing it
-// to Snapshot(). The values are populated such that
-// perfCounters->names()[i]'s value is obtained at position i (as given by
-// operator[]) of this object.
+// to Snapshot(). The Read() method relocates individual reads, discarding
+// the initial padding from each group leader in the values buffer such that
+// all user accesses through the [] operator are correct.
+class BENCHMARK_EXPORT PerfCounterValues {
  public:
   explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
     BM_CHECK_LE(nr_counters_, kMaxCounters);
   }
 
-  uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
+  // We are reading correctly now so the values don't need to skip padding
+  uint64_t operator[](size_t pos) const { return values_[pos]; }
 
-  static constexpr size_t kMaxCounters = 3;
+  // Increased the maximum to 32 since the buffer
+  // is std::array<> backed
+  static constexpr size_t kMaxCounters = 32;
 
  private:
   friend class PerfCounters;
@@ -66,7 +70,14 @@ class PerfCounterValues {
             sizeof(uint64_t) * (kPadding + nr_counters_)};
   }
 
-  static constexpr size_t kPadding = 1;
+  // This reading is complex and as the goal of this class is to
+  // abstract away the intricacies of the reading process, this is
+  // a better place for it
+  size_t Read(const std::vector<int>& leaders);
+
+  // The padding is now 2 due to the reading algorithm (1st padding plus a
+  // current read padding)
+  static constexpr size_t kPadding = 2;
   std::array<uint64_t, kPadding + kMaxCounters> values_;
   const size_t nr_counters_;
 };
@@ -79,10 +90,11 @@ class BENCHMARK_EXPORT PerfCounters final {
   // True iff this platform supports performance counters.
   static const bool kSupported;
 
-  bool IsValid() const { return !counter_names_.empty(); }
+  // Returns an empty object
   static PerfCounters NoCounters() { return PerfCounters(); }
 
   ~PerfCounters() { CloseCounters(); }
+  PerfCounters() = default;
   PerfCounters(PerfCounters&&) = default;
   PerfCounters(const PerfCounters&) = delete;
   PerfCounters& operator=(PerfCounters&&) noexcept;
@@ -92,11 +104,15 @@ class BENCHMARK_EXPORT PerfCounters final {
   // initialization here.
   static bool Initialize();
 
+  // Check if the given counter is supported, in case the app wants to
+  // check before passing it in to Create()
+  static bool IsCounterSupported(const std::string& name);
+
   // Return a PerfCounters object ready to read the counters with the names
   // specified. The values are user-mode only. The counter name format is
   // implementation and OS specific.
-  // TODO: once we move to C++-17, this should be a std::optional, and then the
-  // IsValid() boolean can be dropped.
+  // In case of failure, this method will in the worst case return an
+  // empty object whose state will still be valid.
static PerfCounters Create(const std::vector<std::string>& counter_names); // Take a snapshot of the current value of the counters into the provided @@ -105,10 +121,7 @@ class BENCHMARK_EXPORT PerfCounters final { BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { #ifndef BENCHMARK_OS_WINDOWS assert(values != nullptr); - assert(IsValid()); - auto buffer = values->get_data_buffer(); - auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second); - return static_cast<size_t>(read_bytes) == buffer.second; + return values->Read(leader_ids_) == counter_ids_.size(); #else (void)values; return false; @@ -120,13 +133,15 @@ class BENCHMARK_EXPORT PerfCounters final { private: PerfCounters(const std::vector<std::string>& counter_names, - std::vector<int>&& counter_ids) - : counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {} - PerfCounters() = default; + std::vector<int>&& counter_ids, std::vector<int>&& leader_ids) + : counter_ids_(std::move(counter_ids)), + leader_ids_(std::move(leader_ids)), + counter_names_(counter_names) {} void CloseCounters() const; std::vector<int> counter_ids_; + std::vector<int> leader_ids_; std::vector<std::string> counter_names_; }; @@ -134,33 +149,25 @@ class BENCHMARK_EXPORT PerfCounters final { class BENCHMARK_EXPORT PerfCountersMeasurement final { public: PerfCountersMeasurement(const std::vector<std::string>& counter_names); - ~PerfCountersMeasurement(); - - // The only way to get to `counters_` is after ctor-ing a - // `PerfCountersMeasurement`, which means that `counters_`'s state is, here, - // decided (either invalid or valid) and won't change again even if a ctor is - // concurrently running with this. This is preferring efficiency to - // maintainability, because the address of the static can be known at compile - // time. - bool IsValid() const { - MutexLock l(mutex_); - return counters_.IsValid(); - } - BENCHMARK_ALWAYS_INLINE void Start() { - assert(IsValid()); - MutexLock l(mutex_); + size_t num_counters() const { return counters_.num_counters(); } + + std::vector<std::string> names() const { return counters_.names(); } + + BENCHMARK_ALWAYS_INLINE bool Start() { + if (num_counters() == 0) return true; // Tell the compiler to not move instructions above/below where we take // the snapshot. ClobberMemory(); valid_read_ &= counters_.Snapshot(&start_values_); ClobberMemory(); + + return valid_read_; } BENCHMARK_ALWAYS_INLINE bool Stop( std::vector<std::pair<std::string, double>>& measurements) { - assert(IsValid()); - MutexLock l(mutex_); + if (num_counters() == 0) return true; // Tell the compiler to not move instructions above/below where we take // the snapshot. ClobberMemory(); @@ -177,9 +184,7 @@ class BENCHMARK_EXPORT PerfCountersMeasurement final { } private: - static Mutex mutex_; - GUARDED_BY(mutex_) static int ref_count_; - GUARDED_BY(mutex_) static PerfCounters counters_; + PerfCounters counters_; bool valid_read_ = true; PerfCounterValues start_values_; PerfCounterValues end_values_; diff --git a/contrib/restricted/google/benchmark/src/sleep.cc b/contrib/restricted/google/benchmark/src/sleep.cc deleted file mode 100644 index ab59000f24..0000000000 --- a/contrib/restricted/google/benchmark/src/sleep.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "sleep.h" - -#include <cerrno> -#include <cstdlib> -#include <ctime> - -#include "internal_macros.h" - -#ifdef BENCHMARK_OS_WINDOWS -#include <windows.h> -#endif - -#ifdef BENCHMARK_OS_ZOS -#include <unistd.h> -#endif - -namespace benchmark { -#ifdef BENCHMARK_OS_WINDOWS -// Window's Sleep takes milliseconds argument. -void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } -void SleepForSeconds(double seconds) { - SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds)); -} -#else // BENCHMARK_OS_WINDOWS -void SleepForMicroseconds(int microseconds) { -#ifdef BENCHMARK_OS_ZOS - // z/OS does not support nanosleep. Instead call sleep() and then usleep() to - // sleep for the remaining microseconds because usleep() will fail if its - // argument is greater than 1000000. - div_t sleepTime = div(microseconds, kNumMicrosPerSecond); - int seconds = sleepTime.quot; - while (seconds != 0) seconds = sleep(seconds); - while (usleep(sleepTime.rem) == -1 && errno == EINTR) - ; -#else - struct timespec sleep_time; - sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; - sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; - while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) - ; // Ignore signals and wait for the full interval to elapse. -#endif -} - -void SleepForMilliseconds(int milliseconds) { - SleepForMicroseconds(milliseconds * kNumMicrosPerMilli); -} - -void SleepForSeconds(double seconds) { - SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond)); -} -#endif // BENCHMARK_OS_WINDOWS -} // end namespace benchmark diff --git a/contrib/restricted/google/benchmark/src/sleep.h b/contrib/restricted/google/benchmark/src/sleep.h deleted file mode 100644 index f98551afe2..0000000000 --- a/contrib/restricted/google/benchmark/src/sleep.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef BENCHMARK_SLEEP_H_ -#define BENCHMARK_SLEEP_H_ - -namespace benchmark { -const int kNumMillisPerSecond = 1000; -const int kNumMicrosPerMilli = 1000; -const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000; -const int kNumNanosPerMicro = 1000; -const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond; - -void SleepForMilliseconds(int milliseconds); -void SleepForSeconds(double seconds); -} // end namespace benchmark - -#endif // BENCHMARK_SLEEP_H_ diff --git a/contrib/restricted/google/benchmark/src/statistics.cc b/contrib/restricted/google/benchmark/src/statistics.cc index 5ba885ab00..c4b54b271f 100644 --- a/contrib/restricted/google/benchmark/src/statistics.cc +++ b/contrib/restricted/google/benchmark/src/statistics.cc @@ -89,9 +89,8 @@ std::vector<BenchmarkReporter::Run> ComputeStats( typedef BenchmarkReporter::Run Run; std::vector<Run> results; - auto error_count = - std::count_if(reports.begin(), reports.end(), - [](Run const& run) { return run.error_occurred; }); + auto error_count = std::count_if(reports.begin(), reports.end(), + [](Run const& run) { return run.skipped; }); if (reports.size() - error_count < 2) { // We don't report aggregated data if there was a single run. 
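These statistics.cc hunks are part of the wider error_occurred to skipped migration visible in the reporters above: runs skipped with or without an error are excluded from aggregation, and no aggregates are produced unless at least two usable runs remain. A hypothetical sketch of that rule, with Run standing in for BenchmarkReporter::Run:

// Sketch (assumed types): exclude skipped runs, then require >= 2 usable runs.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Run {
  int skipped;      // 0 mirrors internal::NotSkipped
  double cpu_time;  // per-iteration time accumulated by the benchmark
};

int main() {
  std::vector<Run> reports = {{0, 1.10}, {1, 0.0}, {0, 1.14}, {0, 1.08}};
  auto skipped = std::count_if(reports.begin(), reports.end(),
                               [](const Run& r) { return r.skipped != 0; });
  if (reports.size() - skipped < 2) return 0;  // nothing to aggregate
  double sum = 0.0;
  for (const Run& r : reports) {
    if (r.skipped) continue;  // same guard as the accumulation loop above
    sum += r.cpu_time;
  }
  std::printf("mean cpu_time = %.3f\n",
              sum / static_cast<double>(reports.size() - skipped));
}
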
@@ -133,7 +132,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats( for (Run const& run : reports) { BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); BM_CHECK_EQ(run_iterations, run.iterations); - if (run.error_occurred) continue; + if (run.skipped) continue; real_accumulated_time_stat.emplace_back(run.real_accumulated_time); cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time); // user counters diff --git a/contrib/restricted/google/benchmark/src/statistics.h b/contrib/restricted/google/benchmark/src/statistics.h index b0d2c05e72..6e5560e8f1 100644 --- a/contrib/restricted/google/benchmark/src/statistics.h +++ b/contrib/restricted/google/benchmark/src/statistics.h @@ -22,9 +22,10 @@ namespace benchmark { -// Return a vector containing the mean, median and standard devation information -// (and any user-specified info) for the specified list of reports. If 'reports' -// contains less than two non-errored runs an empty vector is returned +// Return a vector containing the mean, median and standard deviation +// information (and any user-specified info) for the specified list of reports. +// If 'reports' contains less than two non-errored runs an empty vector is +// returned BENCHMARK_EXPORT std::vector<BenchmarkReporter::Run> ComputeStats( const std::vector<BenchmarkReporter::Run>& reports); diff --git a/contrib/restricted/google/benchmark/src/string_util.cc b/contrib/restricted/google/benchmark/src/string_util.cc index b3196fc266..5e2d24a3cd 100644 --- a/contrib/restricted/google/benchmark/src/string_util.cc +++ b/contrib/restricted/google/benchmark/src/string_util.cc @@ -94,10 +94,10 @@ std::string ExponentToPrefix(int64_t exponent, bool iec) { const char* array = (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); - if (iec) + if (iec) { return array[index] + std::string("i"); - else - return std::string(1, array[index]); + } + return std::string(1, array[index]); } std::string ToBinaryStringFullySpecified(double value, double threshold, diff --git a/contrib/restricted/google/benchmark/src/sysinfo.cc b/contrib/restricted/google/benchmark/src/sysinfo.cc index e763d5c481..80eece3ae7 100644 --- a/contrib/restricted/google/benchmark/src/sysinfo.cc +++ b/contrib/restricted/google/benchmark/src/sysinfo.cc @@ -46,6 +46,9 @@ #if defined(BENCHMARK_OS_QURT) #error #include <qurt.h> #endif +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) +#include <pthread.h> +#endif #include <algorithm> #include <array> @@ -62,15 +65,17 @@ #include <limits> #include <locale> #include <memory> +#include <random> #include <sstream> #include <utility> +#include "benchmark/benchmark.h" #include "check.h" #include "cycleclock.h" #include "internal_macros.h" #include "log.h" -#include "sleep.h" #include "string_util.h" +#include "timers.h" namespace benchmark { namespace { @@ -423,19 +428,12 @@ std::string GetSystemName() { #ifndef UNICODE str = std::string(hostname, DWCOUNT); #else - std::vector<wchar_t> converted; - // Find the length first. - int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, hostname, - DWCOUNT, converted.begin(), 0); - // TODO: Report error from GetLastError()? - if (len == 0) return std::string(""); - converted.reserve(len + 1); - - len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, hostname, DWCOUNT, - converted.begin(), converted.size()); - // TODO: Report error from GetLastError()? - if (len == 0) return std::string(""); - str = std::string(converted.data()); + // `WideCharToMultiByte` returns `0` when conversion fails. 
+ int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, + DWCOUNT, NULL, 0, NULL, NULL); + str.resize(len); + WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0], + str.size(), NULL, NULL); #endif return str; #elif defined(BENCHMARK_OS_QURT) @@ -448,7 +446,7 @@ std::string GetSystemName() { return str; #else #ifndef HOST_NAME_MAX -#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined +#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined #define HOST_NAME_MAX 64 #elif defined(BENCHMARK_OS_NACL) #define HOST_NAME_MAX 64 @@ -551,6 +549,80 @@ int GetNumCPUs() { BENCHMARK_UNREACHABLE(); } +class ThreadAffinityGuard final { + public: + ThreadAffinityGuard() : reset_affinity(SetAffinity()) { + if (!reset_affinity) + std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU " + "frequency may be incorrect." + << std::endl; + } + + ~ThreadAffinityGuard() { + if (!reset_affinity) return; + +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + int ret = pthread_setaffinity_np(self, sizeof(previous_affinity), + &previous_affinity); + if (ret == 0) return; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity); + if (ret != 0) return; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + PrintErrorAndDie("Failed to reset thread affinity"); + } + + ThreadAffinityGuard(ThreadAffinityGuard&&) = delete; + ThreadAffinityGuard(const ThreadAffinityGuard&) = delete; + ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete; + ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete; + + private: + bool SetAffinity() { +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + int ret; + self = pthread_self(); + ret = pthread_getaffinity_np(self, sizeof(previous_affinity), + &previous_affinity); + if (ret != 0) return false; + + cpu_set_t affinity; + memcpy(&affinity, &previous_affinity, sizeof(affinity)); + + bool is_first_cpu = true; + + for (int i = 0; i < CPU_SETSIZE; ++i) + if (CPU_ISSET(i, &affinity)) { + if (is_first_cpu) + is_first_cpu = false; + else + CPU_CLR(i, &affinity); + } + + if (is_first_cpu) return false; + + ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity); + return ret == 0; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + self = GetCurrentThread(); + DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber(); + previous_affinity = SetThreadAffinityMask(self, mask); + return previous_affinity != 0; +#else + return false; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + } + +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + pthread_t self; + cpu_set_t previous_affinity; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + HANDLE self; + DWORD_PTR previous_affinity; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + bool reset_affinity; +}; + double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { // Currently, scaling is only used on linux path here, // suppress diagnostics about it being unused on other paths. @@ -706,10 +778,39 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { return 1000000000; #endif // If we've fallen through, attempt to roughly estimate the CPU clock rate. - static constexpr int estimate_time_ms = 1000; + + // Make sure to use the same cycle counter when starting and stopping the + // cycle timer. We just pin the current thread to a cpu in the previous + // affinity set. 
+ ThreadAffinityGuard affinity_guard; + + static constexpr double estimate_time_s = 1.0; + const double start_time = ChronoClockNow(); const auto start_ticks = cycleclock::Now(); - SleepForMilliseconds(estimate_time_ms); - return static_cast<double>(cycleclock::Now() - start_ticks); + + // Impose load instead of calling sleep() to make sure the cycle counter + // works. + using PRNG = std::minstd_rand; + using Result = PRNG::result_type; + PRNG rng(static_cast<Result>(start_ticks)); + + Result state = 0; + + do { + static constexpr size_t batch_size = 10000; + rng.discard(batch_size); + state += rng(); + + } while (ChronoClockNow() - start_time < estimate_time_s); + + DoNotOptimize(state); + + const auto end_ticks = cycleclock::Now(); + const double end_time = ChronoClockNow(); + + return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time); + // Reset the affinity of current thread when the lifetime of affinity_guard + // ends. } std::vector<double> GetLoadAvg() { diff --git a/contrib/restricted/google/benchmark/src/thread_manager.h b/contrib/restricted/google/benchmark/src/thread_manager.h index 4680285089..819b3c44db 100644 --- a/contrib/restricted/google/benchmark/src/thread_manager.h +++ b/contrib/restricted/google/benchmark/src/thread_manager.h @@ -43,8 +43,8 @@ class ThreadManager { double manual_time_used = 0; int64_t complexity_n = 0; std::string report_label_; - std::string error_message_; - bool has_error_ = false; + std::string skip_message_; + internal::Skipped skipped_ = internal::NotSkipped; UserCounters counters; }; GUARDED_BY(GetBenchmarkMutex()) Result results; diff --git a/contrib/restricted/google/benchmark/src/timers.cc b/contrib/restricted/google/benchmark/src/timers.cc index 0a4da83c19..042895d0d4 100644 --- a/contrib/restricted/google/benchmark/src/timers.cc +++ b/contrib/restricted/google/benchmark/src/timers.cc @@ -59,7 +59,6 @@ #include "check.h" #include "log.h" -#include "sleep.h" #include "string_util.h" namespace benchmark { @@ -68,6 +67,9 @@ namespace benchmark { #if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wunused-function" #endif +#if defined(__NVCOMPILER) +#pragma diag_suppress declared_but_not_referenced +#endif namespace { #if defined(BENCHMARK_OS_WINDOWS) diff --git a/contrib/restricted/google/benchmark/test/string_util_gtest.cc b/contrib/restricted/google/benchmark/test/string_util_gtest.cc index 698f2d43eb..8bfdb7a72c 100644 --- a/contrib/restricted/google/benchmark/test/string_util_gtest.cc +++ b/contrib/restricted/google/benchmark/test/string_util_gtest.cc @@ -2,6 +2,8 @@ // statistics_test - Unit tests for src/statistics.cc //===---------------------------------------------------------------------===// +#include <tuple> + #include "../src/internal_macros.h" #include "../src/string_util.h" #include "gtest/gtest.h" @@ -63,7 +65,10 @@ TEST(StringUtilTest, stoul) { EXPECT_EQ(4ul, pos); } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); } + { + ASSERT_THROW(std::ignore = benchmark::stoul("this is a test"), + std::invalid_argument); + } #endif } @@ -107,7 +112,10 @@ EXPECT_EQ(1ul, pos); EXPECT_EQ(4ul, pos); } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS -{ ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); } +{ + ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"), + std::invalid_argument); +} #endif } @@ -137,7 +145,10 @@ EXPECT_EQ(1ul, pos); EXPECT_EQ(8ul, pos); } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS -{ ASSERT_THROW(benchmark::stod("this is 
a test"), std::invalid_argument); } +{ + ASSERT_THROW(std::ignore = benchmark::stod("this is a test"), + std::invalid_argument); +} #endif } diff --git a/contrib/restricted/google/benchmark/test/ya.make b/contrib/restricted/google/benchmark/test/ya.make index e5ca53e0d8..0106f3212f 100644 --- a/contrib/restricted/google/benchmark/test/ya.make +++ b/contrib/restricted/google/benchmark/test/ya.make @@ -20,6 +20,7 @@ CFLAGS( -DBENCHMARK_STATIC_DEFINE -DGTEST_LINKED_AS_SHARED_LIBRARY=1 -DHAVE_POSIX_REGEX + -DHAVE_PTHREAD_AFFINITY -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK ) diff --git a/contrib/restricted/google/benchmark/ya.make b/contrib/restricted/google/benchmark/ya.make index 8ef295a7b5..886e082a5b 100644 --- a/contrib/restricted/google/benchmark/ya.make +++ b/contrib/restricted/google/benchmark/ya.make @@ -1,4 +1,4 @@ -# Generated by devtools/yamaker from nixpkgs 22.05. +# Generated by devtools/yamaker from nixpkgs 22.11. LIBRARY() @@ -6,9 +6,9 @@ LICENSE(Apache-2.0) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(1.7.1) +VERSION(1.8.0) -ORIGINAL_SOURCE(https://github.com/google/benchmark/archive/v1.7.1.tar.gz) +ORIGINAL_SOURCE(https://github.com/google/benchmark/archive/v1.8.0.tar.gz) ADDINCL( GLOBAL contrib/restricted/google/benchmark/include @@ -22,10 +22,17 @@ NO_UTIL() CFLAGS( GLOBAL -DBENCHMARK_STATIC_DEFINE -DHAVE_POSIX_REGEX + -DHAVE_PTHREAD_AFFINITY -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK ) +IF (OS_LINUX) + CFLAGS( + -DBENCHMARK_HAS_PTHREAD_AFFINITY + ) +ENDIF() + SRCS( src/benchmark.cc src/benchmark_api_internal.cc @@ -42,7 +49,6 @@ SRCS( src/json_reporter.cc src/perf_counters.cc src/reporter.cc - src/sleep.cc src/statistics.cc src/string_util.cc src/sysinfo.cc |