author    thegeorg <thegeorg@yandex-team.com>  2023-06-14 13:05:42 +0300
committer thegeorg <thegeorg@yandex-team.com>  2023-06-14 13:05:42 +0300
commit    4b972da4fb8c047e6c1b876f3d026f213cade3b7 (patch)
tree      4c7453323a0a572846b42a1091859c4867ae68f4
parent    bb4d56c9f2833ca3622dce9ee4a799910ea1549c (diff)
download  ydb-4b972da4fb8c047e6c1b876f3d026f213cade3b7.tar.gz
Update contrib/restricted/google/benchmark to 1.8.0
-rw-r--r--  contrib/restricted/google/benchmark/AUTHORS                         |   3
-rw-r--r--  contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt    |   2
-rw-r--r--  contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt    |   3
-rw-r--r--  contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt     |   3
-rw-r--r--  contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt   |   2
-rw-r--r--  contrib/restricted/google/benchmark/README.md                       |  10
-rw-r--r--  contrib/restricted/google/benchmark/include/benchmark/benchmark.h   | 262
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark.cc                | 141
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark_api_internal.cc   |  12
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark_register.cc       |  14
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark_runner.cc         | 111
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark_runner.h          |  24
-rw-r--r--  contrib/restricted/google/benchmark/src/colorprint.cc               |  20
-rw-r--r--  contrib/restricted/google/benchmark/src/commandlineflags.cc         |   7
-rw-r--r--  contrib/restricted/google/benchmark/src/complexity.h                |   2
-rw-r--r--  contrib/restricted/google/benchmark/src/console_reporter.cc         |  10
-rw-r--r--  contrib/restricted/google/benchmark/src/csv_reporter.cc             |   6
-rw-r--r--  contrib/restricted/google/benchmark/src/cycleclock.h                |   5
-rw-r--r--  contrib/restricted/google/benchmark/src/internal_macros.h           |   6
-rw-r--r--  contrib/restricted/google/benchmark/src/json_reporter.cc            |  12
-rw-r--r--  contrib/restricted/google/benchmark/src/perf_counters.cc            | 229
-rw-r--r--  contrib/restricted/google/benchmark/src/perf_counters.h             |  79
-rw-r--r--  contrib/restricted/google/benchmark/src/sleep.cc                    |  66
-rw-r--r--  contrib/restricted/google/benchmark/src/sleep.h                     |  15
-rw-r--r--  contrib/restricted/google/benchmark/src/statistics.cc               |   7
-rw-r--r--  contrib/restricted/google/benchmark/src/statistics.h                |   7
-rw-r--r--  contrib/restricted/google/benchmark/src/string_util.cc              |   6
-rw-r--r--  contrib/restricted/google/benchmark/src/sysinfo.cc                  | 137
-rw-r--r--  contrib/restricted/google/benchmark/src/thread_manager.h            |   4
-rw-r--r--  contrib/restricted/google/benchmark/src/timers.cc                   |   4
-rw-r--r--  contrib/restricted/google/benchmark/test/string_util_gtest.cc       |  17
-rw-r--r--  contrib/restricted/google/benchmark/test/ya.make                    |   1
-rw-r--r--  contrib/restricted/google/benchmark/ya.make                         |  14
33 files changed, 834 insertions, 407 deletions
diff --git a/contrib/restricted/google/benchmark/AUTHORS b/contrib/restricted/google/benchmark/AUTHORS
index 98d2d98b05..bafecaddb5 100644
--- a/contrib/restricted/google/benchmark/AUTHORS
+++ b/contrib/restricted/google/benchmark/AUTHORS
@@ -32,6 +32,7 @@ Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Gergő Szitár <szitar.gergo@gmail.com>
Google Inc.
+Henrique Bucher <hbucher@gmail.com>
International Business Machines Corporation
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
@@ -42,8 +43,10 @@ Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
+Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
+Mike Apodaca <gatorfax@gmail.com>
MongoDB Inc.
Nick Hutchinson <nshutchinson@gmail.com>
Norman Heino <norman.heino@gmail.com>
diff --git a/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt b/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt
index cfad7dc3cc..0e16d88f39 100644
--- a/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt
+++ b/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt
@@ -13,6 +13,7 @@ target_compile_options(restricted-google-benchmark PUBLIC
)
target_compile_options(restricted-google-benchmark PRIVATE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
$<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
@@ -42,7 +43,6 @@ target_sources(restricted-google-benchmark PRIVATE
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc
- ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc
diff --git a/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt b/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt
index d1f192eee7..a18f8b0963 100644
--- a/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt
+++ b/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt
@@ -13,8 +13,10 @@ target_compile_options(restricted-google-benchmark PUBLIC
)
target_compile_options(restricted-google-benchmark PRIVATE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
+ -DBENCHMARK_HAS_PTHREAD_AFFINITY
$<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
)
target_include_directories(restricted-google-benchmark PUBLIC
@@ -43,7 +45,6 @@ target_sources(restricted-google-benchmark PRIVATE
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc
- ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc
diff --git a/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt b/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt
index d1f192eee7..a18f8b0963 100644
--- a/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt
+++ b/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt
@@ -13,8 +13,10 @@ target_compile_options(restricted-google-benchmark PUBLIC
)
target_compile_options(restricted-google-benchmark PRIVATE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
+ -DBENCHMARK_HAS_PTHREAD_AFFINITY
$<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
)
target_include_directories(restricted-google-benchmark PUBLIC
@@ -43,7 +45,6 @@ target_sources(restricted-google-benchmark PRIVATE
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc
- ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc
diff --git a/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt b/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt
index cfad7dc3cc..0e16d88f39 100644
--- a/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt
+++ b/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt
@@ -13,6 +13,7 @@ target_compile_options(restricted-google-benchmark PUBLIC
)
target_compile_options(restricted-google-benchmark PRIVATE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
$<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
@@ -42,7 +43,6 @@ target_sources(restricted-google-benchmark PRIVATE
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc
- ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc
diff --git a/contrib/restricted/google/benchmark/README.md b/contrib/restricted/google/benchmark/README.md
index 205fb008af..b64048b7d3 100644
--- a/contrib/restricted/google/benchmark/README.md
+++ b/contrib/restricted/google/benchmark/README.md
@@ -5,7 +5,7 @@
[![pylint](https://github.com/google/benchmark/workflows/pylint/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Apylint)
[![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings)
-[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark)
+[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=main)](https://travis-ci.org/google/benchmark)
[![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark)
@@ -33,7 +33,7 @@ To get started, see [Requirements](#requirements) and
[Installation](#installation). See [Usage](#usage) for a full example and the
[User Guide](docs/user_guide.md) for a more comprehensive feature overview.
-It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md)
+It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/main/docs/primer.md)
as some of the structural aspects of the APIs are similar.
## Resources
@@ -139,6 +139,12 @@ cache variables, if autodetection fails.
If you are using clang, you may need to set `LLVMAR_EXECUTABLE`,
`LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables.
+To enable sanitizer checks (eg., `asan` and `tsan`), add:
+```
+ -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all"
+ -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all "
+```
+
### Stable and Experimental Library Versions
The main branch contains the latest stable version of the benchmarking library;
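
The added README snippet combines `-fsanitize=address` and `-fsanitize=thread` in one flag set; the two sanitizers are mutually exclusive in practice, so separate build trees are normally configured. A minimal sketch, assuming out-of-source build directories named `build-asan` and `build-tsan`:

```
cmake -S . -B build-asan -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all"
cmake -S . -B build-tsan -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
```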
diff --git a/contrib/restricted/google/benchmark/include/benchmark/benchmark.h b/contrib/restricted/google/benchmark/include/benchmark/benchmark.h
index 77dcfbdc2a..4a8be19d86 100644
--- a/contrib/restricted/google/benchmark/include/benchmark/benchmark.h
+++ b/contrib/restricted/google/benchmark/include/benchmark/benchmark.h
@@ -218,6 +218,18 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#define BENCHMARK_UNUSED
#endif
+// Used to annotate functions, methods and classes so they
+// are not optimized by the compiler. Useful for tests
+// where you expect loops to stay in place churning cycles
+#if defined(__clang__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
+#elif defined(__GNUC__) || defined(__GNUG__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
+#else
+// MSVC & Intel do not have a no-optimize attribute, only line pragmas
+#define BENCHMARK_DONT_OPTIMIZE
+#endif
+
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
#elif defined(_MSC_VER) && !defined(__clang__)
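
The new `BENCHMARK_DONT_OPTIMIZE` macro is for functions whose loops should keep churning cycles even under optimization. A minimal usage sketch (the benchmark name and loop body are illustrative; on MSVC and Intel the macro expands to nothing, so the loop may still be folded there):

```cpp
#include <benchmark/benchmark.h>

BENCHMARK_DONT_OPTIMIZE static void BM_SpinLoop(benchmark::State& state) {
  for (auto _ : state) {
    long sink = 0;
    for (int i = 0; i < 1000; ++i) sink += i;  // busy work that must not be elided
    benchmark::DoNotOptimize(sink);
  }
}
BENCHMARK(BM_SpinLoop);
```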
@@ -231,13 +243,20 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
// clang-format off
-#if defined(__GNUC__) && !defined(__NVCC__) || defined(__clang__)
+#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
+#elif defined(__NVCOMPILER)
+#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
+#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
+#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
+ _Pragma("diagnostic push") \
+ _Pragma("diag_suppress deprecated_entity_with_custom_message")
+#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
#else
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg)
@@ -280,6 +299,9 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
namespace benchmark {
class BenchmarkReporter;
+// Default number of minimum benchmark running time in seconds.
+const char kDefaultMinTimeStr[] = "0.5s";
+
BENCHMARK_EXPORT void PrintDefaultHelp();
BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
@@ -383,13 +405,7 @@ class MemoryManager {
virtual void Start() = 0;
// Implement this to stop recording and fill out the given Result structure.
- BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead")
- virtual void Stop(Result* result) = 0;
-
- // FIXME(vyng): Make this pure virtual once we've migrated current users.
- BENCHMARK_DISABLE_DEPRECATED_WARNING
- virtual void Stop(Result& result) { Stop(&result); }
- BENCHMARK_RESTORE_DEPRECATED_WARNING
+ virtual void Stop(Result& result) = 0;
};
// Register a MemoryManager instance that will be used to collect and report
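
With the deprecated pointer overload gone, `Stop(Result&)` is the single pure-virtual hook a custom manager must implement. A minimal conforming sketch (the class name is illustrative, and a real manager would fill the result from actual allocation tracking):

```cpp
#include <benchmark/benchmark.h>

class FlatMemoryManager : public benchmark::MemoryManager {
 public:
  void Start() override {}               // begin recording allocations
  void Stop(Result& result) override {   // note: reference, not pointer
    result.num_allocs = 0;               // placeholder totals
    result.max_bytes_used = 0;
  }
};

static FlatMemoryManager g_memory_manager;
// benchmark::RegisterMemoryManager(&g_memory_manager);  // hook up before running
```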
@@ -441,12 +457,21 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "r,m"(value) : "memory");
}
template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(
+#ifdef BENCHMARK_HAS_CXX11
+ Tp&& value
+#else
+ Tp& value
+#endif
+) {
#if defined(__clang__)
asm volatile("" : "+r,m"(value) : : "memory");
#else
@@ -457,6 +482,9 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
// Workaround for a bug with full argument copy overhead with GCC.
// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
(sizeof(Tp) <= sizeof(Tp*))>::type
@@ -465,6 +493,9 @@ inline BENCHMARK_ALWAYS_INLINE
}
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
(sizeof(Tp) > sizeof(Tp*))>::type
@@ -476,7 +507,7 @@ template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
(sizeof(Tp) <= sizeof(Tp*))>::type
- DoNotOptimize(Tp& value) {
+ DoNotOptimize(Tp&& value) {
asm volatile("" : "+m,r"(value) : : "memory");
}
@@ -484,7 +515,7 @@ template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
(sizeof(Tp) > sizeof(Tp*))>::type
- DoNotOptimize(Tp& value) {
+ DoNotOptimize(Tp&& value) {
asm volatile("" : "+m"(value) : : "memory");
}
@@ -493,12 +524,21 @@ inline BENCHMARK_ALWAYS_INLINE
// to use memory operations instead of operations with registers.
// TODO: Remove if GCC < 5 will be unsupported.
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "m"(value) : "memory");
}
template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(
+#ifdef BENCHMARK_HAS_CXX11
+ Tp&& value
+#else
+ Tp& value
+#endif
+) {
asm volatile("" : "+m"(value) : : "memory");
}
#endif
@@ -510,6 +550,9 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
#endif
#elif defined(_MSC_VER)
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
_ReadWriteBarrier();
@@ -520,6 +563,9 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
#endif
#else
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
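
Net effect of the `DoNotOptimize` changes: under C++11 the mutable overload takes `Tp&&`, so temporaries now bind to it, while the const-ref overload is deprecated because it lets the compiler assume the value is never written back. A short sketch of the intended call patterns:

```cpp
#include <benchmark/benchmark.h>

static void BM_Add(benchmark::State& state) {
  int a = 1, b = 2;
  for (auto _ : state) {
    benchmark::DoNotOptimize(a + b);  // rvalue: binds to the new Tp&& overload
    int sum = a + b;
    benchmark::DoNotOptimize(sum);    // lvalue: still supported, value kept live
  }
}
BENCHMARK(BM_Add);
```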
@@ -640,6 +686,16 @@ enum AggregationReportMode
ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
+enum Skipped
+#if defined(BENCHMARK_HAS_CXX11)
+ : unsigned
+#endif
+{
+ NotSkipped = 0,
+ SkippedWithMessage,
+ SkippedWithError
+};
+
} // namespace internal
// State is passed to a running Benchmark and contains state for the
@@ -676,8 +732,8 @@ class BENCHMARK_EXPORT State {
// }
bool KeepRunningBatch(IterationCount n);
- // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
- // by the current thread.
+ // REQUIRES: timer is running and 'SkipWithMessage(...)' or
+ // 'SkipWithError(...)' has not been called by the current thread.
// Stop the benchmark timer. If not called, the timer will be
// automatically stopped after the last iteration of the benchmark loop.
//
@@ -692,8 +748,8 @@ class BENCHMARK_EXPORT State {
// within each benchmark iteration, if possible.
void PauseTiming();
- // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
- // by the current thread.
+ // REQUIRES: timer is not running and 'SkipWithMessage(...)' or
+ // 'SkipWithError(...)' has not been called by the current thread.
// Start the benchmark timer. The timer is NOT running on entrance to the
// benchmark function. It begins running after control flow enters the
// benchmark loop.
@@ -703,8 +759,30 @@ class BENCHMARK_EXPORT State {
// within each benchmark iteration, if possible.
void ResumeTiming();
- // REQUIRES: 'SkipWithError(...)' has not been called previously by the
- // current thread.
+ // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
+ // called previously by the current thread.
+ // Report the benchmark as resulting in being skipped with the specified
+ // 'msg'.
+ // After this call the user may explicitly 'return' from the benchmark.
+ //
+ // If the ranged-for style of benchmark loop is used, the user must explicitly
+ // break from the loop, otherwise all future iterations will be run.
+ // If the 'KeepRunning()' loop is used the current thread will automatically
+ // exit the loop at the end of the current iteration.
+ //
+ // For threaded benchmarks only the current thread stops executing and future
+ // calls to `KeepRunning()` will block until all threads have completed
+ // the `KeepRunning()` loop. If multiple threads report being skipped only the
+ // first skip message is used.
+ //
+ // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit
+ // the current scope immediately. If the function is called from within
+ // the 'KeepRunning()' loop the current iteration will finish. It is the users
+ // responsibility to exit the scope as needed.
+ void SkipWithMessage(const std::string& msg);
+
+ // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
+ // called previously by the current thread.
// Report the benchmark as resulting in an error with the specified 'msg'.
// After this call the user may explicitly 'return' from the benchmark.
//
@@ -722,10 +800,13 @@ class BENCHMARK_EXPORT State {
// the current scope immediately. If the function is called from within
// the 'KeepRunning()' loop the current iteration will finish. It is the users
// responsibility to exit the scope as needed.
- void SkipWithError(const char* msg);
+ void SkipWithError(const std::string& msg);
+
+ // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called.
+ bool skipped() const { return internal::NotSkipped != skipped_; }
// Returns true if an error has been reported with 'SkipWithError(...)'.
- bool error_occurred() const { return error_occurred_; }
+ bool error_occurred() const { return internal::SkippedWithError == skipped_; }
// REQUIRES: called exactly once per iteration of the benchmarking loop.
// Set the manually measured time for this benchmark iteration, which
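
A sketch of how the split skip API reads at a call site; `SkipWithError` marks a failed run while `SkipWithMessage` records a benign skip, and both now take `std::string` (the file name below is illustrative):

```cpp
#include <benchmark/benchmark.h>

#include <fstream>

static void BM_ReadFile(benchmark::State& state) {
  std::ifstream in("testdata.bin", std::ios::binary);
  if (!in) {
    // Environment problem, not a measurement failure of interest: a benign
    // skip would instead be state.SkipWithMessage("testdata.bin not present");
    state.SkipWithError("could not open testdata.bin");
    return;  // explicit return, per the contract documented above
  }
  for (auto _ : state) {
    // ... measured work ...
  }
}
BENCHMARK(BM_ReadFile);
```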
@@ -796,11 +877,7 @@ class BENCHMARK_EXPORT State {
// BM_Compress 50 50 14115038 compress:27.3%
//
// REQUIRES: a benchmark has exited its benchmarking loop.
- void SetLabel(const char* label);
-
- void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
- this->SetLabel(str.c_str());
- }
+ void SetLabel(const std::string& label);
// Range arguments for this run. CHECKs if the argument has been set.
BENCHMARK_ALWAYS_INLINE
@@ -831,6 +908,9 @@ class BENCHMARK_EXPORT State {
return max_iterations - total_iterations_ + batch_leftover_;
}
+ BENCHMARK_ALWAYS_INLINE
+ std::string name() const { return name_; }
+
private:
// items we expect on the first cache line (ie 64 bytes of the struct)
// When total_iterations_ is 0, KeepRunning() and friends will return false.
@@ -848,7 +928,7 @@ class BENCHMARK_EXPORT State {
private:
bool started_;
bool finished_;
- bool error_occurred_;
+ internal::Skipped skipped_;
// items we don't need on the first cache line
std::vector<int64_t> range_;
@@ -860,9 +940,9 @@ class BENCHMARK_EXPORT State {
UserCounters counters;
private:
- State(IterationCount max_iters, const std::vector<int64_t>& ranges,
- int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager,
+ State(std::string name, IterationCount max_iters,
+ const std::vector<int64_t>& ranges, int thread_i, int n_threads,
+ internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement);
void StartKeepRunning();
@@ -871,6 +951,7 @@ class BENCHMARK_EXPORT State {
bool KeepRunningInternal(IterationCount n, bool is_batch);
void FinishKeepRunning();
+ const std::string name_;
const int thread_index_;
const int threads_;
@@ -902,7 +983,7 @@ inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
}
if (!started_) {
StartKeepRunning();
- if (!error_occurred_ && total_iterations_ >= n) {
+ if (!skipped() && total_iterations_ >= n) {
total_iterations_ -= n;
return true;
}
@@ -932,7 +1013,7 @@ struct State::StateIterator {
BENCHMARK_ALWAYS_INLINE
explicit StateIterator(State* st)
- : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
+ : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {}
public:
BENCHMARK_ALWAYS_INLINE
@@ -1083,11 +1164,12 @@ class BENCHMARK_EXPORT Benchmark {
Benchmark* MinWarmUpTime(double t);
// Specify the amount of iterations that should be run by this benchmark.
+ // This option overrides the `benchmark_min_time` flag.
// REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
//
// NOTE: This function should only be used when *exact* iteration control is
// needed and never to control or limit how long a benchmark runs, where
- // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
+ // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead.
Benchmark* Iterations(IterationCount n);
// Specify the amount of times to repeat this benchmark. This option overrides
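
For instance, assuming an existing `BM_Add` benchmark function, the distinction reads:

```cpp
// Exact iteration control: runs precisely 5000 iterations and, per the note
// above, overrides the --benchmark_min_time flag.
BENCHMARK(BM_Add)->Iterations(5000);

// Time-based control: run until at least 2.5 seconds have been measured.
BENCHMARK(BM_Add)->MinTime(2.5);
```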
@@ -1175,10 +1257,13 @@ class BENCHMARK_EXPORT Benchmark {
TimeUnit GetTimeUnit() const;
protected:
- explicit Benchmark(const char* name);
- void SetName(const char* name);
+ explicit Benchmark(const std::string& name);
+ void SetName(const std::string& name);
+ public:
+ const char* GetName() const;
int ArgsCnt() const;
+ const char* GetArgName(int arg) const;
private:
friend class BenchmarkFamilies;
@@ -1228,12 +1313,12 @@ class BENCHMARK_EXPORT Benchmark {
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
-internal::Benchmark* RegisterBenchmark(const char* name,
+internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn);
#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
#endif
// Remove all registered benchmarks. All pointers to previously registered
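
With the `std::string` overloads above, programmatically built names no longer need a trailing `.c_str()`. A sketch of runtime registration (names and sizes are illustrative):

```cpp
#include <benchmark/benchmark.h>

#include <algorithm>
#include <string>
#include <vector>

void RegisterFillBenchmarks() {
  for (int size : {64, 256, 1024}) {
    std::string name = "BM_Fill/" + std::to_string(size);  // built at runtime
    benchmark::RegisterBenchmark(name, [size](benchmark::State& state) {
      std::vector<char> buf(static_cast<size_t>(size));
      for (auto _ : state) {
        std::fill(buf.begin(), buf.end(), 'x');
        benchmark::DoNotOptimize(buf.data());
      }
    });
  }
}
```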
@@ -1245,10 +1330,10 @@ namespace internal {
// (ie those created using the BENCHMARK(...) macros.
class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
public:
- FunctionBenchmark(const char* name, Function* func)
+ FunctionBenchmark(const std::string& name, Function* func)
: Benchmark(name), func_(func) {}
- virtual void Run(State& st) BENCHMARK_OVERRIDE;
+ void Run(State& st) BENCHMARK_OVERRIDE;
private:
Function* func_;
@@ -1258,24 +1343,24 @@ class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
template <class Lambda>
class LambdaBenchmark : public Benchmark {
public:
- virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
+ void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
private:
template <class OLambda>
- LambdaBenchmark(const char* name, OLambda&& lam)
+ LambdaBenchmark(const std::string& name, OLambda&& lam)
: Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
LambdaBenchmark(LambdaBenchmark const&) = delete;
template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration)
- friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
+ friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&);
Lambda lambda_;
};
#endif
} // namespace internal
-inline internal::Benchmark* RegisterBenchmark(const char* name,
+inline internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn) {
return internal::RegisterBenchmarkInternal(
::new internal::FunctionBenchmark(name, fn));
@@ -1283,7 +1368,7 @@ inline internal::Benchmark* RegisterBenchmark(const char* name,
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
using BenchType =
internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
return internal::RegisterBenchmarkInternal(
@@ -1294,7 +1379,7 @@ internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
#if defined(BENCHMARK_HAS_CXX11) && \
(!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class... Args>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
Args&&... args) {
return benchmark::RegisterBenchmark(
name, [=](benchmark::State& st) { fn(st, args...); });
@@ -1308,7 +1393,7 @@ class Fixture : public internal::Benchmark {
public:
Fixture() : internal::Benchmark("") {}
- virtual void Run(State& st) BENCHMARK_OVERRIDE {
+ void Run(State& st) BENCHMARK_OVERRIDE {
this->SetUp(st);
this->BenchmarkCase(st);
this->TearDown(st);
@@ -1363,7 +1448,7 @@ class Fixture : public internal::Benchmark {
BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
- &__VA_ARGS__)))
+ __VA_ARGS__)))
#else
#define BENCHMARK(n) \
BENCHMARK_PRIVATE_DECLARE(n) = \
@@ -1430,37 +1515,37 @@ class Fixture : public internal::Benchmark {
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
-#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
- class BaseClass##_##Method##_Benchmark : public BaseClass { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
-#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
- class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "<" #a ">/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "<" #a ">/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
-#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
- class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#ifdef BENCHMARK_HAS_CXX11
@@ -1472,7 +1557,7 @@ class Fixture : public internal::Benchmark {
} \
\
protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
@@ -1630,7 +1715,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
Run()
: run_type(RT_Iteration),
aggregate_unit(kTime),
- error_occurred(false),
+ skipped(internal::NotSkipped),
iterations(1),
threads(1),
time_unit(GetDefaultTimeUnit()),
@@ -1653,12 +1738,11 @@ class BENCHMARK_EXPORT BenchmarkReporter {
std::string aggregate_name;
StatisticUnit aggregate_unit;
std::string report_label; // Empty if not set by benchmark.
- bool error_occurred;
- std::string error_message;
+ internal::Skipped skipped;
+ std::string skip_message;
// Total iterations across all threads.
IterationCount iterations;
-
int64_t threads;
int64_t repetition_index;
int64_t repetitions;
@@ -1728,6 +1812,12 @@ class BENCHMARK_EXPORT BenchmarkReporter {
virtual bool ReportContext(const Context& context) = 0;
// Called once for each group of benchmark runs, gives information about
+ // the configurations of the runs.
+ virtual void ReportRunsConfig(double /*min_time*/,
+ bool /*has_explicit_iters*/,
+ IterationCount /*iters*/) {}
+
+ // Called once for each group of benchmark runs, gives information about
// cpu-time and heap memory usage during the benchmark run. If the group
// of runs contained more than two entries then 'report' contains additional
// elements representing the mean and standard deviation of those runs.
@@ -1784,8 +1874,8 @@ class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
: output_options_(opts_), name_field_width_(0), printed_header_(false) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
protected:
virtual void PrintRunData(const Run& report);
@@ -1800,9 +1890,9 @@ class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
public:
JSONReporter() : first_report_(true) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
- virtual void Finalize() BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ void Finalize() BENCHMARK_OVERRIDE;
private:
void PrintRunData(const Run& report);
@@ -1815,8 +1905,8 @@ class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
: public BenchmarkReporter {
public:
CSVReporter() : printed_header_(false) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
private:
void PrintRunData(const Run& report);
@@ -1855,7 +1945,7 @@ inline double GetTimeUnitMultiplier(TimeUnit unit) {
// Creates a list of integer values for the given range and multiplier.
// This can be used together with ArgsProduct() to allow multiple ranges
-// with different multiplers.
+// with different multipliers.
// Example:
// ArgsProduct({
// CreateRange(0, 1024, /*multi=*/32),
diff --git a/contrib/restricted/google/benchmark/src/benchmark.cc b/contrib/restricted/google/benchmark/src/benchmark.cc
index ff2864804c..f1633b703f 100644
--- a/contrib/restricted/google/benchmark/src/benchmark.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark.cc
@@ -65,16 +65,25 @@ BM_DEFINE_bool(benchmark_list_tests, false);
// linked into the binary are run.
BM_DEFINE_string(benchmark_filter, "");
-// Minimum number of seconds we should run benchmark before results are
-// considered significant. For cpu-time based tests, this is the lower bound
+// Specification of how long to run the benchmark.
+//
+// It can be either an exact number of iterations (specified as `<integer>x`),
+// or a minimum number of seconds (specified as `<float>s`). If the latter
+// format (ie., min seconds) is used, the system may run the benchmark longer
+// until the results are considered significant.
+//
+// For backward compatibility, the `s` suffix may be omitted, in which case,
+// the specified number is interpreted as the number of seconds.
+//
+// For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test. For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
-BM_DEFINE_double(benchmark_min_time, 0.5);
+BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
// Minimum number of seconds a benchmark should be run before results should be
-// taken into account. This e.g can be neccessary for benchmarks of code which
-// needs to fill some form of cache before performance is of interrest.
+// taken into account. This e.g can be necessary for benchmarks of code which
+// needs to fill some form of cache before performance is of interest.
// Note: results gathered within this period are discarded and not used for
// reported result.
BM_DEFINE_double(benchmark_min_warmup_time, 0.0);
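
Concretely, the string flag accepts the spellings described in the comment above (the binary name is illustrative):

```
./my_benchmark --benchmark_min_time=2.5s   # at least 2.5 seconds per benchmark
./my_benchmark --benchmark_min_time=1000x  # exactly 1000 iterations
./my_benchmark --benchmark_min_time=0.5    # legacy form: bare number read as seconds
```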
@@ -148,18 +157,19 @@ void UseCharPointer(char const volatile*) {}
} // namespace internal
-State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
- int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager,
+State::State(std::string name, IterationCount max_iters,
+ const std::vector<int64_t>& ranges, int thread_i, int n_threads,
+ internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement)
: total_iterations_(0),
batch_leftover_(0),
max_iterations(max_iters),
started_(false),
finished_(false),
- error_occurred_(false),
+ skipped_(internal::NotSkipped),
range_(ranges),
complexity_n_(0),
+ name_(std::move(name)),
thread_index_(thread_i),
threads_(n_threads),
timer_(timer),
@@ -186,11 +196,14 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
#pragma nv_diagnostic push
#pragma nv_diag_suppress 1427
#endif
+#if defined(__NVCOMPILER)
+#pragma diagnostic push
+#pragma diag_suppress offset_in_non_POD_nonstandard
+#endif
// Offset tests to ensure commonly accessed data is on the first cache line.
const int cache_line_size = 64;
- static_assert(offsetof(State, error_occurred_) <=
- (cache_line_size - sizeof(error_occurred_)),
- "");
+ static_assert(
+ offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), "");
#if defined(__INTEL_COMPILER)
#pragma warning pop
#elif defined(__GNUC__)
@@ -199,11 +212,14 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
#if defined(__NVCC__)
#pragma nv_diagnostic pop
#endif
+#if defined(__NVCOMPILER)
+#pragma diagnostic pop
+#endif
}
void State::PauseTiming() {
// Add in time accumulated so far
- BM_CHECK(started_ && !finished_ && !error_occurred_);
+ BM_CHECK(started_ && !finished_ && !skipped());
timer_->StopTimer();
if (perf_counters_measurement_) {
std::vector<std::pair<std::string, double>> measurements;
@@ -220,21 +236,33 @@ void State::PauseTiming() {
}
void State::ResumeTiming() {
- BM_CHECK(started_ && !finished_ && !error_occurred_);
+ BM_CHECK(started_ && !finished_ && !skipped());
timer_->StartTimer();
if (perf_counters_measurement_) {
perf_counters_measurement_->Start();
}
}
-void State::SkipWithError(const char* msg) {
- BM_CHECK(msg);
- error_occurred_ = true;
+void State::SkipWithMessage(const std::string& msg) {
+ skipped_ = internal::SkippedWithMessage;
{
MutexLock l(manager_->GetBenchmarkMutex());
- if (manager_->results.has_error_ == false) {
- manager_->results.error_message_ = msg;
- manager_->results.has_error_ = true;
+ if (internal::NotSkipped == manager_->results.skipped_) {
+ manager_->results.skip_message_ = msg;
+ manager_->results.skipped_ = skipped_;
+ }
+ }
+ total_iterations_ = 0;
+ if (timer_->running()) timer_->StopTimer();
+}
+
+void State::SkipWithError(const std::string& msg) {
+ skipped_ = internal::SkippedWithError;
+ {
+ MutexLock l(manager_->GetBenchmarkMutex());
+ if (internal::NotSkipped == manager_->results.skipped_) {
+ manager_->results.skip_message_ = msg;
+ manager_->results.skipped_ = skipped_;
}
}
total_iterations_ = 0;
@@ -245,7 +273,7 @@ void State::SetIterationTime(double seconds) {
timer_->SetIterationTime(seconds);
}
-void State::SetLabel(const char* label) {
+void State::SetLabel(const std::string& label) {
MutexLock l(manager_->GetBenchmarkMutex());
manager_->results.report_label_ = label;
}
@@ -253,14 +281,14 @@ void State::SetLabel(const char* label) {
void State::StartKeepRunning() {
BM_CHECK(!started_ && !finished_);
started_ = true;
- total_iterations_ = error_occurred_ ? 0 : max_iterations;
+ total_iterations_ = skipped() ? 0 : max_iterations;
manager_->StartStopBarrier();
- if (!error_occurred_) ResumeTiming();
+ if (!skipped()) ResumeTiming();
}
void State::FinishKeepRunning() {
- BM_CHECK(started_ && (!finished_ || error_occurred_));
- if (!error_occurred_) {
+ BM_CHECK(started_ && (!finished_ || skipped()));
+ if (!skipped()) {
PauseTiming();
}
// Total iterations has now wrapped around past 0. Fix this.
@@ -338,14 +366,26 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
size_t num_repetitions_total = 0;
+ // This perfcounters object needs to be created before the runners vector
+ // below so it outlasts their lifetime.
+ PerfCountersMeasurement perfcounters(
+ StrSplit(FLAGS_benchmark_perf_counters, ','));
+
+ // Vector of benchmarks to run
std::vector<internal::BenchmarkRunner> runners;
runners.reserve(benchmarks.size());
+
+ // Count the number of benchmarks with threads to warn the user in case
+ // performance counters are used.
+ int benchmarks_with_threads = 0;
+
+ // Loop through all benchmarks
for (const BenchmarkInstance& benchmark : benchmarks) {
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
if (benchmark.complexity() != oNone)
reports_for_family = &per_family_reports[benchmark.family_index()];
-
- runners.emplace_back(benchmark, reports_for_family);
+ benchmarks_with_threads += (benchmark.threads() > 0);
+ runners.emplace_back(benchmark, &perfcounters, reports_for_family);
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
num_repetitions_total += num_repeats_of_this_instance;
if (reports_for_family)
@@ -353,6 +393,17 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
}
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
+ // The use of performance counters with threads would be unintuitive for
+ // the average user so we need to warn them about this case
+ if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
+ GetErrorLogInstance()
+ << "***WARNING*** There are " << benchmarks_with_threads
+ << " benchmarks with threads and " << perfcounters.num_counters()
+ << " performance counters were requested. Beware counters will "
+ "reflect the combined usage across all "
+ "threads.\n";
+ }
+
std::vector<size_t> repetition_indices;
repetition_indices.reserve(num_repetitions_total);
for (size_t runner_index = 0, num_runners = runners.size();
@@ -376,6 +427,12 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
if (runner.HasRepeatsRemaining()) continue;
// FIXME: report each repetition separately, not all of them in bulk.
+ display_reporter->ReportRunsConfig(
+ runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
+ if (file_reporter)
+ file_reporter->ReportRunsConfig(
+ runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
+
RunResults run_results = runner.GetResults();
// Maybe calculate complexity report
@@ -409,14 +466,15 @@ std::unique_ptr<BenchmarkReporter> CreateReporter(
typedef std::unique_ptr<BenchmarkReporter> PtrType;
if (name == "console") {
return PtrType(new ConsoleReporter(output_opts));
- } else if (name == "json") {
+ }
+ if (name == "json") {
return PtrType(new JSONReporter());
- } else if (name == "csv") {
+ }
+ if (name == "csv") {
return PtrType(new CSVReporter());
- } else {
- std::cerr << "Unexpected format: '" << name << "'\n";
- std::exit(1);
}
+ std::cerr << "Unexpected format: '" << name << "'\n";
+ std::exit(1);
}
BENCHMARK_RESTORE_DEPRECATED_WARNING
@@ -585,13 +643,17 @@ void PrintUsageAndExit() {
void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
if (time_unit_flag == "s") {
return SetDefaultTimeUnit(kSecond);
- } else if (time_unit_flag == "ms") {
+ }
+ if (time_unit_flag == "ms") {
return SetDefaultTimeUnit(kMillisecond);
- } else if (time_unit_flag == "us") {
+ }
+ if (time_unit_flag == "us") {
return SetDefaultTimeUnit(kMicrosecond);
- } else if (time_unit_flag == "ns") {
+ }
+ if (time_unit_flag == "ns") {
return SetDefaultTimeUnit(kNanosecond);
- } else if (!time_unit_flag.empty()) {
+ }
+ if (!time_unit_flag.empty()) {
PrintUsageAndExit();
}
}
@@ -604,7 +666,7 @@ void ParseCommandLineFlags(int* argc, char** argv) {
if (ParseBoolFlag(argv[i], "benchmark_list_tests",
&FLAGS_benchmark_list_tests) ||
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
- ParseDoubleFlag(argv[i], "benchmark_min_time",
+ ParseStringFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
&FLAGS_benchmark_min_warmup_time) ||
@@ -665,7 +727,7 @@ void PrintDefaultHelp() {
"benchmark"
" [--benchmark_list_tests={true|false}]\n"
" [--benchmark_filter=<regex>]\n"
- " [--benchmark_min_time=<min_time>]\n"
+ " [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_enable_random_interleaving={true|false}]\n"
@@ -676,6 +738,9 @@ void PrintDefaultHelp() {
" [--benchmark_out_format=<json|console|csv>]\n"
" [--benchmark_color={auto|true|false}]\n"
" [--benchmark_counters_tabular={true|false}]\n"
+#if defined HAVE_LIBPFM
+ " [--benchmark_perf_counters=<counter>,...]\n"
+#endif
" [--benchmark_context=<key>=<value>,...]\n"
" [--benchmark_time_unit={ns|us|ms|s}]\n"
" [--v=<verbosity>]\n");
diff --git a/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc b/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc
index 963fea22f3..286f986530 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc
@@ -93,24 +93,24 @@ State BenchmarkInstance::Run(
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement) const {
- State st(iters, args_, thread_id, threads_, timer, manager,
- perf_counters_measurement);
+ State st(name_.function_name, iters, args_, thread_id, threads_, timer,
+ manager, perf_counters_measurement);
benchmark_.Run(st);
return st;
}
void BenchmarkInstance::Setup() const {
if (setup_) {
- State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr,
- nullptr);
+ State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
+ nullptr, nullptr, nullptr);
setup_(st);
}
}
void BenchmarkInstance::Teardown() const {
if (teardown_) {
- State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr,
- nullptr);
+ State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
+ nullptr, nullptr, nullptr);
teardown_(st);
}
}
diff --git a/contrib/restricted/google/benchmark/src/benchmark_register.cc b/contrib/restricted/google/benchmark/src/benchmark_register.cc
index eae2c320f6..e447c9a2d3 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_register.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark_register.cc
@@ -204,7 +204,7 @@ bool FindBenchmarksInternal(const std::string& re,
// Benchmark
//=============================================================================//
-Benchmark::Benchmark(const char* name)
+Benchmark::Benchmark(const std::string& name)
: name_(name),
aggregation_report_mode_(ARM_Unspecified),
time_unit_(GetDefaultTimeUnit()),
@@ -230,7 +230,7 @@ Benchmark::Benchmark(const char* name)
Benchmark::~Benchmark() {}
Benchmark* Benchmark::Name(const std::string& name) {
- SetName(name.c_str());
+ SetName(name);
return this;
}
@@ -468,7 +468,9 @@ Benchmark* Benchmark::ThreadPerCpu() {
return this;
}
-void Benchmark::SetName(const char* name) { name_ = name; }
+void Benchmark::SetName(const std::string& name) { name_ = name; }
+
+const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
@@ -478,6 +480,12 @@ int Benchmark::ArgsCnt() const {
return static_cast<int>(args_.front().size());
}
+const char* Benchmark::GetArgName(int arg) const {
+ BM_CHECK_GE(arg, 0);
+ BM_CHECK_LT(arg, static_cast<int>(arg_names_.size()));
+ return arg_names_[arg].c_str();
+}
+
TimeUnit Benchmark::GetTimeUnit() const {
return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_;
}
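
The new accessors make a registered benchmark introspectable after construction. A sketch, assuming a benchmark registered with named arguments:

```cpp
#include <benchmark/benchmark.h>

void RegisterAndInspect() {
  benchmark::internal::Benchmark* b = benchmark::RegisterBenchmark(
      "BM_Demo", [](benchmark::State& state) {
        for (auto _ : state) {
        }
      });
  b->ArgNames({"bytes"})->Arg(1024);

  const char* name = b->GetName();      // "BM_Demo"
  const char* arg0 = b->GetArgName(0);  // "bytes"
  (void)name;
  (void)arg0;
}
```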
diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.cc b/contrib/restricted/google/benchmark/src/benchmark_runner.cc
index f9ffbc5afb..5f683fe423 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_runner.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark_runner.cc
@@ -28,11 +28,14 @@
#include <algorithm>
#include <atomic>
+#include <climits>
+#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
+#include <limits>
#include <memory>
#include <string>
#include <thread>
@@ -62,6 +65,8 @@ MemoryManager* memory_manager = nullptr;
namespace {
static constexpr IterationCount kMaxIterations = 1000000000;
+const double kDefaultMinTime =
+ std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
@@ -75,8 +80,8 @@ BenchmarkReporter::Run CreateRunReport(
report.run_name = b.name();
report.family_index = b.family_index();
report.per_family_instance_index = b.per_family_instance_index();
- report.error_occurred = results.has_error_;
- report.error_message = results.error_message_;
+ report.skipped = results.skipped_;
+ report.skip_message = results.skip_message_;
report.report_label = results.report_label_;
// This is the total iterations across all threads.
report.iterations = results.iterations;
@@ -85,7 +90,7 @@ BenchmarkReporter::Run CreateRunReport(
report.repetition_index = repetition_index;
report.repetitions = repeats;
- if (!report.error_occurred) {
+ if (!report.skipped) {
// This is the total time across all threads.
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
@@ -126,7 +131,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
State st =
b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
- BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
+ BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
MutexLock l(manager->GetBenchmarkMutex());
@@ -141,27 +146,100 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
manager->NotifyThreadComplete();
}
+double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
+ const BenchTimeType& iters_or_time) {
+ if (!IsZero(b.min_time())) return b.min_time();
+ // If the flag was used to specify number of iters, then return the default
+ // min_time.
+ if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime;
+
+ return iters_or_time.time;
+}
+
+IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
+ const BenchTimeType& iters_or_time) {
+ if (b.iterations() != 0) return b.iterations();
+
+ // We've already concluded that this flag is currently used to pass
+ // iters but do a check here again anyway.
+ BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
+ return iters_or_time.iters;
+}
+
} // end namespace
+BenchTimeType ParseBenchMinTime(const std::string& value) {
+ BenchTimeType ret;
+
+ if (value.empty()) {
+ ret.tag = BenchTimeType::TIME;
+ ret.time = 0.0;
+ return ret;
+ }
+
+ if (value.back() == 'x') {
+ char* p_end;
+ // Reset errno before it's changed by strtol.
+ errno = 0;
+ IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
+
+ // After a valid parse, p_end should have been set to
+ // point to the 'x' suffix.
+ BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
+ << "Malformed iters value passed to --benchmark_min_time: `" << value
+ << "`. Expected --benchmark_min_time=<integer>x.";
+
+ ret.tag = BenchTimeType::ITERS;
+ ret.iters = num_iters;
+ return ret;
+ }
+
+ bool has_suffix = value.back() == 's';
+ if (!has_suffix) {
+ BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
+ "Eg., `30s` for 30-seconds.";
+ }
+
+ char* p_end;
+ // Reset errno before it's changed by strtod.
+ errno = 0;
+ double min_time = std::strtod(value.c_str(), &p_end);
+
+ // After a successful parse, p_end should point to the suffix 's',
+ // or the end of the string if the suffix was omitted.
+ BM_CHECK(errno == 0 && p_end != nullptr &&
+ ((has_suffix && *p_end == 's') || *p_end == '\0'))
+ << "Malformed seconds value passed to --benchmark_min_time: `" << value
+ << "`. Expected --benchmark_min_time=<float>x.";
+
+ ret.tag = BenchTimeType::TIME;
+ ret.time = min_time;
+
+ return ret;
+}
+
BenchmarkRunner::BenchmarkRunner(
const benchmark::internal::BenchmarkInstance& b_,
+ PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
: b(b_),
reports_for_family(reports_for_family_),
- min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
+ parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
+ min_time(ComputeMinTime(b_, parsed_benchtime_flag)),
min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
? b.min_warmup_time()
: FLAGS_benchmark_min_warmup_time),
warmup_done(!(min_warmup_time > 0.0)),
repeats(b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions),
- has_explicit_iteration_count(b.iterations() != 0),
+ has_explicit_iteration_count(b.iterations() != 0 ||
+ parsed_benchtime_flag.tag ==
+ BenchTimeType::ITERS),
pool(b.threads() - 1),
- iters(has_explicit_iteration_count ? b.iterations() : 1),
- perf_counters_measurement(StrSplit(FLAGS_benchmark_perf_counters, ',')),
- perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
- ? &perf_counters_measurement
- : nullptr) {
+ iters(has_explicit_iteration_count
+ ? ComputeIters(b_, parsed_benchtime_flag)
+ : 1),
+ perf_counters_measurement_ptr(pcm_) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
@@ -174,7 +252,7 @@ BenchmarkRunner::BenchmarkRunner(
run_results.file_report_aggregates_only =
(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
- perf_counters_measurement.IsValid())
+ (perf_counters_measurement_ptr->num_counters() == 0))
<< "Perf counters were requested but could not be set up.";
}
}
@@ -263,7 +341,7 @@ bool BenchmarkRunner::ShouldReportIterationResults(
// Determine if this run should be reported;
// Either it has run for a sufficient amount of time
// or because an error was reported.
- return i.results.has_error_ ||
+ return i.results.skipped_ ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >=
GetMinTimeToApply() || // The elapsed time is large enough.
@@ -389,10 +467,7 @@ void BenchmarkRunner::DoOneRepetition() {
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
-
- BENCHMARK_DISABLE_DEPRECATED_WARNING
- memory_manager->Stop(memory_result);
- BENCHMARK_RESTORE_DEPRECATED_WARNING
+ memory_manager->Stop(*memory_result);
}
// Ok, now actually report.
@@ -402,7 +477,7 @@ void BenchmarkRunner::DoOneRepetition() {
if (reports_for_family) {
++reports_for_family->num_runs_done;
- if (!report.error_occurred) reports_for_family->Runs.push_back(report);
+ if (!report.skipped) reports_for_family->Runs.push_back(report);
}
run_results.non_aggregates.push_back(report);
diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.h b/contrib/restricted/google/benchmark/src/benchmark_runner.h
index 0174bd3401..db2fa04396 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_runner.h
+++ b/contrib/restricted/google/benchmark/src/benchmark_runner.h
@@ -25,7 +25,7 @@
namespace benchmark {
-BM_DECLARE_double(benchmark_min_time);
+BM_DECLARE_string(benchmark_min_time);
BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
@@ -44,9 +44,21 @@ struct RunResults {
bool file_report_aggregates_only = false;
};
+struct BENCHMARK_EXPORT BenchTimeType {
+ enum { ITERS, TIME } tag;
+ union {
+ IterationCount iters;
+ double time;
+ };
+};
+
+BENCHMARK_EXPORT
+BenchTimeType ParseBenchMinTime(const std::string& value);
+
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
+ benchmark::internal::PerfCountersMeasurement* pmc_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
int GetNumRepeats() const { return repeats; }
@@ -63,12 +75,19 @@ class BenchmarkRunner {
return reports_for_family;
}
+ double GetMinTime() const { return min_time; }
+
+ bool HasExplicitIters() const { return has_explicit_iteration_count; }
+
+ IterationCount GetIters() const { return iters; }
+
private:
RunResults run_results;
const benchmark::internal::BenchmarkInstance& b;
BenchmarkReporter::PerFamilyRunReports* reports_for_family;
+ BenchTimeType parsed_benchtime_flag;
const double min_time;
const double min_warmup_time;
bool warmup_done;
@@ -85,8 +104,7 @@ class BenchmarkRunner {
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
- PerfCountersMeasurement perf_counters_measurement;
- PerfCountersMeasurement* const perf_counters_measurement_ptr;
+ PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
struct IterationResults {
internal::ThreadManager::Result results;
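The BenchTimeType declared above is a tagged union: --benchmark_min_time is now a string flag that accepts either an explicit iteration count ("10x") or a duration in seconds ("2.5s"). A minimal sketch of the parsing idea under that suffix convention (this is not the library's exact ParseBenchMinTime; a bare number is assumed to fall back to seconds for backward compatibility):

    #include <cstdlib>
    #include <string>

    struct BenchTime {
      enum { ITERS, TIME } tag;
      union {
        long iters;
        double time;
      };
    };

    BenchTime ParseMinTime(const std::string& value) {
      BenchTime ret;
      if (!value.empty() && value.back() == 'x') {
        ret.tag = BenchTime::ITERS;
        ret.iters = std::strtol(value.c_str(), nullptr, 10);  // "10x" -> 10 iterations
      } else {
        // Trailing 's' (or a bare number) is read as seconds; strtod stops
        // at the suffix on its own.
        ret.tag = BenchTime::TIME;
        ret.time = std::strtod(value.c_str(), nullptr);  // "2.5s" -> 2.5 seconds
      }
      return ret;
    }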
diff --git a/contrib/restricted/google/benchmark/src/colorprint.cc b/contrib/restricted/google/benchmark/src/colorprint.cc
index 1a000a0637..9a653c5007 100644
--- a/contrib/restricted/google/benchmark/src/colorprint.cc
+++ b/contrib/restricted/google/benchmark/src/colorprint.cc
@@ -96,18 +96,18 @@ std::string FormatString(const char* msg, va_list args) {
// currently there is no error handling for failure, so this is hack.
BM_CHECK(ret >= 0);
- if (ret == 0) // handle empty expansion
+ if (ret == 0) { // handle empty expansion
return {};
- else if (static_cast<size_t>(ret) < size)
+ }
+ if (static_cast<size_t>(ret) < size) {
return local_buff;
- else {
- // we did not provide a long enough buffer on our first attempt.
- size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
- std::unique_ptr<char[]> buff(new char[size]);
- ret = vsnprintf(buff.get(), size, msg, args);
- BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
- return buff.get();
}
+ // we did not provide a long enough buffer on our first attempt.
+ size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
+ std::unique_ptr<char[]> buff(new char[size]);
+ ret = vsnprintf(buff.get(), size, msg, args);
+ BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
+ return buff.get();
}
std::string FormatString(const char* msg, ...) {
@@ -163,7 +163,7 @@ bool IsColorTerminal() {
#else
// On non-Windows platforms, we rely on the TERM variable. This list of
// supported TERM values is copied from Google Test:
- // <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>.
+ // <https://github.com/google/googletest/blob/main/googletest/src/gtest.cc#L2925>.
const char* const SUPPORTED_TERM_VALUES[] = {
"xterm", "xterm-color", "xterm-256color",
"screen", "screen-256color", "tmux",
diff --git a/contrib/restricted/google/benchmark/src/commandlineflags.cc b/contrib/restricted/google/benchmark/src/commandlineflags.cc
index 1f555b2757..dcb414959d 100644
--- a/contrib/restricted/google/benchmark/src/commandlineflags.cc
+++ b/contrib/restricted/google/benchmark/src/commandlineflags.cc
@@ -284,14 +284,15 @@ bool IsTruthyFlagValue(const std::string& value) {
char v = value[0];
return isalnum(v) &&
!(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
- } else if (!value.empty()) {
+ }
+ if (!value.empty()) {
std::string value_lower(value);
std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
[](char c) { return static_cast<char>(::tolower(c)); });
return !(value_lower == "false" || value_lower == "no" ||
value_lower == "off");
- } else
- return true;
+ }
+ return true;
}
} // end namespace benchmark
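Spelled out, the truthiness rules this restructured IsTruthyFlagValue implements (assuming, as upstream does, that the branch elided above this hunk guards the single-character case):

    // ""                                  -> true   (flag present, no value)
    // one char, alnum and not 0/f/F/n/N   -> true   (e.g. "1", "t", "y")
    // one char otherwise                  -> false  (e.g. "0", "n", "#")
    // "false" / "no" / "off", any case    -> false
    // any other non-empty string          -> true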
diff --git a/contrib/restricted/google/benchmark/src/complexity.h b/contrib/restricted/google/benchmark/src/complexity.h
index df29b48d29..0a0679b48b 100644
--- a/contrib/restricted/google/benchmark/src/complexity.h
+++ b/contrib/restricted/google/benchmark/src/complexity.h
@@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports);
// This data structure will contain the result returned by MinimalLeastSq
-// - coef : Estimated coeficient for the high-order term as
+// - coef : Estimated coefficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability
diff --git a/contrib/restricted/google/benchmark/src/console_reporter.cc b/contrib/restricted/google/benchmark/src/console_reporter.cc
index 3950e49814..10e05e133e 100644
--- a/contrib/restricted/google/benchmark/src/console_reporter.cc
+++ b/contrib/restricted/google/benchmark/src/console_reporter.cc
@@ -115,7 +115,7 @@ static std::string FormatTime(double time) {
if (time < 100.0) {
return FormatString("%10.1f", time);
}
- // Assuming the time ist at max 9.9999e+99 and we have 10 digits for the
+ // Assuming the time is at max 9.9999e+99 and we have 10 digits for the
// number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
if (time > 9999999999 /*max 10 digit number*/) {
return FormatString("%1.4e", time);
@@ -135,9 +135,13 @@ void ConsoleReporter::PrintRunData(const Run& result) {
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name().c_str());
- if (result.error_occurred) {
+ if (internal::SkippedWithError == result.skipped) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
- result.error_message.c_str());
+ result.skip_message.c_str());
+ printer(Out, COLOR_DEFAULT, "\n");
+ return;
+ } else if (internal::SkippedWithMessage == result.skipped) {
+ printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
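These two skip states are driven by the State API: SkippedWithMessage comes from state.SkipWithMessage() (new in 1.8.0) and SkippedWithError from state.SkipWithError(). A hedged usage sketch, where PlatformSupportsFeature is an invented placeholder:

    #include <benchmark/benchmark.h>

    static bool PlatformSupportsFeature() { return false; }  // hypothetical predicate

    static void BM_MaybeSkipped(benchmark::State& state) {
      if (!PlatformSupportsFeature()) {
        // Printed by the console reporter as: SKIPPED: 'feature missing'
        state.SkipWithMessage("feature missing");
        return;
      }
      for (auto _ : state) {
        // ... workload; a runtime failure here would call
        // state.SkipWithError("..."), printed as ERROR OCCURRED.
      }
    }
    BENCHMARK(BM_MaybeSkipped);
    BENCHMARK_MAIN();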
diff --git a/contrib/restricted/google/benchmark/src/csv_reporter.cc b/contrib/restricted/google/benchmark/src/csv_reporter.cc
index 83c94573f5..7b56da107e 100644
--- a/contrib/restricted/google/benchmark/src/csv_reporter.cc
+++ b/contrib/restricted/google/benchmark/src/csv_reporter.cc
@@ -109,10 +109,10 @@ BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
Out << CsvEscape(run.benchmark_name()) << ",";
- if (run.error_occurred) {
+ if (run.skipped) {
Out << std::string(elements.size() - 3, ',');
- Out << "true,";
- Out << CsvEscape(run.error_message) << "\n";
+ Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
+ Out << CsvEscape(run.skip_message) << "\n";
return;
}
diff --git a/contrib/restricted/google/benchmark/src/cycleclock.h b/contrib/restricted/google/benchmark/src/cycleclock.h
index 827f21b927..1295880b2e 100644
--- a/contrib/restricted/google/benchmark/src/cycleclock.h
+++ b/contrib/restricted/google/benchmark/src/cycleclock.h
@@ -36,7 +36,8 @@
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
-#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64)
+#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
+ !defined(_M_ARM64EC)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
@@ -114,7 +115,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
-#elif defined(COMPILER_MSVC) && defined(_M_ARM64)
+#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
// See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
// and https://reviews.llvm.org/D53115
int64_t virtual_timer_value;
diff --git a/contrib/restricted/google/benchmark/src/internal_macros.h b/contrib/restricted/google/benchmark/src/internal_macros.h
index 396a390afb..8dd7d0c650 100644
--- a/contrib/restricted/google/benchmark/src/internal_macros.h
+++ b/contrib/restricted/google/benchmark/src/internal_macros.h
@@ -42,6 +42,12 @@
#define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
#define BENCHMARK_OS_WINDOWS 1
+ // WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
+ // We include windows.h which implicitly includes winapifamily.h for compatibility.
+ #ifndef NOMINMAX
+ #define NOMINMAX
+ #endif
+ #include <windows.h>
#if defined(WINAPI_FAMILY_PARTITION)
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#define BENCHMARK_OS_WINDOWS_WIN32 1
diff --git a/contrib/restricted/google/benchmark/src/json_reporter.cc b/contrib/restricted/google/benchmark/src/json_reporter.cc
index d55a0e6f0b..6559dfd5e6 100644
--- a/contrib/restricted/google/benchmark/src/json_reporter.cc
+++ b/contrib/restricted/google/benchmark/src/json_reporter.cc
@@ -254,9 +254,12 @@ void JSONReporter::PrintRunData(Run const& run) {
BENCHMARK_UNREACHABLE();
}()) << ",\n";
}
- if (run.error_occurred) {
- out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
- out << indent << FormatKV("error_message", run.error_message) << ",\n";
+ if (internal::SkippedWithError == run.skipped) {
+ out << indent << FormatKV("error_occurred", true) << ",\n";
+ out << indent << FormatKV("error_message", run.skip_message) << ",\n";
+ } else if (internal::SkippedWithMessage == run.skipped) {
+ out << indent << FormatKV("skipped", true) << ",\n";
+ out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
@@ -294,7 +297,8 @@ void JSONReporter::PrintRunData(Run const& run) {
out << ",\n"
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
- auto report_if_present = [&out, &indent](const char* label, int64_t val) {
+ auto report_if_present = [&out, &indent](const std::string& label,
+ int64_t val) {
if (val != MemoryManager::TombstoneValue)
out << ",\n" << indent << FormatKV(label, val);
};
diff --git a/contrib/restricted/google/benchmark/src/perf_counters.cc b/contrib/restricted/google/benchmark/src/perf_counters.cc
index 8a60088ba7..5f2ac282ab 100644
--- a/contrib/restricted/google/benchmark/src/perf_counters.cc
+++ b/contrib/restricted/google/benchmark/src/perf_counters.cc
@@ -29,96 +29,215 @@ namespace internal {
constexpr size_t PerfCounterValues::kMaxCounters;
#if defined HAVE_LIBPFM
+
+size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
+ // Create a pointer for multiple reads
+ const size_t bufsize = values_.size() * sizeof(values_[0]);
+ char* ptr = reinterpret_cast<char*>(values_.data());
+ size_t size = bufsize;
+ for (int lead : leaders) {
+ auto read_bytes = ::read(lead, ptr, size);
+ if (read_bytes >= ssize_t(sizeof(uint64_t))) {
+ // Actual data bytes are all bytes minus initial padding
+ std::size_t data_bytes = read_bytes - sizeof(uint64_t);
+ // This should be very cheap since it's in hot cache
+ std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
+ // Increment our counters
+ ptr += data_bytes;
+ size -= data_bytes;
+ } else {
+ int err = errno;
+ GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
+ << " " << ::strerror(err) << "\n";
+ return 0;
+ }
+ }
+ return (bufsize - size) / sizeof(uint64_t);
+}
+
const bool PerfCounters::kSupported = true;
bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; }
+bool PerfCounters::IsCounterSupported(const std::string& name) {
+ perf_event_attr_t attr;
+ std::memset(&attr, 0, sizeof(attr));
+ pfm_perf_encode_arg_t arg;
+ std::memset(&arg, 0, sizeof(arg));
+ arg.attr = &attr;
+ const int mode = PFM_PLM3; // user mode only
+ int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
+ &arg);
+ return (ret == PFM_SUCCESS);
+}
+
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
- if (counter_names.empty()) {
- return NoCounters();
- }
- if (counter_names.size() > PerfCounterValues::kMaxCounters) {
- GetErrorLogInstance()
- << counter_names.size()
- << " counters were requested. The minimum is 1, the maximum is "
- << PerfCounterValues::kMaxCounters << "\n";
- return NoCounters();
- }
- std::vector<int> counter_ids(counter_names.size());
+  // Valid counters will populate these vectors, but we start empty
+ std::vector<std::string> valid_names;
+ std::vector<int> counter_ids;
+ std::vector<int> leader_ids;
- const int mode = PFM_PLM3; // user mode only
+  // Reserve space for the maximum possible
+ valid_names.reserve(counter_names.size());
+ counter_ids.reserve(counter_names.size());
+
+ const int kCounterMode = PFM_PLM3; // user mode only
+
+  // Group leaders will be assigned on demand. The idea is that once we fail
+  // to create a counter descriptor, the likely reason is that this group has
+  // maxed out, so we reset group_id to -1 and retry, giving the algorithm a
+  // chance to create a new group leader to hold the next set of counters.
+ int group_id = -1;
+
+ // Loop through all performance counters
for (size_t i = 0; i < counter_names.size(); ++i) {
- const bool is_first = i == 0;
- struct perf_event_attr attr {};
- attr.size = sizeof(attr);
- const int group_id = !is_first ? counter_ids[0] : -1;
+    // We are about to push into the valid names vector;
+    // check that we have not already reached the maximum
+ if (valid_names.size() == PerfCounterValues::kMaxCounters) {
+ // Log a message if we maxed out and stop adding
+ GetErrorLogInstance()
+ << counter_names.size() << " counters were requested. The maximum is "
+ << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
+ << " were already added. All remaining counters will be ignored\n";
+ // stop the loop and return what we have already
+ break;
+ }
+
+ // Check if this name is empty
const auto& name = counter_names[i];
if (name.empty()) {
- GetErrorLogInstance() << "A counter name was the empty string\n";
- return NoCounters();
+ GetErrorLogInstance()
+ << "A performance counter name was the empty string\n";
+ continue;
}
+
+ // Here first means first in group, ie the group leader
+ const bool is_first = (group_id < 0);
+
+ // This struct will be populated by libpfm from the counter string
+ // and then fed into the syscall perf_event_open
+ struct perf_event_attr attr {};
+ attr.size = sizeof(attr);
+
+ // This is the input struct to libpfm.
pfm_perf_encode_arg_t arg{};
arg.attr = &attr;
-
- const int pfm_get =
- pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
+ const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
+ PFM_OS_PERF_EVENT, &arg);
if (pfm_get != PFM_SUCCESS) {
- GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
- return NoCounters();
+ GetErrorLogInstance()
+ << "Unknown performance counter name: " << name << "\n";
+ continue;
}
- attr.disabled = is_first;
- // Note: the man page for perf_event_create suggests inerit = true and
+
+ // We then proceed to populate the remaining fields in our attribute struct
+ // Note: the man page for perf_event_create suggests inherit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
+ attr.disabled = is_first;
attr.inherit = true;
attr.pinned = is_first;
attr.exclude_kernel = true;
attr.exclude_user = false;
attr.exclude_hv = true;
- // Read all counters in one read.
+
+ // Read all counters in a group in one read.
attr.read_format = PERF_FORMAT_GROUP;
int id = -1;
- static constexpr size_t kNrOfSyscallRetries = 5;
- // Retry syscall as it was interrupted often (b/64774091).
- for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
- ++num_retries) {
- id = perf_event_open(&attr, 0, -1, group_id, 0);
- if (id >= 0 || errno != EINTR) {
- break;
+ while (id < 0) {
+ static constexpr size_t kNrOfSyscallRetries = 5;
+ // Retry syscall as it was interrupted often (b/64774091).
+ for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
+ ++num_retries) {
+ id = perf_event_open(&attr, 0, -1, group_id, 0);
+ if (id >= 0 || errno != EINTR) {
+ break;
+ }
+ }
+ if (id < 0) {
+ // If the file descriptor is negative we might have reached a limit
+ // in the current group. Set the group_id to -1 and retry
+ if (group_id >= 0) {
+ // Create a new group
+ group_id = -1;
+ } else {
+          // At this point we have already retried with a new group id and
+          // failed, so we give up.
+ break;
+ }
}
}
+
+ // We failed to get a new file descriptor. We might have reached a hard
+ // hardware limit that cannot be resolved even with group multiplexing
if (id < 0) {
- GetErrorLogInstance()
- << "Failed to get a file descriptor for " << name << "\n";
- return NoCounters();
- }
+ GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
+ "for performance counter "
+ << name << ". Ignoring\n";
- counter_ids[i] = id;
+ // We give up on this counter but try to keep going
+ // as the others would be fine
+ continue;
+ }
+ if (group_id < 0) {
+ // This is a leader, store and assign it to the current file descriptor
+ leader_ids.push_back(id);
+ group_id = id;
+ }
+ // This is a valid counter, add it to our descriptor's list
+ counter_ids.push_back(id);
+ valid_names.push_back(name);
}
- if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
- GetErrorLogInstance() << "Failed to start counters\n";
- return NoCounters();
+
+  // Loop through all group leaders, activating them.
+  // There is another option of starting ALL counters in a process, but
+  // that would be too far-reaching an intrusion. If the user is using PMCs
+  // themselves, this would have a side effect on them. It is friendlier
+  // to loop through all groups individually.
+ for (int lead : leader_ids) {
+ if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
+ // This should never happen but if it does, we give up on the
+ // entire batch as recovery would be a mess.
+ GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
+ "Claring out all counters.\n";
+
+      // Close all performance counters
+ for (int id : counter_ids) {
+ ::close(id);
+ }
+
+ // Return an empty object so our internal state is still good and
+ // the process can continue normally without impact
+ return NoCounters();
+ }
}
- return PerfCounters(counter_names, std::move(counter_ids));
+ return PerfCounters(std::move(valid_names), std::move(counter_ids),
+ std::move(leader_ids));
}
void PerfCounters::CloseCounters() const {
if (counter_ids_.empty()) {
return;
}
- ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE);
+ for (int lead : leader_ids_) {
+ ioctl(lead, PERF_EVENT_IOC_DISABLE);
+ }
for (int fd : counter_ids_) {
close(fd);
}
}
#else // defined HAVE_LIBPFM
+size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
+
const bool PerfCounters::kSupported = false;
bool PerfCounters::Initialize() { return false; }
+bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
+
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
@@ -130,31 +249,10 @@ PerfCounters PerfCounters::Create(
void PerfCounters::CloseCounters() const {}
#endif // defined HAVE_LIBPFM
-Mutex PerfCountersMeasurement::mutex_;
-int PerfCountersMeasurement::ref_count_ = 0;
-PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters();
-
PerfCountersMeasurement::PerfCountersMeasurement(
const std::vector<std::string>& counter_names)
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
- MutexLock l(mutex_);
- if (ref_count_ == 0) {
- counters_ = PerfCounters::Create(counter_names);
- }
- // We chose to increment it even if `counters_` ends up invalid,
- // so that we don't keep trying to create, and also since the dtor
- // will decrement regardless of `counters_`'s validity
- ++ref_count_;
-
- BM_CHECK(!counters_.IsValid() || counters_.names() == counter_names);
-}
-
-PerfCountersMeasurement::~PerfCountersMeasurement() {
- MutexLock l(mutex_);
- --ref_count_;
- if (ref_count_ == 0) {
- counters_ = PerfCounters::NoCounters();
- }
+ counters_ = PerfCounters::Create(counter_names);
}
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
@@ -162,6 +260,7 @@ PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
CloseCounters();
counter_ids_ = std::move(other.counter_ids_);
+ leader_ids_ = std::move(other.leader_ids_);
counter_names_ = std::move(other.counter_names_);
}
return *this;
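Read() above leans on the kernel's group read format: with attr.read_format set to PERF_FORMAT_GROUP and no other format bits, read(2) on a group leader yields a header word followed by one value per event, as documented in perf_event_open(2). A sketch of decoding a single group under that assumption (not the library's code, which instead memmoves each group's values over its header word so all counters pack contiguously in values_ and operator[] needs no padding offset):

    #include <cstdint>
    #include <unistd.h>
    #include <vector>

    // perf_event_open(2), PERF_FORMAT_GROUP and no other bits:
    //   struct read_format { uint64_t nr; uint64_t values[nr]; };
    bool ReadGroup(int leader_fd, std::vector<uint64_t>* out) {
      uint64_t buf[1 + 64];  // header word + room for up to 64 values
      ssize_t n = ::read(leader_fd, buf, sizeof(buf));
      if (n < static_cast<ssize_t>(sizeof(uint64_t))) return false;
      const uint64_t nr = buf[0];  // number of events in this group
      if (nr > 64) return false;   // buffer too small for this group
      for (uint64_t i = 0; i < nr; ++i) out->push_back(buf[1 + i]);
      return true;
    }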
diff --git a/contrib/restricted/google/benchmark/src/perf_counters.h b/contrib/restricted/google/benchmark/src/perf_counters.h
index 680555d4b0..152a6f2561 100644
--- a/contrib/restricted/google/benchmark/src/perf_counters.h
+++ b/contrib/restricted/google/benchmark/src/perf_counters.h
@@ -17,6 +17,7 @@
#include <array>
#include <cstdint>
+#include <cstring>
#include <memory>
#include <vector>
@@ -44,18 +45,21 @@ namespace internal {
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
-// to Snapshot(). The values are populated such that
-// perfCounters->names()[i]'s value is obtained at position i (as given by
-// operator[]) of this object.
-class PerfCounterValues {
+// to Snapshot(). The Read() method relocates individual reads, discarding
+// the initial padding from each group leader in the values buffer such that
+// all user accesses through the [] operator are correct.
+class BENCHMARK_EXPORT PerfCounterValues {
public:
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
BM_CHECK_LE(nr_counters_, kMaxCounters);
}
- uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
+ // We are reading correctly now so the values don't need to skip padding
+ uint64_t operator[](size_t pos) const { return values_[pos]; }
- static constexpr size_t kMaxCounters = 3;
+  // The maximum is increased to 32 only because the buffer
+  // is std::array<> backed
+ static constexpr size_t kMaxCounters = 32;
private:
friend class PerfCounters;
@@ -66,7 +70,14 @@ class PerfCounterValues {
sizeof(uint64_t) * (kPadding + nr_counters_)};
}
- static constexpr size_t kPadding = 1;
+  // The read logic is complex, and as the goal of this class is to
+  // abstract away the intricacies of the reading process, this is
+  // a better place for it
+ size_t Read(const std::vector<int>& leaders);
+
+  // The padding is now 2 due to the reading algorithm (the initial
+  // padding plus the current read's padding)
+ static constexpr size_t kPadding = 2;
std::array<uint64_t, kPadding + kMaxCounters> values_;
const size_t nr_counters_;
};
@@ -79,10 +90,11 @@ class BENCHMARK_EXPORT PerfCounters final {
// True iff this platform supports performance counters.
static const bool kSupported;
- bool IsValid() const { return !counter_names_.empty(); }
+ // Returns an empty object
static PerfCounters NoCounters() { return PerfCounters(); }
~PerfCounters() { CloseCounters(); }
+ PerfCounters() = default;
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
PerfCounters& operator=(PerfCounters&&) noexcept;
@@ -92,11 +104,15 @@ class BENCHMARK_EXPORT PerfCounters final {
// initialization here.
static bool Initialize();
+  // Check if the given counter is supported, if the app wants to
+  // check before passing it in
+ static bool IsCounterSupported(const std::string& name);
+
// Return a PerfCounters object ready to read the counters with the names
// specified. The values are user-mode only. The counter name format is
// implementation and OS specific.
- // TODO: once we move to C++-17, this should be a std::optional, and then the
- // IsValid() boolean can be dropped.
+ // In case of failure, this method will in the worst case return an
+ // empty object whose state will still be valid.
static PerfCounters Create(const std::vector<std::string>& counter_names);
// Take a snapshot of the current value of the counters into the provided
@@ -105,10 +121,7 @@ class BENCHMARK_EXPORT PerfCounters final {
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
assert(values != nullptr);
- assert(IsValid());
- auto buffer = values->get_data_buffer();
- auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
- return static_cast<size_t>(read_bytes) == buffer.second;
+ return values->Read(leader_ids_) == counter_ids_.size();
#else
(void)values;
return false;
@@ -120,13 +133,15 @@ class BENCHMARK_EXPORT PerfCounters final {
private:
PerfCounters(const std::vector<std::string>& counter_names,
- std::vector<int>&& counter_ids)
- : counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {}
- PerfCounters() = default;
+ std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
+ : counter_ids_(std::move(counter_ids)),
+ leader_ids_(std::move(leader_ids)),
+ counter_names_(counter_names) {}
void CloseCounters() const;
std::vector<int> counter_ids_;
+ std::vector<int> leader_ids_;
std::vector<std::string> counter_names_;
};
@@ -134,33 +149,25 @@ class BENCHMARK_EXPORT PerfCounters final {
class BENCHMARK_EXPORT PerfCountersMeasurement final {
public:
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
- ~PerfCountersMeasurement();
-
- // The only way to get to `counters_` is after ctor-ing a
- // `PerfCountersMeasurement`, which means that `counters_`'s state is, here,
- // decided (either invalid or valid) and won't change again even if a ctor is
- // concurrently running with this. This is preferring efficiency to
- // maintainability, because the address of the static can be known at compile
- // time.
- bool IsValid() const {
- MutexLock l(mutex_);
- return counters_.IsValid();
- }
- BENCHMARK_ALWAYS_INLINE void Start() {
- assert(IsValid());
- MutexLock l(mutex_);
+ size_t num_counters() const { return counters_.num_counters(); }
+
+ std::vector<std::string> names() const { return counters_.names(); }
+
+ BENCHMARK_ALWAYS_INLINE bool Start() {
+ if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&start_values_);
ClobberMemory();
+
+ return valid_read_;
}
BENCHMARK_ALWAYS_INLINE bool Stop(
std::vector<std::pair<std::string, double>>& measurements) {
- assert(IsValid());
- MutexLock l(mutex_);
+ if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
@@ -177,9 +184,7 @@ class BENCHMARK_EXPORT PerfCountersMeasurement final {
}
private:
- static Mutex mutex_;
- GUARDED_BY(mutex_) static int ref_count_;
- GUARDED_BY(mutex_) static PerfCounters counters_;
+ PerfCounters counters_;
bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
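With the static registry gone, each PerfCountersMeasurement now owns its counters outright and no lock is needed around Start()/Stop(). A hypothetical usage sketch of that bracket (the counter names are libpfm-style and assumed available on the host; the include path is a guess at the internal header):

    #include <string>
    #include <utility>
    #include <vector>

    #include "src/perf_counters.h"  // internal header, path assumed

    void MeasureRegion() {
      benchmark::internal::PerfCountersMeasurement pcm({"CYCLES", "INSTRUCTIONS"});
      if (pcm.num_counters() == 0) return;  // setup failed or unsupported platform

      std::vector<std::pair<std::string, double>> deltas;
      pcm.Start();
      // ... region under measurement ...
      if (pcm.Stop(deltas)) {
        // deltas[i] = {counter name, end - start} for each valid counter
      }
    }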
diff --git a/contrib/restricted/google/benchmark/src/sleep.cc b/contrib/restricted/google/benchmark/src/sleep.cc
deleted file mode 100644
index ab59000f24..0000000000
--- a/contrib/restricted/google/benchmark/src/sleep.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2015 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "sleep.h"
-
-#include <cerrno>
-#include <cstdlib>
-#include <ctime>
-
-#include "internal_macros.h"
-
-#ifdef BENCHMARK_OS_WINDOWS
-#include <windows.h>
-#endif
-
-#ifdef BENCHMARK_OS_ZOS
-#include <unistd.h>
-#endif
-
-namespace benchmark {
-#ifdef BENCHMARK_OS_WINDOWS
-// Window's Sleep takes milliseconds argument.
-void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
-void SleepForSeconds(double seconds) {
- SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
-}
-#else // BENCHMARK_OS_WINDOWS
-void SleepForMicroseconds(int microseconds) {
-#ifdef BENCHMARK_OS_ZOS
- // z/OS does not support nanosleep. Instead call sleep() and then usleep() to
- // sleep for the remaining microseconds because usleep() will fail if its
- // argument is greater than 1000000.
- div_t sleepTime = div(microseconds, kNumMicrosPerSecond);
- int seconds = sleepTime.quot;
- while (seconds != 0) seconds = sleep(seconds);
- while (usleep(sleepTime.rem) == -1 && errno == EINTR)
- ;
-#else
- struct timespec sleep_time;
- sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
- sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
- while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
- ; // Ignore signals and wait for the full interval to elapse.
-#endif
-}
-
-void SleepForMilliseconds(int milliseconds) {
- SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
-}
-
-void SleepForSeconds(double seconds) {
- SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
-}
-#endif // BENCHMARK_OS_WINDOWS
-} // end namespace benchmark
diff --git a/contrib/restricted/google/benchmark/src/sleep.h b/contrib/restricted/google/benchmark/src/sleep.h
deleted file mode 100644
index f98551afe2..0000000000
--- a/contrib/restricted/google/benchmark/src/sleep.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef BENCHMARK_SLEEP_H_
-#define BENCHMARK_SLEEP_H_
-
-namespace benchmark {
-const int kNumMillisPerSecond = 1000;
-const int kNumMicrosPerMilli = 1000;
-const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
-const int kNumNanosPerMicro = 1000;
-const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
-
-void SleepForMilliseconds(int milliseconds);
-void SleepForSeconds(double seconds);
-} // end namespace benchmark
-
-#endif // BENCHMARK_SLEEP_H_
diff --git a/contrib/restricted/google/benchmark/src/statistics.cc b/contrib/restricted/google/benchmark/src/statistics.cc
index 5ba885ab00..c4b54b271f 100644
--- a/contrib/restricted/google/benchmark/src/statistics.cc
+++ b/contrib/restricted/google/benchmark/src/statistics.cc
@@ -89,9 +89,8 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
- auto error_count =
- std::count_if(reports.begin(), reports.end(),
- [](Run const& run) { return run.error_occurred; });
+ auto error_count = std::count_if(reports.begin(), reports.end(),
+ [](Run const& run) { return run.skipped; });
if (reports.size() - error_count < 2) {
// We don't report aggregated data if there was a single run.
@@ -133,7 +132,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
for (Run const& run : reports) {
BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
BM_CHECK_EQ(run_iterations, run.iterations);
- if (run.error_occurred) continue;
+ if (run.skipped) continue;
real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
// user counters
diff --git a/contrib/restricted/google/benchmark/src/statistics.h b/contrib/restricted/google/benchmark/src/statistics.h
index b0d2c05e72..6e5560e8f1 100644
--- a/contrib/restricted/google/benchmark/src/statistics.h
+++ b/contrib/restricted/google/benchmark/src/statistics.h
@@ -22,9 +22,10 @@
namespace benchmark {
-// Return a vector containing the mean, median and standard devation information
-// (and any user-specified info) for the specified list of reports. If 'reports'
-// contains less than two non-errored runs an empty vector is returned
+// Return a vector containing the mean, median and standard deviation
+// information (and any user-specified info) for the specified list of reports.
+// If 'reports' contains less than two non-errored runs an empty vector is
+// returned
BENCHMARK_EXPORT
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports);
diff --git a/contrib/restricted/google/benchmark/src/string_util.cc b/contrib/restricted/google/benchmark/src/string_util.cc
index b3196fc266..5e2d24a3cd 100644
--- a/contrib/restricted/google/benchmark/src/string_util.cc
+++ b/contrib/restricted/google/benchmark/src/string_util.cc
@@ -94,10 +94,10 @@ std::string ExponentToPrefix(int64_t exponent, bool iec) {
const char* array =
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
- if (iec)
+ if (iec) {
return array[index] + std::string("i");
- else
- return std::string(1, array[index]);
+ }
+ return std::string(1, array[index]);
}
std::string ToBinaryStringFullySpecified(double value, double threshold,
diff --git a/contrib/restricted/google/benchmark/src/sysinfo.cc b/contrib/restricted/google/benchmark/src/sysinfo.cc
index e763d5c481..80eece3ae7 100644
--- a/contrib/restricted/google/benchmark/src/sysinfo.cc
+++ b/contrib/restricted/google/benchmark/src/sysinfo.cc
@@ -46,6 +46,9 @@
#if defined(BENCHMARK_OS_QURT)
#error #include <qurt.h>
#endif
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+#include <pthread.h>
+#endif
#include <algorithm>
#include <array>
@@ -62,15 +65,17 @@
#include <limits>
#include <locale>
#include <memory>
+#include <random>
#include <sstream>
#include <utility>
+#include "benchmark/benchmark.h"
#include "check.h"
#include "cycleclock.h"
#include "internal_macros.h"
#include "log.h"
-#include "sleep.h"
#include "string_util.h"
+#include "timers.h"
namespace benchmark {
namespace {
@@ -423,19 +428,12 @@ std::string GetSystemName() {
#ifndef UNICODE
str = std::string(hostname, DWCOUNT);
#else
- std::vector<wchar_t> converted;
- // Find the length first.
- int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, hostname,
- DWCOUNT, converted.begin(), 0);
- // TODO: Report error from GetLastError()?
- if (len == 0) return std::string("");
- converted.reserve(len + 1);
-
- len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, hostname, DWCOUNT,
- converted.begin(), converted.size());
- // TODO: Report error from GetLastError()?
- if (len == 0) return std::string("");
- str = std::string(converted.data());
+ // `WideCharToMultiByte` returns `0` when conversion fails.
+ int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname,
+ DWCOUNT, NULL, 0, NULL, NULL);
+ str.resize(len);
+ WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0],
+ str.size(), NULL, NULL);
#endif
return str;
#elif defined(BENCHMARK_OS_QURT)
@@ -448,7 +446,7 @@ std::string GetSystemName() {
return str;
#else
#ifndef HOST_NAME_MAX
-#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined
+#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined
#define HOST_NAME_MAX 64
#elif defined(BENCHMARK_OS_NACL)
#define HOST_NAME_MAX 64
@@ -551,6 +549,80 @@ int GetNumCPUs() {
BENCHMARK_UNREACHABLE();
}
+class ThreadAffinityGuard final {
+ public:
+ ThreadAffinityGuard() : reset_affinity(SetAffinity()) {
+ if (!reset_affinity)
+ std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU "
+ "frequency may be incorrect."
+ << std::endl;
+ }
+
+ ~ThreadAffinityGuard() {
+ if (!reset_affinity) return;
+
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ int ret = pthread_setaffinity_np(self, sizeof(previous_affinity),
+ &previous_affinity);
+ if (ret == 0) return;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity);
+ if (ret != 0) return;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ PrintErrorAndDie("Failed to reset thread affinity");
+ }
+
+ ThreadAffinityGuard(ThreadAffinityGuard&&) = delete;
+ ThreadAffinityGuard(const ThreadAffinityGuard&) = delete;
+ ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete;
+ ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete;
+
+ private:
+ bool SetAffinity() {
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ int ret;
+ self = pthread_self();
+ ret = pthread_getaffinity_np(self, sizeof(previous_affinity),
+ &previous_affinity);
+ if (ret != 0) return false;
+
+ cpu_set_t affinity;
+ memcpy(&affinity, &previous_affinity, sizeof(affinity));
+
+ bool is_first_cpu = true;
+
+ for (int i = 0; i < CPU_SETSIZE; ++i)
+ if (CPU_ISSET(i, &affinity)) {
+ if (is_first_cpu)
+ is_first_cpu = false;
+ else
+ CPU_CLR(i, &affinity);
+ }
+
+ if (is_first_cpu) return false;
+
+ ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity);
+ return ret == 0;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ self = GetCurrentThread();
+ DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber();
+ previous_affinity = SetThreadAffinityMask(self, mask);
+ return previous_affinity != 0;
+#else
+ return false;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ }
+
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ pthread_t self;
+ cpu_set_t previous_affinity;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ HANDLE self;
+ DWORD_PTR previous_affinity;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ bool reset_affinity;
+};
+
double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
// Currently, scaling is only used on linux path here,
// suppress diagnostics about it being unused on other paths.
@@ -706,10 +778,39 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
return 1000000000;
#endif
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
- static constexpr int estimate_time_ms = 1000;
+
+ // Make sure to use the same cycle counter when starting and stopping the
+ // cycle timer. We just pin the current thread to a cpu in the previous
+ // affinity set.
+ ThreadAffinityGuard affinity_guard;
+
+ static constexpr double estimate_time_s = 1.0;
+ const double start_time = ChronoClockNow();
const auto start_ticks = cycleclock::Now();
- SleepForMilliseconds(estimate_time_ms);
- return static_cast<double>(cycleclock::Now() - start_ticks);
+
+ // Impose load instead of calling sleep() to make sure the cycle counter
+ // works.
+ using PRNG = std::minstd_rand;
+ using Result = PRNG::result_type;
+ PRNG rng(static_cast<Result>(start_ticks));
+
+ Result state = 0;
+
+ do {
+ static constexpr size_t batch_size = 10000;
+ rng.discard(batch_size);
+ state += rng();
+
+ } while (ChronoClockNow() - start_time < estimate_time_s);
+
+ DoNotOptimize(state);
+
+ const auto end_ticks = cycleclock::Now();
+ const double end_time = ChronoClockNow();
+
+ return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time);
+  // Reset the affinity of the current thread when the lifetime of
+  // affinity_guard ends.
}
std::vector<double> GetLoadAvg() {
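The frequency fallback above now returns a true ratio instead of a raw tick count over a fixed sleep:

    cycles_per_second = (end_ticks - start_ticks) / (end_time - start_time)

Worked through: if the busy loop runs for 1.002 s of wall time while the pinned core's counter advances by 3.206e9 ticks, the estimate is 3.206e9 / 1.002 ≈ 3.2 GHz. Under the old SleepForMilliseconds(1000) code, the same 2 ms of oversleep would have been folded silently into the returned tick count, and an unpinned thread could even have sampled two different cores' counters.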
diff --git a/contrib/restricted/google/benchmark/src/thread_manager.h b/contrib/restricted/google/benchmark/src/thread_manager.h
index 4680285089..819b3c44db 100644
--- a/contrib/restricted/google/benchmark/src/thread_manager.h
+++ b/contrib/restricted/google/benchmark/src/thread_manager.h
@@ -43,8 +43,8 @@ class ThreadManager {
double manual_time_used = 0;
int64_t complexity_n = 0;
std::string report_label_;
- std::string error_message_;
- bool has_error_ = false;
+ std::string skip_message_;
+ internal::Skipped skipped_ = internal::NotSkipped;
UserCounters counters;
};
GUARDED_BY(GetBenchmarkMutex()) Result results;
diff --git a/contrib/restricted/google/benchmark/src/timers.cc b/contrib/restricted/google/benchmark/src/timers.cc
index 0a4da83c19..042895d0d4 100644
--- a/contrib/restricted/google/benchmark/src/timers.cc
+++ b/contrib/restricted/google/benchmark/src/timers.cc
@@ -59,7 +59,6 @@
#include "check.h"
#include "log.h"
-#include "sleep.h"
#include "string_util.h"
namespace benchmark {
@@ -68,6 +67,9 @@ namespace benchmark {
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
+#if defined(__NVCOMPILER)
+#pragma diag_suppress declared_but_not_referenced
+#endif
namespace {
#if defined(BENCHMARK_OS_WINDOWS)
diff --git a/contrib/restricted/google/benchmark/test/string_util_gtest.cc b/contrib/restricted/google/benchmark/test/string_util_gtest.cc
index 698f2d43eb..8bfdb7a72c 100644
--- a/contrib/restricted/google/benchmark/test/string_util_gtest.cc
+++ b/contrib/restricted/google/benchmark/test/string_util_gtest.cc
@@ -2,6 +2,8 @@
// statistics_test - Unit tests for src/statistics.cc
//===---------------------------------------------------------------------===//
+#include <tuple>
+
#include "../src/internal_macros.h"
#include "../src/string_util.h"
#include "gtest/gtest.h"
@@ -63,7 +65,10 @@ TEST(StringUtilTest, stoul) {
EXPECT_EQ(4ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
- { ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); }
+ {
+ ASSERT_THROW(std::ignore = benchmark::stoul("this is a test"),
+ std::invalid_argument);
+ }
#endif
}
@@ -107,7 +112,10 @@ EXPECT_EQ(1ul, pos);
EXPECT_EQ(4ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
-{ ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); }
+{
+ ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"),
+ std::invalid_argument);
+}
#endif
}
@@ -137,7 +145,10 @@ EXPECT_EQ(1ul, pos);
EXPECT_EQ(8ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
-{ ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); }
+{
+ ASSERT_THROW(std::ignore = benchmark::stod("this is a test"),
+ std::invalid_argument);
+}
#endif
}
diff --git a/contrib/restricted/google/benchmark/test/ya.make b/contrib/restricted/google/benchmark/test/ya.make
index e5ca53e0d8..0106f3212f 100644
--- a/contrib/restricted/google/benchmark/test/ya.make
+++ b/contrib/restricted/google/benchmark/test/ya.make
@@ -20,6 +20,7 @@ CFLAGS(
-DBENCHMARK_STATIC_DEFINE
-DGTEST_LINKED_AS_SHARED_LIBRARY=1
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
)
diff --git a/contrib/restricted/google/benchmark/ya.make b/contrib/restricted/google/benchmark/ya.make
index 8ef295a7b5..886e082a5b 100644
--- a/contrib/restricted/google/benchmark/ya.make
+++ b/contrib/restricted/google/benchmark/ya.make
@@ -1,4 +1,4 @@
-# Generated by devtools/yamaker from nixpkgs 22.05.
+# Generated by devtools/yamaker from nixpkgs 22.11.
LIBRARY()
@@ -6,9 +6,9 @@ LICENSE(Apache-2.0)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(1.7.1)
+VERSION(1.8.0)
-ORIGINAL_SOURCE(https://github.com/google/benchmark/archive/v1.7.1.tar.gz)
+ORIGINAL_SOURCE(https://github.com/google/benchmark/archive/v1.8.0.tar.gz)
ADDINCL(
GLOBAL contrib/restricted/google/benchmark/include
@@ -22,10 +22,17 @@ NO_UTIL()
CFLAGS(
GLOBAL -DBENCHMARK_STATIC_DEFINE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
)
+IF (OS_LINUX)
+ CFLAGS(
+ -DBENCHMARK_HAS_PTHREAD_AFFINITY
+ )
+ENDIF()
+
SRCS(
src/benchmark.cc
src/benchmark_api_internal.cc
@@ -42,7 +49,6 @@ SRCS(
src/json_reporter.cc
src/perf_counters.cc
src/reporter.cc
- src/sleep.cc
src/statistics.cc
src/string_util.cc
src/sysinfo.cc