author    thegeorg <thegeorg@yandex-team.com>  2023-06-14 13:05:42 +0300
committer thegeorg <thegeorg@yandex-team.com>  2023-06-14 13:05:42 +0300
commit    4b972da4fb8c047e6c1b876f3d026f213cade3b7 (patch)
tree      4c7453323a0a572846b42a1091859c4867ae68f4
parent    bb4d56c9f2833ca3622dce9ee4a799910ea1549c (diff)
download  ydb-4b972da4fb8c047e6c1b876f3d026f213cade3b7.tar.gz
Update contrib/restricted/google/benchmark to 1.8.0
-rw-r--r--  contrib/restricted/google/benchmark/AUTHORS                         |   3
-rw-r--r--  contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt    |   2
-rw-r--r--  contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt    |   3
-rw-r--r--  contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt     |   3
-rw-r--r--  contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt   |   2
-rw-r--r--  contrib/restricted/google/benchmark/README.md                       |  10
-rw-r--r--  contrib/restricted/google/benchmark/include/benchmark/benchmark.h   | 262
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark.cc                | 141
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark_api_internal.cc   |  12
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark_register.cc       |  14
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark_runner.cc         | 111
-rw-r--r--  contrib/restricted/google/benchmark/src/benchmark_runner.h          |  24
-rw-r--r--  contrib/restricted/google/benchmark/src/colorprint.cc               |  20
-rw-r--r--  contrib/restricted/google/benchmark/src/commandlineflags.cc         |   7
-rw-r--r--  contrib/restricted/google/benchmark/src/complexity.h                |   2
-rw-r--r--  contrib/restricted/google/benchmark/src/console_reporter.cc         |  10
-rw-r--r--  contrib/restricted/google/benchmark/src/csv_reporter.cc             |   6
-rw-r--r--  contrib/restricted/google/benchmark/src/cycleclock.h                |   5
-rw-r--r--  contrib/restricted/google/benchmark/src/internal_macros.h           |   6
-rw-r--r--  contrib/restricted/google/benchmark/src/json_reporter.cc            |  12
-rw-r--r--  contrib/restricted/google/benchmark/src/perf_counters.cc            | 229
-rw-r--r--  contrib/restricted/google/benchmark/src/perf_counters.h             |  79
-rw-r--r--  contrib/restricted/google/benchmark/src/sleep.cc                    |  66
-rw-r--r--  contrib/restricted/google/benchmark/src/sleep.h                     |  15
-rw-r--r--  contrib/restricted/google/benchmark/src/statistics.cc               |   7
-rw-r--r--  contrib/restricted/google/benchmark/src/statistics.h                |   7
-rw-r--r--  contrib/restricted/google/benchmark/src/string_util.cc              |   6
-rw-r--r--  contrib/restricted/google/benchmark/src/sysinfo.cc                  | 137
-rw-r--r--  contrib/restricted/google/benchmark/src/thread_manager.h            |   4
-rw-r--r--  contrib/restricted/google/benchmark/src/timers.cc                   |   4
-rw-r--r--  contrib/restricted/google/benchmark/test/string_util_gtest.cc       |  17
-rw-r--r--  contrib/restricted/google/benchmark/test/ya.make                    |   1
-rw-r--r--  contrib/restricted/google/benchmark/ya.make                         |  14
33 files changed, 834 insertions, 407 deletions
diff --git a/contrib/restricted/google/benchmark/AUTHORS b/contrib/restricted/google/benchmark/AUTHORS
index 98d2d98b05..bafecaddb5 100644
--- a/contrib/restricted/google/benchmark/AUTHORS
+++ b/contrib/restricted/google/benchmark/AUTHORS
@@ -32,6 +32,7 @@ Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Gergő Szitár <szitar.gergo@gmail.com>
Google Inc.
+Henrique Bucher <hbucher@gmail.com>
International Business Machines Corporation
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
@@ -42,8 +43,10 @@ Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
+Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
+Mike Apodaca <gatorfax@gmail.com>
MongoDB Inc.
Nick Hutchinson <nshutchinson@gmail.com>
Norman Heino <norman.heino@gmail.com>
diff --git a/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt b/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt
index cfad7dc3cc..0e16d88f39 100644
--- a/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt
+++ b/contrib/restricted/google/benchmark/CMakeLists.darwin-x86_64.txt
@@ -13,6 +13,7 @@ target_compile_options(restricted-google-benchmark PUBLIC
)
target_compile_options(restricted-google-benchmark PRIVATE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
$<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
@@ -42,7 +43,6 @@ target_sources(restricted-google-benchmark PRIVATE
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc
- ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc
diff --git a/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt b/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt
index d1f192eee7..a18f8b0963 100644
--- a/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt
+++ b/contrib/restricted/google/benchmark/CMakeLists.linux-aarch64.txt
@@ -13,8 +13,10 @@ target_compile_options(restricted-google-benchmark PUBLIC
)
target_compile_options(restricted-google-benchmark PRIVATE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
+ -DBENCHMARK_HAS_PTHREAD_AFFINITY
$<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
)
target_include_directories(restricted-google-benchmark PUBLIC
@@ -43,7 +45,6 @@ target_sources(restricted-google-benchmark PRIVATE
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc
- ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc
diff --git a/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt b/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt
index d1f192eee7..a18f8b0963 100644
--- a/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt
+++ b/contrib/restricted/google/benchmark/CMakeLists.linux-x86_64.txt
@@ -13,8 +13,10 @@ target_compile_options(restricted-google-benchmark PUBLIC
)
target_compile_options(restricted-google-benchmark PRIVATE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
+ -DBENCHMARK_HAS_PTHREAD_AFFINITY
$<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
)
target_include_directories(restricted-google-benchmark PUBLIC
@@ -43,7 +45,6 @@ target_sources(restricted-google-benchmark PRIVATE
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc
- ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc
diff --git a/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt b/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt
index cfad7dc3cc..0e16d88f39 100644
--- a/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt
+++ b/contrib/restricted/google/benchmark/CMakeLists.windows-x86_64.txt
@@ -13,6 +13,7 @@ target_compile_options(restricted-google-benchmark PUBLIC
)
target_compile_options(restricted-google-benchmark PRIVATE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
$<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything>
@@ -42,7 +43,6 @@ target_sources(restricted-google-benchmark PRIVATE
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/json_reporter.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/perf_counters.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/reporter.cc
- ${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sleep.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/statistics.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/string_util.cc
${CMAKE_SOURCE_DIR}/contrib/restricted/google/benchmark/src/sysinfo.cc
diff --git a/contrib/restricted/google/benchmark/README.md b/contrib/restricted/google/benchmark/README.md
index 205fb008af..b64048b7d3 100644
--- a/contrib/restricted/google/benchmark/README.md
+++ b/contrib/restricted/google/benchmark/README.md
@@ -5,7 +5,7 @@
[![pylint](https://github.com/google/benchmark/workflows/pylint/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Apylint)
[![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings)
-[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark)
+[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=main)](https://travis-ci.org/google/benchmark)
[![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark)
@@ -33,7 +33,7 @@ To get started, see [Requirements](#requirements) and
[Installation](#installation). See [Usage](#usage) for a full example and the
[User Guide](docs/user_guide.md) for a more comprehensive feature overview.
-It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md)
+It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/main/docs/primer.md)
as some of the structural aspects of the APIs are similar.
## Resources
@@ -139,6 +139,12 @@ cache variables, if autodetection fails.
If you are using clang, you may need to set `LLVMAR_EXECUTABLE`,
`LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables.
+To enable sanitizer checks (eg., `asan` and `tsan`), add:
+```
+ -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all"
+ -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all "
+```
+
### Stable and Experimental Library Versions
The main branch contains the latest stable version of the benchmarking library;
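
The added README snippet combines `-fsanitize=address` and `-fsanitize=thread` in one flag set; the two sanitizers are mutually exclusive in practice, so separate build trees are normally configured. A minimal sketch, assuming out-of-source build directories named `build-asan` and `build-tsan`:

```
cmake -S . -B build-asan -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all"
cmake -S . -B build-tsan -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
```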
diff --git a/contrib/restricted/google/benchmark/include/benchmark/benchmark.h b/contrib/restricted/google/benchmark/include/benchmark/benchmark.h
index 77dcfbdc2a..4a8be19d86 100644
--- a/contrib/restricted/google/benchmark/include/benchmark/benchmark.h
+++ b/contrib/restricted/google/benchmark/include/benchmark/benchmark.h
@@ -218,6 +218,18 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#define BENCHMARK_UNUSED
#endif
+// Used to annotate functions, methods and classes so they
+// are not optimized by the compiler. Useful for tests
+// where you expect loops to stay in place churning cycles
+#if defined(__clang__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
+#elif defined(__GNUC__) || defined(__GNUG__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
+#else
+// MSVC & Intel do not have a no-optimize attribute, only line pragmas
+#define BENCHMARK_DONT_OPTIMIZE
+#endif
+
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
#elif defined(_MSC_VER) && !defined(__clang__)
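
The new `BENCHMARK_DONT_OPTIMIZE` macro is for functions whose loops should keep churning cycles even under optimization. A minimal usage sketch (the benchmark name and loop body are illustrative; on MSVC and Intel the macro expands to nothing, so the loop may still be folded there):

```cpp
#include <benchmark/benchmark.h>

BENCHMARK_DONT_OPTIMIZE static void BM_SpinLoop(benchmark::State& state) {
  for (auto _ : state) {
    long sink = 0;
    for (int i = 0; i < 1000; ++i) sink += i;  // busy work that must not be elided
    benchmark::DoNotOptimize(sink);
  }
}
BENCHMARK(BM_SpinLoop);
```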
@@ -231,13 +243,20 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
// clang-format off
-#if defined(__GNUC__) && !defined(__NVCC__) || defined(__clang__)
+#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
+#elif defined(__NVCOMPILER)
+#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
+#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
+#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
+ _Pragma("diagnostic push") \
+ _Pragma("diag_suppress deprecated_entity_with_custom_message")
+#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
#else
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg)
@@ -280,6 +299,9 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
namespace benchmark {
class BenchmarkReporter;
+// Default number of minimum benchmark running time in seconds.
+const char kDefaultMinTimeStr[] = "0.5s";
+
BENCHMARK_EXPORT void PrintDefaultHelp();
BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
@@ -383,13 +405,7 @@ class MemoryManager {
virtual void Start() = 0;
// Implement this to stop recording and fill out the given Result structure.
- BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead")
- virtual void Stop(Result* result) = 0;
-
- // FIXME(vyng): Make this pure virtual once we've migrated current users.
- BENCHMARK_DISABLE_DEPRECATED_WARNING
- virtual void Stop(Result& result) { Stop(&result); }
- BENCHMARK_RESTORE_DEPRECATED_WARNING
+ virtual void Stop(Result& result) = 0;
};
// Register a MemoryManager instance that will be used to collect and report
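
With the deprecated pointer overload gone, `Stop(Result&)` is the single pure-virtual hook a custom manager must implement. A minimal conforming sketch (the class name is illustrative, and a real manager would fill the result from actual allocation tracking):

```cpp
#include <benchmark/benchmark.h>

class FlatMemoryManager : public benchmark::MemoryManager {
 public:
  void Start() override {}               // begin recording allocations
  void Stop(Result& result) override {   // note: reference, not pointer
    result.num_allocs = 0;               // placeholder totals
    result.max_bytes_used = 0;
  }
};

static FlatMemoryManager g_memory_manager;
// benchmark::RegisterMemoryManager(&g_memory_manager);  // hook up before running
```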
@@ -441,12 +457,21 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "r,m"(value) : "memory");
}
template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(
+#ifdef BENCHMARK_HAS_CXX11
+ Tp&& value
+#else
+ Tp& value
+#endif
+) {
#if defined(__clang__)
asm volatile("" : "+r,m"(value) : : "memory");
#else
@@ -457,6 +482,9 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
// Workaround for a bug with full argument copy overhead with GCC.
// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
(sizeof(Tp) <= sizeof(Tp*))>::type
@@ -465,6 +493,9 @@ inline BENCHMARK_ALWAYS_INLINE
}
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
(sizeof(Tp) > sizeof(Tp*))>::type
@@ -476,7 +507,7 @@ template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
(sizeof(Tp) <= sizeof(Tp*))>::type
- DoNotOptimize(Tp& value) {
+ DoNotOptimize(Tp&& value) {
asm volatile("" : "+m,r"(value) : : "memory");
}
@@ -484,7 +515,7 @@ template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
(sizeof(Tp) > sizeof(Tp*))>::type
- DoNotOptimize(Tp& value) {
+ DoNotOptimize(Tp&& value) {
asm volatile("" : "+m"(value) : : "memory");
}
@@ -493,12 +524,21 @@ inline BENCHMARK_ALWAYS_INLINE
// to use memory operations instead of operations with registers.
// TODO: Remove if GCC < 5 will be unsupported.
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "m"(value) : "memory");
}
template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(
+#ifdef BENCHMARK_HAS_CXX11
+ Tp&& value
+#else
+ Tp& value
+#endif
+) {
asm volatile("" : "+m"(value) : : "memory");
}
#endif
@@ -510,6 +550,9 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
#endif
#elif defined(_MSC_VER)
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
_ReadWriteBarrier();
@@ -520,6 +563,9 @@ inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
#endif
#else
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
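
Net effect of the `DoNotOptimize` changes: under C++11 the mutable overload takes `Tp&&`, so temporaries now bind to it, while the const-ref overload is deprecated because it lets the compiler assume the value is never written back. A short sketch of the intended call patterns:

```cpp
#include <benchmark/benchmark.h>

static void BM_Add(benchmark::State& state) {
  int a = 1, b = 2;
  for (auto _ : state) {
    benchmark::DoNotOptimize(a + b);  // rvalue: binds to the new Tp&& overload
    int sum = a + b;
    benchmark::DoNotOptimize(sum);    // lvalue: still supported, value kept live
  }
}
BENCHMARK(BM_Add);
```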
@@ -640,6 +686,16 @@ enum AggregationReportMode
ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
+enum Skipped
+#if defined(BENCHMARK_HAS_CXX11)
+ : unsigned
+#endif
+{
+ NotSkipped = 0,
+ SkippedWithMessage,
+ SkippedWithError
+};
+
} // namespace internal
// State is passed to a running Benchmark and contains state for the
@@ -676,8 +732,8 @@ class BENCHMARK_EXPORT State {
// }
bool KeepRunningBatch(IterationCount n);
- // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
- // by the current thread.
+ // REQUIRES: timer is running and 'SkipWithMessage(...)' or
+ // 'SkipWithError(...)' has not been called by the current thread.
// Stop the benchmark timer. If not called, the timer will be
// automatically stopped after the last iteration of the benchmark loop.
//
@@ -692,8 +748,8 @@ class BENCHMARK_EXPORT State {
// within each benchmark iteration, if possible.
void PauseTiming();
- // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
- // by the current thread.
+ // REQUIRES: timer is not running and 'SkipWithMessage(...)' or
+ // 'SkipWithError(...)' has not been called by the current thread.
// Start the benchmark timer. The timer is NOT running on entrance to the
// benchmark function. It begins running after control flow enters the
// benchmark loop.
@@ -703,8 +759,30 @@ class BENCHMARK_EXPORT State {
// within each benchmark iteration, if possible.
void ResumeTiming();
- // REQUIRES: 'SkipWithError(...)' has not been called previously by the
- // current thread.
+ // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
+ // called previously by the current thread.
+ // Report the benchmark as resulting in being skipped with the specified
+ // 'msg'.
+ // After this call the user may explicitly 'return' from the benchmark.
+ //
+ // If the ranged-for style of benchmark loop is used, the user must explicitly
+ // break from the loop, otherwise all future iterations will be run.
+ // If the 'KeepRunning()' loop is used the current thread will automatically
+ // exit the loop at the end of the current iteration.
+ //
+ // For threaded benchmarks only the current thread stops executing and future
+ // calls to `KeepRunning()` will block until all threads have completed
+ // the `KeepRunning()` loop. If multiple threads report being skipped only the
+ // first skip message is used.
+ //
+ // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit
+ // the current scope immediately. If the function is called from within
+ // the 'KeepRunning()' loop the current iteration will finish. It is the users
+ // responsibility to exit the scope as needed.
+ void SkipWithMessage(const std::string& msg);
+
+ // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
+ // called previously by the current thread.
// Report the benchmark as resulting in an error with the specified 'msg'.
// After this call the user may explicitly 'return' from the benchmark.
//
@@ -722,10 +800,13 @@ class BENCHMARK_EXPORT State {
// the current scope immediately. If the function is called from within
// the 'KeepRunning()' loop the current iteration will finish. It is the users
// responsibility to exit the scope as needed.
- void SkipWithError(const char* msg);
+ void SkipWithError(const std::string& msg);
+
+ // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called.
+ bool skipped() const { return internal::NotSkipped != skipped_; }
// Returns true if an error has been reported with 'SkipWithError(...)'.
- bool error_occurred() const { return error_occurred_; }
+ bool error_occurred() const { return internal::SkippedWithError == skipped_; }
// REQUIRES: called exactly once per iteration of the benchmarking loop.
// Set the manually measured time for this benchmark iteration, which
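
A sketch of how the split skip API reads at a call site; `SkipWithError` marks a failed run while `SkipWithMessage` records a benign skip, and both now take `std::string` (the file name below is illustrative):

```cpp
#include <benchmark/benchmark.h>

#include <fstream>

static void BM_ReadFile(benchmark::State& state) {
  std::ifstream in("testdata.bin", std::ios::binary);
  if (!in) {
    // Environment problem, not a measurement failure of interest: a benign
    // skip would instead be state.SkipWithMessage("testdata.bin not present");
    state.SkipWithError("could not open testdata.bin");
    return;  // explicit return, per the contract documented above
  }
  for (auto _ : state) {
    // ... measured work ...
  }
}
BENCHMARK(BM_ReadFile);
```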
@@ -796,11 +877,7 @@ class BENCHMARK_EXPORT State {
// BM_Compress 50 50 14115038 compress:27.3%
//
// REQUIRES: a benchmark has exited its benchmarking loop.
- void SetLabel(const char* label);
-
- void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
- this->SetLabel(str.c_str());
- }
+ void SetLabel(const std::string& label);
// Range arguments for this run. CHECKs if the argument has been set.
BENCHMARK_ALWAYS_INLINE
@@ -831,6 +908,9 @@ class BENCHMARK_EXPORT State {
return max_iterations - total_iterations_ + batch_leftover_;
}
+ BENCHMARK_ALWAYS_INLINE
+ std::string name() const { return name_; }
+
private:
// items we expect on the first cache line (ie 64 bytes of the struct)
// When total_iterations_ is 0, KeepRunning() and friends will return false.
@@ -848,7 +928,7 @@ class BENCHMARK_EXPORT State {
private:
bool started_;
bool finished_;
- bool error_occurred_;
+ internal::Skipped skipped_;
// items we don't need on the first cache line
std::vector<int64_t> range_;
@@ -860,9 +940,9 @@ class BENCHMARK_EXPORT State {
UserCounters counters;
private:
- State(IterationCount max_iters, const std::vector<int64_t>& ranges,
- int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager,
+ State(std::string name, IterationCount max_iters,
+ const std::vector<int64_t>& ranges, int thread_i, int n_threads,
+ internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement);
void StartKeepRunning();
@@ -871,6 +951,7 @@ class BENCHMARK_EXPORT State {
bool KeepRunningInternal(IterationCount n, bool is_batch);
void FinishKeepRunning();
+ const std::string name_;
const int thread_index_;
const int threads_;
@@ -902,7 +983,7 @@ inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
}
if (!started_) {
StartKeepRunning();
- if (!error_occurred_ && total_iterations_ >= n) {
+ if (!skipped() && total_iterations_ >= n) {
total_iterations_ -= n;
return true;
}
@@ -932,7 +1013,7 @@ struct State::StateIterator {
BENCHMARK_ALWAYS_INLINE
explicit StateIterator(State* st)
- : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
+ : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {}
public:
BENCHMARK_ALWAYS_INLINE
@@ -1083,11 +1164,12 @@ class BENCHMARK_EXPORT Benchmark {
Benchmark* MinWarmUpTime(double t);
// Specify the amount of iterations that should be run by this benchmark.
+ // This option overrides the `benchmark_min_time` flag.
// REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
//
// NOTE: This function should only be used when *exact* iteration control is
// needed and never to control or limit how long a benchmark runs, where
- // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
+ // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead.
Benchmark* Iterations(IterationCount n);
// Specify the amount of times to repeat this benchmark. This option overrides
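
For instance, assuming an existing `BM_Add` benchmark function, the distinction reads:

```cpp
// Exact iteration control: runs precisely 5000 iterations and, per the note
// above, overrides the --benchmark_min_time flag.
BENCHMARK(BM_Add)->Iterations(5000);

// Time-based control: run until at least 2.5 seconds have been measured.
BENCHMARK(BM_Add)->MinTime(2.5);
```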
@@ -1175,10 +1257,13 @@ class BENCHMARK_EXPORT Benchmark {
TimeUnit GetTimeUnit() const;
protected:
- explicit Benchmark(const char* name);
- void SetName(const char* name);
+ explicit Benchmark(const std::string& name);
+ void SetName(const std::string& name);
+ public:
+ const char* GetName() const;
int ArgsCnt() const;
+ const char* GetArgName(int arg) const;
private:
friend class BenchmarkFamilies;
@@ -1228,12 +1313,12 @@ class BENCHMARK_EXPORT Benchmark {
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
-internal::Benchmark* RegisterBenchmark(const char* name,
+internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn);
#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
#endif
// Remove all registered benchmarks. All pointers to previously registered
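
With the `std::string` overloads above, programmatically built names no longer need a trailing `.c_str()`. A sketch of runtime registration (names and sizes are illustrative):

```cpp
#include <benchmark/benchmark.h>

#include <algorithm>
#include <string>
#include <vector>

void RegisterFillBenchmarks() {
  for (int size : {64, 256, 1024}) {
    std::string name = "BM_Fill/" + std::to_string(size);  // built at runtime
    benchmark::RegisterBenchmark(name, [size](benchmark::State& state) {
      std::vector<char> buf(static_cast<size_t>(size));
      for (auto _ : state) {
        std::fill(buf.begin(), buf.end(), 'x');
        benchmark::DoNotOptimize(buf.data());
      }
    });
  }
}
```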
@@ -1245,10 +1330,10 @@ namespace internal {
// (ie those created using the BENCHMARK(...) macros.
class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
public:
- FunctionBenchmark(const char* name, Function* func)
+ FunctionBenchmark(const std::string& name, Function* func)
: Benchmark(name), func_(func) {}
- virtual void Run(State& st) BENCHMARK_OVERRIDE;
+ void Run(State& st) BENCHMARK_OVERRIDE;
private:
Function* func_;
@@ -1258,24 +1343,24 @@ class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
template <class Lambda>
class LambdaBenchmark : public Benchmark {
public:
- virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
+ void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
private:
template <class OLambda>
- LambdaBenchmark(const char* name, OLambda&& lam)
+ LambdaBenchmark(const std::string& name, OLambda&& lam)
: Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
LambdaBenchmark(LambdaBenchmark const&) = delete;
template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration)
- friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
+ friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&);
Lambda lambda_;
};
#endif
} // namespace internal
-inline internal::Benchmark* RegisterBenchmark(const char* name,
+inline internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn) {
return internal::RegisterBenchmarkInternal(
::new internal::FunctionBenchmark(name, fn));
@@ -1283,7 +1368,7 @@ inline internal::Benchmark* RegisterBenchmark(const char* name,
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
using BenchType =
internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
return internal::RegisterBenchmarkInternal(
@@ -1294,7 +1379,7 @@ internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
#if defined(BENCHMARK_HAS_CXX11) && \
(!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class... Args>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
Args&&... args) {
return benchmark::RegisterBenchmark(
name, [=](benchmark::State& st) { fn(st, args...); });
@@ -1308,7 +1393,7 @@ class Fixture : public internal::Benchmark {
public:
Fixture() : internal::Benchmark("") {}
- virtual void Run(State& st) BENCHMARK_OVERRIDE {
+ void Run(State& st) BENCHMARK_OVERRIDE {
this->SetUp(st);
this->BenchmarkCase(st);
this->TearDown(st);
@@ -1363,7 +1448,7 @@ class Fixture : public internal::Benchmark {
BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
- &__VA_ARGS__)))
+ __VA_ARGS__)))
#else
#define BENCHMARK(n) \
BENCHMARK_PRIVATE_DECLARE(n) = \
@@ -1430,37 +1515,37 @@ class Fixture : public internal::Benchmark {
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
-#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
- class BaseClass##_##Method##_Benchmark : public BaseClass { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
-#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
- class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "<" #a ">/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "<" #a ">/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
-#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
- class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
- public: \
- BaseClass##_##Method##_Benchmark() { \
- this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#ifdef BENCHMARK_HAS_CXX11
@@ -1472,7 +1557,7 @@ class Fixture : public internal::Benchmark {
} \
\
protected: \
- virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
@@ -1630,7 +1715,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
Run()
: run_type(RT_Iteration),
aggregate_unit(kTime),
- error_occurred(false),
+ skipped(internal::NotSkipped),
iterations(1),
threads(1),
time_unit(GetDefaultTimeUnit()),
@@ -1653,12 +1738,11 @@ class BENCHMARK_EXPORT BenchmarkReporter {
std::string aggregate_name;
StatisticUnit aggregate_unit;
std::string report_label; // Empty if not set by benchmark.
- bool error_occurred;
- std::string error_message;
+ internal::Skipped skipped;
+ std::string skip_message;
// Total iterations across all threads.
IterationCount iterations;
-
int64_t threads;
int64_t repetition_index;
int64_t repetitions;
@@ -1728,6 +1812,12 @@ class BENCHMARK_EXPORT BenchmarkReporter {
virtual bool ReportContext(const Context& context) = 0;
// Called once for each group of benchmark runs, gives information about
+ // the configurations of the runs.
+ virtual void ReportRunsConfig(double /*min_time*/,
+ bool /*has_explicit_iters*/,
+ IterationCount /*iters*/) {}
+
+ // Called once for each group of benchmark runs, gives information about
// cpu-time and heap memory usage during the benchmark run. If the group
// of runs contained more than two entries then 'report' contains additional
// elements representing the mean and standard deviation of those runs.
@@ -1784,8 +1874,8 @@ class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
: output_options_(opts_), name_field_width_(0), printed_header_(false) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
protected:
virtual void PrintRunData(const Run& report);
@@ -1800,9 +1890,9 @@ class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
public:
JSONReporter() : first_report_(true) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
- virtual void Finalize() BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ void Finalize() BENCHMARK_OVERRIDE;
private:
void PrintRunData(const Run& report);
@@ -1815,8 +1905,8 @@ class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
: public BenchmarkReporter {
public:
CSVReporter() : printed_header_(false) {}
- virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
- virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
private:
void PrintRunData(const Run& report);
@@ -1855,7 +1945,7 @@ inline double GetTimeUnitMultiplier(TimeUnit unit) {
// Creates a list of integer values for the given range and multiplier.
// This can be used together with ArgsProduct() to allow multiple ranges
-// with different multiplers.
+// with different multipliers.
// Example:
// ArgsProduct({
// CreateRange(0, 1024, /*multi=*/32),
diff --git a/contrib/restricted/google/benchmark/src/benchmark.cc b/contrib/restricted/google/benchmark/src/benchmark.cc
index ff2864804c..f1633b703f 100644
--- a/contrib/restricted/google/benchmark/src/benchmark.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark.cc
@@ -65,16 +65,25 @@ BM_DEFINE_bool(benchmark_list_tests, false);
// linked into the binary are run.
BM_DEFINE_string(benchmark_filter, "");
-// Minimum number of seconds we should run benchmark before results are
-// considered significant. For cpu-time based tests, this is the lower bound
+// Specification of how long to run the benchmark.
+//
+// It can be either an exact number of iterations (specified as `<integer>x`),
+// or a minimum number of seconds (specified as `<float>s`). If the latter
+// format (ie., min seconds) is used, the system may run the benchmark longer
+// until the results are considered significant.
+//
+// For backward compatibility, the `s` suffix may be omitted, in which case,
+// the specified number is interpreted as the number of seconds.
+//
+// For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test. For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
-BM_DEFINE_double(benchmark_min_time, 0.5);
+BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
// Minimum number of seconds a benchmark should be run before results should be
-// taken into account. This e.g can be neccessary for benchmarks of code which
-// needs to fill some form of cache before performance is of interrest.
+// taken into account. This e.g can be necessary for benchmarks of code which
+// needs to fill some form of cache before performance is of interest.
// Note: results gathered within this period are discarded and not used for
// reported result.
BM_DEFINE_double(benchmark_min_warmup_time, 0.0);
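
Concretely, the string flag accepts the spellings described in the comment above (the binary name is illustrative):

```
./my_benchmark --benchmark_min_time=2.5s   # at least 2.5 seconds per benchmark
./my_benchmark --benchmark_min_time=1000x  # exactly 1000 iterations
./my_benchmark --benchmark_min_time=0.5    # legacy form: bare number read as seconds
```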
@@ -148,18 +157,19 @@ void UseCharPointer(char const volatile*) {}
} // namespace internal
-State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
- int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager,
+State::State(std::string name, IterationCount max_iters,
+ const std::vector<int64_t>& ranges, int thread_i, int n_threads,
+ internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement)
: total_iterations_(0),
batch_leftover_(0),
max_iterations(max_iters),
started_(false),
finished_(false),
- error_occurred_(false),
+ skipped_(internal::NotSkipped),
range_(ranges),
complexity_n_(0),
+ name_(std::move(name)),
thread_index_(thread_i),
threads_(n_threads),
timer_(timer),
@@ -186,11 +196,14 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
#pragma nv_diagnostic push
#pragma nv_diag_suppress 1427
#endif
+#if defined(__NVCOMPILER)
+#pragma diagnostic push
+#pragma diag_suppress offset_in_non_POD_nonstandard
+#endif
// Offset tests to ensure commonly accessed data is on the first cache line.
const int cache_line_size = 64;
- static_assert(offsetof(State, error_occurred_) <=
- (cache_line_size - sizeof(error_occurred_)),
- "");
+ static_assert(
+ offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), "");
#if defined(__INTEL_COMPILER)
#pragma warning pop
#elif defined(__GNUC__)
@@ -199,11 +212,14 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
#if defined(__NVCC__)
#pragma nv_diagnostic pop
#endif
+#if defined(__NVCOMPILER)
+#pragma diagnostic pop
+#endif
}
void State::PauseTiming() {
// Add in time accumulated so far
- BM_CHECK(started_ && !finished_ && !error_occurred_);
+ BM_CHECK(started_ && !finished_ && !skipped());
timer_->StopTimer();
if (perf_counters_measurement_) {
std::vector<std::pair<std::string, double>> measurements;
@@ -220,21 +236,33 @@ void State::PauseTiming() {
}
void State::ResumeTiming() {
- BM_CHECK(started_ && !finished_ && !error_occurred_);
+ BM_CHECK(started_ && !finished_ && !skipped());
timer_->StartTimer();
if (perf_counters_measurement_) {
perf_counters_measurement_->Start();
}
}
-void State::SkipWithError(const char* msg) {
- BM_CHECK(msg);
- error_occurred_ = true;
+void State::SkipWithMessage(const std::string& msg) {
+ skipped_ = internal::SkippedWithMessage;
{
MutexLock l(manager_->GetBenchmarkMutex());
- if (manager_->results.has_error_ == false) {
- manager_->results.error_message_ = msg;
- manager_->results.has_error_ = true;
+ if (internal::NotSkipped == manager_->results.skipped_) {
+ manager_->results.skip_message_ = msg;
+ manager_->results.skipped_ = skipped_;
+ }
+ }
+ total_iterations_ = 0;
+ if (timer_->running()) timer_->StopTimer();
+}
+
+void State::SkipWithError(const std::string& msg) {
+ skipped_ = internal::SkippedWithError;
+ {
+ MutexLock l(manager_->GetBenchmarkMutex());
+ if (internal::NotSkipped == manager_->results.skipped_) {
+ manager_->results.skip_message_ = msg;
+ manager_->results.skipped_ = skipped_;
}
}
total_iterations_ = 0;
@@ -245,7 +273,7 @@ void State::SetIterationTime(double seconds) {
timer_->SetIterationTime(seconds);
}
-void State::SetLabel(const char* label) {
+void State::SetLabel(const std::string& label) {
MutexLock l(manager_->GetBenchmarkMutex());
manager_->results.report_label_ = label;
}
@@ -253,14 +281,14 @@ void State::SetLabel(const char* label) {
void State::StartKeepRunning() {
BM_CHECK(!started_ && !finished_);
started_ = true;
- total_iterations_ = error_occurred_ ? 0 : max_iterations;
+ total_iterations_ = skipped() ? 0 : max_iterations;
manager_->StartStopBarrier();
- if (!error_occurred_) ResumeTiming();
+ if (!skipped()) ResumeTiming();
}
void State::FinishKeepRunning() {
- BM_CHECK(started_ && (!finished_ || error_occurred_));
- if (!error_occurred_) {
+ BM_CHECK(started_ && (!finished_ || skipped()));
+ if (!skipped()) {
PauseTiming();
}
// Total iterations has now wrapped around past 0. Fix this.
@@ -338,14 +366,26 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
size_t num_repetitions_total = 0;
+ // This perfcounters object needs to be created before the runners vector
+ // below so it outlasts their lifetime.
+ PerfCountersMeasurement perfcounters(
+ StrSplit(FLAGS_benchmark_perf_counters, ','));
+
+ // Vector of benchmarks to run
std::vector<internal::BenchmarkRunner> runners;
runners.reserve(benchmarks.size());
+
+ // Count the number of benchmarks with threads to warn the user in case
+ // performance counters are used.
+ int benchmarks_with_threads = 0;
+
+ // Loop through all benchmarks
for (const BenchmarkInstance& benchmark : benchmarks) {
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
if (benchmark.complexity() != oNone)
reports_for_family = &per_family_reports[benchmark.family_index()];
-
- runners.emplace_back(benchmark, reports_for_family);
+ benchmarks_with_threads += (benchmark.threads() > 0);
+ runners.emplace_back(benchmark, &perfcounters, reports_for_family);
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
num_repetitions_total += num_repeats_of_this_instance;
if (reports_for_family)
@@ -353,6 +393,17 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
}
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
+ // The use of performance counters with threads would be unintuitive for
+ // the average user so we need to warn them about this case
+ if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
+ GetErrorLogInstance()
+ << "***WARNING*** There are " << benchmarks_with_threads
+ << " benchmarks with threads and " << perfcounters.num_counters()
+ << " performance counters were requested. Beware counters will "
+ "reflect the combined usage across all "
+ "threads.\n";
+ }
+
std::vector<size_t> repetition_indices;
repetition_indices.reserve(num_repetitions_total);
for (size_t runner_index = 0, num_runners = runners.size();
@@ -376,6 +427,12 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
if (runner.HasRepeatsRemaining()) continue;
// FIXME: report each repetition separately, not all of them in bulk.
+ display_reporter->ReportRunsConfig(
+ runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
+ if (file_reporter)
+ file_reporter->ReportRunsConfig(
+ runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
+
RunResults run_results = runner.GetResults();
// Maybe calculate complexity report
@@ -409,14 +466,15 @@ std::unique_ptr<BenchmarkReporter> CreateReporter(
typedef std::unique_ptr<BenchmarkReporter> PtrType;
if (name == "console") {
return PtrType(new ConsoleReporter(output_opts));
- } else if (name == "json") {
+ }
+ if (name == "json") {
return PtrType(new JSONReporter());
- } else if (name == "csv") {
+ }
+ if (name == "csv") {
return PtrType(new CSVReporter());
- } else {
- std::cerr << "Unexpected format: '" << name << "'\n";
- std::exit(1);
}
+ std::cerr << "Unexpected format: '" << name << "'\n";
+ std::exit(1);
}
BENCHMARK_RESTORE_DEPRECATED_WARNING
@@ -585,13 +643,17 @@ void PrintUsageAndExit() {
void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
if (time_unit_flag == "s") {
return SetDefaultTimeUnit(kSecond);
- } else if (time_unit_flag == "ms") {
+ }
+ if (time_unit_flag == "ms") {
return SetDefaultTimeUnit(kMillisecond);
- } else if (time_unit_flag == "us") {
+ }
+ if (time_unit_flag == "us") {
return SetDefaultTimeUnit(kMicrosecond);
- } else if (time_unit_flag == "ns") {
+ }
+ if (time_unit_flag == "ns") {
return SetDefaultTimeUnit(kNanosecond);
- } else if (!time_unit_flag.empty()) {
+ }
+ if (!time_unit_flag.empty()) {
PrintUsageAndExit();
}
}
@@ -604,7 +666,7 @@ void ParseCommandLineFlags(int* argc, char** argv) {
if (ParseBoolFlag(argv[i], "benchmark_list_tests",
&FLAGS_benchmark_list_tests) ||
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
- ParseDoubleFlag(argv[i], "benchmark_min_time",
+ ParseStringFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
&FLAGS_benchmark_min_warmup_time) ||
@@ -665,7 +727,7 @@ void PrintDefaultHelp() {
"benchmark"
" [--benchmark_list_tests={true|false}]\n"
" [--benchmark_filter=<regex>]\n"
- " [--benchmark_min_time=<min_time>]\n"
+ " [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_enable_random_interleaving={true|false}]\n"
@@ -676,6 +738,9 @@ void PrintDefaultHelp() {
" [--benchmark_out_format=<json|console|csv>]\n"
" [--benchmark_color={auto|true|false}]\n"
" [--benchmark_counters_tabular={true|false}]\n"
+#if defined HAVE_LIBPFM
+ " [--benchmark_perf_counters=<counter>,...]\n"
+#endif
" [--benchmark_context=<key>=<value>,...]\n"
" [--benchmark_time_unit={ns|us|ms|s}]\n"
" [--v=<verbosity>]\n");
diff --git a/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc b/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc
index 963fea22f3..286f986530 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark_api_internal.cc
@@ -93,24 +93,24 @@ State BenchmarkInstance::Run(
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement) const {
- State st(iters, args_, thread_id, threads_, timer, manager,
- perf_counters_measurement);
+ State st(name_.function_name, iters, args_, thread_id, threads_, timer,
+ manager, perf_counters_measurement);
benchmark_.Run(st);
return st;
}
void BenchmarkInstance::Setup() const {
if (setup_) {
- State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr,
- nullptr);
+ State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
+ nullptr, nullptr, nullptr);
setup_(st);
}
}
void BenchmarkInstance::Teardown() const {
if (teardown_) {
- State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr,
- nullptr);
+ State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
+ nullptr, nullptr, nullptr);
teardown_(st);
}
}
diff --git a/contrib/restricted/google/benchmark/src/benchmark_register.cc b/contrib/restricted/google/benchmark/src/benchmark_register.cc
index eae2c320f6..e447c9a2d3 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_register.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark_register.cc
@@ -204,7 +204,7 @@ bool FindBenchmarksInternal(const std::string& re,
// Benchmark
//=============================================================================//
-Benchmark::Benchmark(const char* name)
+Benchmark::Benchmark(const std::string& name)
: name_(name),
aggregation_report_mode_(ARM_Unspecified),
time_unit_(GetDefaultTimeUnit()),
@@ -230,7 +230,7 @@ Benchmark::Benchmark(const char* name)
Benchmark::~Benchmark() {}
Benchmark* Benchmark::Name(const std::string& name) {
- SetName(name.c_str());
+ SetName(name);
return this;
}
@@ -468,7 +468,9 @@ Benchmark* Benchmark::ThreadPerCpu() {
return this;
}
-void Benchmark::SetName(const char* name) { name_ = name; }
+void Benchmark::SetName(const std::string& name) { name_ = name; }
+
+const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
@@ -478,6 +480,12 @@ int Benchmark::ArgsCnt() const {
return static_cast<int>(args_.front().size());
}
+const char* Benchmark::GetArgName(int arg) const {
+ BM_CHECK_GE(arg, 0);
+ BM_CHECK_LT(arg, static_cast<int>(arg_names_.size()));
+ return arg_names_[arg].c_str();
+}
+
TimeUnit Benchmark::GetTimeUnit() const {
return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_;
}
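
The new accessors make a registered benchmark introspectable after construction. A sketch, assuming a benchmark registered with named arguments:

```cpp
#include <benchmark/benchmark.h>

void RegisterAndInspect() {
  benchmark::internal::Benchmark* b = benchmark::RegisterBenchmark(
      "BM_Demo", [](benchmark::State& state) {
        for (auto _ : state) {
        }
      });
  b->ArgNames({"bytes"})->Arg(1024);

  const char* name = b->GetName();      // "BM_Demo"
  const char* arg0 = b->GetArgName(0);  // "bytes"
  (void)name;
  (void)arg0;
}
```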
diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.cc b/contrib/restricted/google/benchmark/src/benchmark_runner.cc
index f9ffbc5afb..5f683fe423 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_runner.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark_runner.cc
@@ -28,11 +28,14 @@
#include <algorithm>
#include <atomic>
+#include <climits>
+#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
+#include <limits>
#include <memory>
#include <string>
#include <thread>
@@ -62,6 +65,8 @@ MemoryManager* memory_manager = nullptr;
namespace {
static constexpr IterationCount kMaxIterations = 1000000000;
+const double kDefaultMinTime =
+ std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
@@ -75,8 +80,8 @@ BenchmarkReporter::Run CreateRunReport(
report.run_name = b.name();
report.family_index = b.family_index();
report.per_family_instance_index = b.per_family_instance_index();
- report.error_occurred = results.has_error_;
- report.error_message = results.error_message_;
+ report.skipped = results.skipped_;
+ report.skip_message = results.skip_message_;
report.report_label = results.report_label_;
// This is the total iterations across all threads.
report.iterations = results.iterations;
@@ -85,7 +90,7 @@ BenchmarkReporter::Run CreateRunReport(
report.repetition_index = repetition_index;
report.repetitions = repeats;
- if (!report.error_occurred) {
+ if (!report.skipped) {
// This is the total time across all threads.
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
@@ -126,7 +131,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
State st =
b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
- BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
+ BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
MutexLock l(manager->GetBenchmarkMutex());
@@ -141,27 +146,100 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
manager->NotifyThreadComplete();
}
+double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
+ const BenchTimeType& iters_or_time) {
+ if (!IsZero(b.min_time())) return b.min_time();
+ // If the flag was used to specify number of iters, then return the default
+ // min_time.
+ if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime;
+
+ return iters_or_time.time;
+}
+
+IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
+ const BenchTimeType& iters_or_time) {
+ if (b.iterations() != 0) return b.iterations();
+
+ // We've already concluded that this flag is currently used to pass
+ // iters but do a check here again anyway.
+ BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
+ return iters_or_time.iters;
+}
+
} // end namespace
+BenchTimeType ParseBenchMinTime(const std::string& value) {
+ BenchTimeType ret;
+
+ if (value.empty()) {
+ ret.tag = BenchTimeType::TIME;
+ ret.time = 0.0;
+ return ret;
+ }
+
+ if (value.back() == 'x') {
+ char* p_end;
+ // Reset errno before it's changed by strtol.
+ errno = 0;
+ IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
+
+ // After a valid parse, p_end should have been set to
+ // point to the 'x' suffix.
+ BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
+ << "Malformed iters value passed to --benchmark_min_time: `" << value
+ << "`. Expected --benchmark_min_time=<integer>x.";
+
+ ret.tag = BenchTimeType::ITERS;
+ ret.iters = num_iters;
+ return ret;
+ }
+
+ bool has_suffix = value.back() == 's';
+ if (!has_suffix) {
+ BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
+ "Eg., `30s` for 30-seconds.";
+ }
+
+ char* p_end;
+ // Reset errno before it's changed by strtod.
+ errno = 0;
+ double min_time = std::strtod(value.c_str(), &p_end);
+
+ // After a successful parse, p_end should point to the suffix 's',
+ // or the end of the string if the suffix was omitted.
+ BM_CHECK(errno == 0 && p_end != nullptr &&
+ ((has_suffix && *p_end == 's') || *p_end == '\0'))
+ << "Malformed seconds value passed to --benchmark_min_time: `" << value
+ << "`. Expected --benchmark_min_time=<float>x.";
+
+ ret.tag = BenchTimeType::TIME;
+ ret.time = min_time;
+
+ return ret;
+}
+
BenchmarkRunner::BenchmarkRunner(
const benchmark::internal::BenchmarkInstance& b_,
+ PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
: b(b_),
reports_for_family(reports_for_family_),
- min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
+ parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
+ min_time(ComputeMinTime(b_, parsed_benchtime_flag)),
min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
? b.min_warmup_time()
: FLAGS_benchmark_min_warmup_time),
warmup_done(!(min_warmup_time > 0.0)),
repeats(b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions),
- has_explicit_iteration_count(b.iterations() != 0),
+ has_explicit_iteration_count(b.iterations() != 0 ||
+ parsed_benchtime_flag.tag ==
+ BenchTimeType::ITERS),
pool(b.threads() - 1),
- iters(has_explicit_iteration_count ? b.iterations() : 1),
- perf_counters_measurement(StrSplit(FLAGS_benchmark_perf_counters, ',')),
- perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
- ? &perf_counters_measurement
- : nullptr) {
+ iters(has_explicit_iteration_count
+ ? ComputeIters(b_, parsed_benchtime_flag)
+ : 1),
+ perf_counters_measurement_ptr(pcm_) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
@@ -174,7 +252,7 @@ BenchmarkRunner::BenchmarkRunner(
run_results.file_report_aggregates_only =
(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
- perf_counters_measurement.IsValid())
+ (perf_counters_measurement_ptr->num_counters() == 0))
<< "Perf counters were requested but could not be set up.";
}
}
@@ -263,7 +341,7 @@ bool BenchmarkRunner::ShouldReportIterationResults(
// Determine if this run should be reported;
// Either it has run for a sufficient amount of time
// or because an error was reported.
- return i.results.has_error_ ||
+ return i.results.skipped_ ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >=
GetMinTimeToApply() || // The elapsed time is large enough.
@@ -389,10 +467,7 @@ void BenchmarkRunner::DoOneRepetition() {
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
-
- BENCHMARK_DISABLE_DEPRECATED_WARNING
- memory_manager->Stop(memory_result);
- BENCHMARK_RESTORE_DEPRECATED_WARNING
+ memory_manager->Stop(*memory_result);
}
// Ok, now actually report.
@@ -402,7 +477,7 @@ void BenchmarkRunner::DoOneRepetition() {
if (reports_for_family) {
++reports_for_family->num_runs_done;
- if (!report.error_occurred) reports_for_family->Runs.push_back(report);
+ if (!report.skipped) reports_for_family->Runs.push_back(report);
}
run_results.non_aggregates.push_back(report);
diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.h b/contrib/restricted/google/benchmark/src/benchmark_runner.h
index 0174bd3401..db2fa04396 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_runner.h
+++ b/contrib/restricted/google/benchmark/src/benchmark_runner.h
@@ -25,7 +25,7 @@
namespace benchmark {
-BM_DECLARE_double(benchmark_min_time);
+BM_DECLARE_string(benchmark_min_time);
BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
@@ -44,9 +44,21 @@ struct RunResults {
bool file_report_aggregates_only = false;
};
+struct BENCHMARK_EXPORT BenchTimeType {
+ enum { ITERS, TIME } tag;
+ union {
+ IterationCount iters;
+ double time;
+ };
+};
+
+BENCHMARK_EXPORT
+BenchTimeType ParseBenchMinTime(const std::string& value);
+
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
+ benchmark::internal::PerfCountersMeasurement* pmc_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
int GetNumRepeats() const { return repeats; }
@@ -63,12 +75,19 @@ class BenchmarkRunner {
return reports_for_family;
}
+ double GetMinTime() const { return min_time; }
+
+ bool HasExplicitIters() const { return has_explicit_iteration_count; }
+
+ IterationCount GetIters() const { return iters; }
+
private:
RunResults run_results;
const benchmark::internal::BenchmarkInstance& b;
BenchmarkReporter::PerFamilyRunReports* reports_for_family;
+ BenchTimeType parsed_benchtime_flag;
const double min_time;
const double min_warmup_time;
bool warmup_done;
@@ -85,8 +104,7 @@ class BenchmarkRunner {
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
- PerfCountersMeasurement perf_counters_measurement;
- PerfCountersMeasurement* const perf_counters_measurement_ptr;
+ PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
struct IterationResults {
internal::ThreadManager::Result results;
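The BenchTimeType declared above is a tagged union: --benchmark_min_time is now a string flag that accepts either an explicit iteration count ("10x") or a duration in seconds ("2.5s"). A minimal sketch of the parsing idea under that suffix convention (this is not the library's exact ParseBenchMinTime; a bare number is assumed to fall back to seconds for backward compatibility):

    #include <cstdlib>
    #include <string>

    struct BenchTime {
      enum { ITERS, TIME } tag;
      union {
        long iters;
        double time;
      };
    };

    BenchTime ParseMinTime(const std::string& value) {
      BenchTime ret;
      if (!value.empty() && value.back() == 'x') {
        ret.tag = BenchTime::ITERS;
        ret.iters = std::strtol(value.c_str(), nullptr, 10);  // "10x" -> 10 iterations
      } else {
        // Trailing 's' (or a bare number) is read as seconds; strtod stops
        // at the suffix on its own.
        ret.tag = BenchTime::TIME;
        ret.time = std::strtod(value.c_str(), nullptr);  // "2.5s" -> 2.5 seconds
      }
      return ret;
    }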
diff --git a/contrib/restricted/google/benchmark/src/colorprint.cc b/contrib/restricted/google/benchmark/src/colorprint.cc
index 1a000a0637..9a653c5007 100644
--- a/contrib/restricted/google/benchmark/src/colorprint.cc
+++ b/contrib/restricted/google/benchmark/src/colorprint.cc
@@ -96,18 +96,18 @@ std::string FormatString(const char* msg, va_list args) {
// currently there is no error handling for failure, so this is hack.
BM_CHECK(ret >= 0);
- if (ret == 0) // handle empty expansion
+ if (ret == 0) { // handle empty expansion
return {};
- else if (static_cast<size_t>(ret) < size)
+ }
+ if (static_cast<size_t>(ret) < size) {
return local_buff;
- else {
- // we did not provide a long enough buffer on our first attempt.
- size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
- std::unique_ptr<char[]> buff(new char[size]);
- ret = vsnprintf(buff.get(), size, msg, args);
- BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
- return buff.get();
}
+ // we did not provide a long enough buffer on our first attempt.
+ size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
+ std::unique_ptr<char[]> buff(new char[size]);
+ ret = vsnprintf(buff.get(), size, msg, args);
+ BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
+ return buff.get();
}
std::string FormatString(const char* msg, ...) {
@@ -163,7 +163,7 @@ bool IsColorTerminal() {
#else
// On non-Windows platforms, we rely on the TERM variable. This list of
// supported TERM values is copied from Google Test:
- // <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>.
+ // <https://github.com/google/googletest/blob/main/googletest/src/gtest.cc#L2925>.
const char* const SUPPORTED_TERM_VALUES[] = {
"xterm", "xterm-color", "xterm-256color",
"screen", "screen-256color", "tmux",
diff --git a/contrib/restricted/google/benchmark/src/commandlineflags.cc b/contrib/restricted/google/benchmark/src/commandlineflags.cc
index 1f555b2757..dcb414959d 100644
--- a/contrib/restricted/google/benchmark/src/commandlineflags.cc
+++ b/contrib/restricted/google/benchmark/src/commandlineflags.cc
@@ -284,14 +284,15 @@ bool IsTruthyFlagValue(const std::string& value) {
char v = value[0];
return isalnum(v) &&
!(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
- } else if (!value.empty()) {
+ }
+ if (!value.empty()) {
std::string value_lower(value);
std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
[](char c) { return static_cast<char>(::tolower(c)); });
return !(value_lower == "false" || value_lower == "no" ||
value_lower == "off");
- } else
- return true;
+ }
+ return true;
}
} // end namespace benchmark
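Spelled out, the truthiness rules this restructured IsTruthyFlagValue implements (assuming, as upstream does, that the branch elided above this hunk guards the single-character case):

    // ""                                  -> true   (flag present, no value)
    // one char, alnum and not 0/f/F/n/N   -> true   (e.g. "1", "t", "y")
    // one char otherwise                  -> false  (e.g. "0", "n", "#")
    // "false" / "no" / "off", any case    -> false
    // any other non-empty string          -> true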
diff --git a/contrib/restricted/google/benchmark/src/complexity.h b/contrib/restricted/google/benchmark/src/complexity.h
index df29b48d29..0a0679b48b 100644
--- a/contrib/restricted/google/benchmark/src/complexity.h
+++ b/contrib/restricted/google/benchmark/src/complexity.h
@@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports);
// This data structure will contain the result returned by MinimalLeastSq
-// - coef : Estimated coeficient for the high-order term as
+// - coef : Estimated coefficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability
diff --git a/contrib/restricted/google/benchmark/src/console_reporter.cc b/contrib/restricted/google/benchmark/src/console_reporter.cc
index 3950e49814..10e05e133e 100644
--- a/contrib/restricted/google/benchmark/src/console_reporter.cc
+++ b/contrib/restricted/google/benchmark/src/console_reporter.cc
@@ -115,7 +115,7 @@ static std::string FormatTime(double time) {
if (time < 100.0) {
return FormatString("%10.1f", time);
}
- // Assuming the time ist at max 9.9999e+99 and we have 10 digits for the
+ // Assuming the time is at max 9.9999e+99 and we have 10 digits for the
// number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
if (time > 9999999999 /*max 10 digit number*/) {
return FormatString("%1.4e", time);
@@ -135,9 +135,13 @@ void ConsoleReporter::PrintRunData(const Run& result) {
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name().c_str());
- if (result.error_occurred) {
+ if (internal::SkippedWithError == result.skipped) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
- result.error_message.c_str());
+ result.skip_message.c_str());
+ printer(Out, COLOR_DEFAULT, "\n");
+ return;
+ } else if (internal::SkippedWithMessage == result.skipped) {
+ printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
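These two skip states are driven by the State API: SkippedWithMessage comes from state.SkipWithMessage() (new in 1.8.0) and SkippedWithError from state.SkipWithError(). A hedged usage sketch, where PlatformSupportsFeature is an invented placeholder:

    #include <benchmark/benchmark.h>

    static bool PlatformSupportsFeature() { return false; }  // hypothetical predicate

    static void BM_MaybeSkipped(benchmark::State& state) {
      if (!PlatformSupportsFeature()) {
        // Printed by the console reporter as: SKIPPED: 'feature missing'
        state.SkipWithMessage("feature missing");
        return;
      }
      for (auto _ : state) {
        // ... workload; a runtime failure here would call
        // state.SkipWithError("..."), printed as ERROR OCCURRED.
      }
    }
    BENCHMARK(BM_MaybeSkipped);
    BENCHMARK_MAIN();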
diff --git a/contrib/restricted/google/benchmark/src/csv_reporter.cc b/contrib/restricted/google/benchmark/src/csv_reporter.cc
index 83c94573f5..7b56da107e 100644
--- a/contrib/restricted/google/benchmark/src/csv_reporter.cc
+++ b/contrib/restricted/google/benchmark/src/csv_reporter.cc
@@ -109,10 +109,10 @@ BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
Out << CsvEscape(run.benchmark_name()) << ",";
- if (run.error_occurred) {
+ if (run.skipped) {
Out << std::string(elements.size() - 3, ',');
- Out << "true,";
- Out << CsvEscape(run.error_message) << "\n";
+ Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
+ Out << CsvEscape(run.skip_message) << "\n";
return;
}
diff --git a/contrib/restricted/google/benchmark/src/cycleclock.h b/contrib/restricted/google/benchmark/src/cycleclock.h
index 827f21b927..1295880b2e 100644
--- a/contrib/restricted/google/benchmark/src/cycleclock.h
+++ b/contrib/restricted/google/benchmark/src/cycleclock.h
@@ -36,7 +36,8 @@
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
-#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64)
+#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
+ !defined(_M_ARM64EC)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
@@ -114,7 +115,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
-#elif defined(COMPILER_MSVC) && defined(_M_ARM64)
+#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
// See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
// and https://reviews.llvm.org/D53115
int64_t virtual_timer_value;
diff --git a/contrib/restricted/google/benchmark/src/internal_macros.h b/contrib/restricted/google/benchmark/src/internal_macros.h
index 396a390afb..8dd7d0c650 100644
--- a/contrib/restricted/google/benchmark/src/internal_macros.h
+++ b/contrib/restricted/google/benchmark/src/internal_macros.h
@@ -42,6 +42,12 @@
#define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
#define BENCHMARK_OS_WINDOWS 1
+ // WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
+ // We include windows.h which implicitly includes winapifamily.h for compatibility.
+ #ifndef NOMINMAX
+ #define NOMINMAX
+ #endif
+ #include <windows.h>
#if defined(WINAPI_FAMILY_PARTITION)
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#define BENCHMARK_OS_WINDOWS_WIN32 1
diff --git a/contrib/restricted/google/benchmark/src/json_reporter.cc b/contrib/restricted/google/benchmark/src/json_reporter.cc
index d55a0e6f0b..6559dfd5e6 100644
--- a/contrib/restricted/google/benchmark/src/json_reporter.cc
+++ b/contrib/restricted/google/benchmark/src/json_reporter.cc
@@ -254,9 +254,12 @@ void JSONReporter::PrintRunData(Run const& run) {
BENCHMARK_UNREACHABLE();
}()) << ",\n";
}
- if (run.error_occurred) {
- out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
- out << indent << FormatKV("error_message", run.error_message) << ",\n";
+ if (internal::SkippedWithError == run.skipped) {
+ out << indent << FormatKV("error_occurred", true) << ",\n";
+ out << indent << FormatKV("error_message", run.skip_message) << ",\n";
+ } else if (internal::SkippedWithMessage == run.skipped) {
+ out << indent << FormatKV("skipped", true) << ",\n";
+ out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
@@ -294,7 +297,8 @@ void JSONReporter::PrintRunData(Run const& run) {
out << ",\n"
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
- auto report_if_present = [&out, &indent](const char* label, int64_t val) {
+ auto report_if_present = [&out, &indent](const std::string& label,
+ int64_t val) {
if (val != MemoryManager::TombstoneValue)
out << ",\n" << indent << FormatKV(label, val);
};
diff --git a/contrib/restricted/google/benchmark/src/perf_counters.cc b/contrib/restricted/google/benchmark/src/perf_counters.cc
index 8a60088ba7..5f2ac282ab 100644
--- a/contrib/restricted/google/benchmark/src/perf_counters.cc
+++ b/contrib/restricted/google/benchmark/src/perf_counters.cc
@@ -29,96 +29,215 @@ namespace internal {
constexpr size_t PerfCounterValues::kMaxCounters;
#if defined HAVE_LIBPFM
+
+size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
+ // Create a pointer for multiple reads
+ const size_t bufsize = values_.size() * sizeof(values_[0]);
+ char* ptr = reinterpret_cast<char*>(values_.data());
+ size_t size = bufsize;
+ for (int lead : leaders) {
+ auto read_bytes = ::read(lead, ptr, size);
+ if (read_bytes >= ssize_t(sizeof(uint64_t))) {
+ // Actual data bytes are all bytes minus initial padding
+ std::size_t data_bytes = read_bytes - sizeof(uint64_t);
+ // This should be very cheap since it's in hot cache
+ std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
+ // Increment our counters
+ ptr += data_bytes;
+ size -= data_bytes;
+ } else {
+ int err = errno;
+ GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
+ << " " << ::strerror(err) << "\n";
+ return 0;
+ }
+ }
+ return (bufsize - size) / sizeof(uint64_t);
+}
+
const bool PerfCounters::kSupported = true;
bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; }
+bool PerfCounters::IsCounterSupported(const std::string& name) {
+ perf_event_attr_t attr;
+ std::memset(&attr, 0, sizeof(attr));
+ pfm_perf_encode_arg_t arg;
+ std::memset(&arg, 0, sizeof(arg));
+ arg.attr = &attr;
+ const int mode = PFM_PLM3; // user mode only
+ int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
+ &arg);
+ return (ret == PFM_SUCCESS);
+}
+
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
- if (counter_names.empty()) {
- return NoCounters();
- }
- if (counter_names.size() > PerfCounterValues::kMaxCounters) {
- GetErrorLogInstance()
- << counter_names.size()
- << " counters were requested. The minimum is 1, the maximum is "
- << PerfCounterValues::kMaxCounters << "\n";
- return NoCounters();
- }
- std::vector<int> counter_ids(counter_names.size());
+  // Valid counters will populate these vectors, but we start empty
+ std::vector<std::string> valid_names;
+ std::vector<int> counter_ids;
+ std::vector<int> leader_ids;
- const int mode = PFM_PLM3; // user mode only
+  // Reserve space for the maximum possible
+ valid_names.reserve(counter_names.size());
+ counter_ids.reserve(counter_names.size());
+
+ const int kCounterMode = PFM_PLM3; // user mode only
+
+  // Group leaders will be assigned on demand. The idea is that once we fail
+  // to create a counter descriptor, the likely reason is that this group has
+  // maxed out, so we reset group_id to -1 and retry, giving the algorithm a
+  // chance to create a new group leader to hold the next set of counters.
+ int group_id = -1;
+
+ // Loop through all performance counters
for (size_t i = 0; i < counter_names.size(); ++i) {
- const bool is_first = i == 0;
- struct perf_event_attr attr {};
- attr.size = sizeof(attr);
- const int group_id = !is_first ? counter_ids[0] : -1;
+    // We are about to push into the valid names vector;
+    // check that we have not already reached the maximum
+ if (valid_names.size() == PerfCounterValues::kMaxCounters) {
+ // Log a message if we maxed out and stop adding
+ GetErrorLogInstance()
+ << counter_names.size() << " counters were requested. The maximum is "
+ << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
+ << " were already added. All remaining counters will be ignored\n";
+ // stop the loop and return what we have already
+ break;
+ }
+
+ // Check if this name is empty
const auto& name = counter_names[i];
if (name.empty()) {
- GetErrorLogInstance() << "A counter name was the empty string\n";
- return NoCounters();
+ GetErrorLogInstance()
+ << "A performance counter name was the empty string\n";
+ continue;
}
+
+ // Here first means first in group, ie the group leader
+ const bool is_first = (group_id < 0);
+
+ // This struct will be populated by libpfm from the counter string
+ // and then fed into the syscall perf_event_open
+ struct perf_event_attr attr {};
+ attr.size = sizeof(attr);
+
+ // This is the input struct to libpfm.
pfm_perf_encode_arg_t arg{};
arg.attr = &attr;
-
- const int pfm_get =
- pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
+ const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
+ PFM_OS_PERF_EVENT, &arg);
if (pfm_get != PFM_SUCCESS) {
- GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
- return NoCounters();
+ GetErrorLogInstance()
+ << "Unknown performance counter name: " << name << "\n";
+ continue;
}
- attr.disabled = is_first;
- // Note: the man page for perf_event_create suggests inerit = true and
+
+ // We then proceed to populate the remaining fields in our attribute struct
+ // Note: the man page for perf_event_create suggests inherit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
+ attr.disabled = is_first;
attr.inherit = true;
attr.pinned = is_first;
attr.exclude_kernel = true;
attr.exclude_user = false;
attr.exclude_hv = true;
- // Read all counters in one read.
+
+ // Read all counters in a group in one read.
attr.read_format = PERF_FORMAT_GROUP;
int id = -1;
- static constexpr size_t kNrOfSyscallRetries = 5;
- // Retry syscall as it was interrupted often (b/64774091).
- for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
- ++num_retries) {
- id = perf_event_open(&attr, 0, -1, group_id, 0);
- if (id >= 0 || errno != EINTR) {
- break;
+ while (id < 0) {
+ static constexpr size_t kNrOfSyscallRetries = 5;
+ // Retry syscall as it was interrupted often (b/64774091).
+ for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
+ ++num_retries) {
+ id = perf_event_open(&attr, 0, -1, group_id, 0);
+ if (id >= 0 || errno != EINTR) {
+ break;
+ }
+ }
+ if (id < 0) {
+ // If the file descriptor is negative we might have reached a limit
+ // in the current group. Set the group_id to -1 and retry
+ if (group_id >= 0) {
+ // Create a new group
+ group_id = -1;
+ } else {
+          // At this point we have already retried with a new group id and
+          // failed, so we give up.
+ break;
+ }
}
}
+
+ // We failed to get a new file descriptor. We might have reached a hard
+ // hardware limit that cannot be resolved even with group multiplexing
if (id < 0) {
- GetErrorLogInstance()
- << "Failed to get a file descriptor for " << name << "\n";
- return NoCounters();
- }
+ GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
+ "for performance counter "
+ << name << ". Ignoring\n";
- counter_ids[i] = id;
+ // We give up on this counter but try to keep going
+ // as the others would be fine
+ continue;
+ }
+ if (group_id < 0) {
+ // This is a leader, store and assign it to the current file descriptor
+ leader_ids.push_back(id);
+ group_id = id;
+ }
+ // This is a valid counter, add it to our descriptor's list
+ counter_ids.push_back(id);
+ valid_names.push_back(name);
}
- if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
- GetErrorLogInstance() << "Failed to start counters\n";
- return NoCounters();
+
+  // Loop through all group leaders, activating them.
+  // There is another option of starting ALL counters in a process, but
+  // that would be too far-reaching an intrusion. If the user is using PMCs
+  // themselves, this would have a side effect on them. It is friendlier
+  // to loop through all groups individually.
+ for (int lead : leader_ids) {
+ if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
+ // This should never happen but if it does, we give up on the
+ // entire batch as recovery would be a mess.
+ GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
+ "Claring out all counters.\n";
+
+      // Close all performance counters
+ for (int id : counter_ids) {
+ ::close(id);
+ }
+
+ // Return an empty object so our internal state is still good and
+ // the process can continue normally without impact
+ return NoCounters();
+ }
}
- return PerfCounters(counter_names, std::move(counter_ids));
+ return PerfCounters(std::move(valid_names), std::move(counter_ids),
+ std::move(leader_ids));
}
void PerfCounters::CloseCounters() const {
if (counter_ids_.empty()) {
return;
}
- ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE);
+ for (int lead : leader_ids_) {
+ ioctl(lead, PERF_EVENT_IOC_DISABLE);
+ }
for (int fd : counter_ids_) {
close(fd);
}
}
#else // defined HAVE_LIBPFM
+size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
+
const bool PerfCounters::kSupported = false;
bool PerfCounters::Initialize() { return false; }
+bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
+
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
@@ -130,31 +249,10 @@ PerfCounters PerfCounters::Create(
void PerfCounters::CloseCounters() const {}
#endif // defined HAVE_LIBPFM
-Mutex PerfCountersMeasurement::mutex_;
-int PerfCountersMeasurement::ref_count_ = 0;
-PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters();
-
PerfCountersMeasurement::PerfCountersMeasurement(
const std::vector<std::string>& counter_names)
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
- MutexLock l(mutex_);
- if (ref_count_ == 0) {
- counters_ = PerfCounters::Create(counter_names);
- }
- // We chose to increment it even if `counters_` ends up invalid,
- // so that we don't keep trying to create, and also since the dtor
- // will decrement regardless of `counters_`'s validity
- ++ref_count_;
-
- BM_CHECK(!counters_.IsValid() || counters_.names() == counter_names);
-}
-
-PerfCountersMeasurement::~PerfCountersMeasurement() {
- MutexLock l(mutex_);
- --ref_count_;
- if (ref_count_ == 0) {
- counters_ = PerfCounters::NoCounters();
- }
+ counters_ = PerfCounters::Create(counter_names);
}
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
@@ -162,6 +260,7 @@ PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
CloseCounters();
counter_ids_ = std::move(other.counter_ids_);
+ leader_ids_ = std::move(other.leader_ids_);
counter_names_ = std::move(other.counter_names_);
}
return *this;
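Read() above leans on the kernel's group read format: with attr.read_format set to PERF_FORMAT_GROUP and no other format bits, read(2) on a group leader yields a header word followed by one value per event, as documented in perf_event_open(2). A sketch of decoding a single group under that assumption (not the library's code, which instead memmoves each group's values over its header word so all counters pack contiguously in values_ and operator[] needs no padding offset):

    #include <cstdint>
    #include <unistd.h>
    #include <vector>

    // perf_event_open(2), PERF_FORMAT_GROUP and no other bits:
    //   struct read_format { uint64_t nr; uint64_t values[nr]; };
    bool ReadGroup(int leader_fd, std::vector<uint64_t>* out) {
      uint64_t buf[1 + 64];  // header word + room for up to 64 values
      ssize_t n = ::read(leader_fd, buf, sizeof(buf));
      if (n < static_cast<ssize_t>(sizeof(uint64_t))) return false;
      const uint64_t nr = buf[0];  // number of events in this group
      if (nr > 64) return false;   // buffer too small for this group
      for (uint64_t i = 0; i < nr; ++i) out->push_back(buf[1 + i]);
      return true;
    }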
diff --git a/contrib/restricted/google/benchmark/src/perf_counters.h b/contrib/restricted/google/benchmark/src/perf_counters.h
index 680555d4b0..152a6f2561 100644
--- a/contrib/restricted/google/benchmark/src/perf_counters.h
+++ b/contrib/restricted/google/benchmark/src/perf_counters.h
@@ -17,6 +17,7 @@
#include <array>
#include <cstdint>
+#include <cstring>
#include <memory>
#include <vector>
@@ -44,18 +45,21 @@ namespace internal {
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
-// to Snapshot(). The values are populated such that
-// perfCounters->names()[i]'s value is obtained at position i (as given by
-// operator[]) of this object.
-class PerfCounterValues {
+// to Snapshot(). The Read() method relocates individual reads, discarding
+// the initial padding from each group leader in the values buffer such that
+// all user accesses through the [] operator are correct.
+class BENCHMARK_EXPORT PerfCounterValues {
public:
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
BM_CHECK_LE(nr_counters_, kMaxCounters);
}
- uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
+ // We are reading correctly now so the values don't need to skip padding
+ uint64_t operator[](size_t pos) const { return values_[pos]; }
- static constexpr size_t kMaxCounters = 3;
+  // The maximum is increased to 32 only because the buffer
+  // is std::array<> backed
+ static constexpr size_t kMaxCounters = 32;
private:
friend class PerfCounters;
@@ -66,7 +70,14 @@ class PerfCounterValues {
sizeof(uint64_t) * (kPadding + nr_counters_)};
}
- static constexpr size_t kPadding = 1;
+  // The read logic is complex, and as the goal of this class is to
+  // abstract away the intricacies of the reading process, this is
+  // a better place for it
+ size_t Read(const std::vector<int>& leaders);
+
+  // The padding is now 2 due to the reading algorithm (the initial
+  // padding plus the current read's padding)
+ static constexpr size_t kPadding = 2;
std::array<uint64_t, kPadding + kMaxCounters> values_;
const size_t nr_counters_;
};
@@ -79,10 +90,11 @@ class BENCHMARK_EXPORT PerfCounters final {
// True iff this platform supports performance counters.
static const bool kSupported;
- bool IsValid() const { return !counter_names_.empty(); }
+ // Returns an empty object
static PerfCounters NoCounters() { return PerfCounters(); }
~PerfCounters() { CloseCounters(); }
+ PerfCounters() = default;
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
PerfCounters& operator=(PerfCounters&&) noexcept;
@@ -92,11 +104,15 @@ class BENCHMARK_EXPORT PerfCounters final {
// initialization here.
static bool Initialize();
+  // Check if the given counter is supported, if the app wants to
+  // check before passing it in
+ static bool IsCounterSupported(const std::string& name);
+
// Return a PerfCounters object ready to read the counters with the names
// specified. The values are user-mode only. The counter name format is
// implementation and OS specific.
- // TODO: once we move to C++-17, this should be a std::optional, and then the
- // IsValid() boolean can be dropped.
+ // In case of failure, this method will in the worst case return an
+ // empty object whose state will still be valid.
static PerfCounters Create(const std::vector<std::string>& counter_names);
// Take a snapshot of the current value of the counters into the provided
@@ -105,10 +121,7 @@ class BENCHMARK_EXPORT PerfCounters final {
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
assert(values != nullptr);
- assert(IsValid());
- auto buffer = values->get_data_buffer();
- auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
- return static_cast<size_t>(read_bytes) == buffer.second;
+ return values->Read(leader_ids_) == counter_ids_.size();
#else
(void)values;
return false;
@@ -120,13 +133,15 @@ class BENCHMARK_EXPORT PerfCounters final {
private:
PerfCounters(const std::vector<std::string>& counter_names,
- std::vector<int>&& counter_ids)
- : counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {}
- PerfCounters() = default;
+ std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
+ : counter_ids_(std::move(counter_ids)),
+ leader_ids_(std::move(leader_ids)),
+ counter_names_(counter_names) {}
void CloseCounters() const;
std::vector<int> counter_ids_;
+ std::vector<int> leader_ids_;
std::vector<std::string> counter_names_;
};
@@ -134,33 +149,25 @@ class BENCHMARK_EXPORT PerfCounters final {
class BENCHMARK_EXPORT PerfCountersMeasurement final {
public:
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
- ~PerfCountersMeasurement();
-
- // The only way to get to `counters_` is after ctor-ing a
- // `PerfCountersMeasurement`, which means that `counters_`'s state is, here,
- // decided (either invalid or valid) and won't change again even if a ctor is
- // concurrently running with this. This is preferring efficiency to
- // maintainability, because the address of the static can be known at compile
- // time.
- bool IsValid() const {
- MutexLock l(mutex_);
- return counters_.IsValid();
- }
- BENCHMARK_ALWAYS_INLINE void Start() {
- assert(IsValid());
- MutexLock l(mutex_);
+ size_t num_counters() const { return counters_.num_counters(); }
+
+ std::vector<std::string> names() const { return counters_.names(); }
+
+ BENCHMARK_ALWAYS_INLINE bool Start() {
+ if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&start_values_);
ClobberMemory();
+
+ return valid_read_;
}
BENCHMARK_ALWAYS_INLINE bool Stop(
std::vector<std::pair<std::string, double>>& measurements) {
- assert(IsValid());
- MutexLock l(mutex_);
+ if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
@@ -177,9 +184,7 @@ class BENCHMARK_EXPORT PerfCountersMeasurement final {
}
private:
- static Mutex mutex_;
- GUARDED_BY(mutex_) static int ref_count_;
- GUARDED_BY(mutex_) static PerfCounters counters_;
+ PerfCounters counters_;
bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
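With the static registry gone, each PerfCountersMeasurement now owns its counters outright and no lock is needed around Start()/Stop(). A hypothetical usage sketch of that bracket (the counter names are libpfm-style and assumed available on the host; the include path is a guess at the internal header):

    #include <string>
    #include <utility>
    #include <vector>

    #include "src/perf_counters.h"  // internal header, path assumed

    void MeasureRegion() {
      benchmark::internal::PerfCountersMeasurement pcm({"CYCLES", "INSTRUCTIONS"});
      if (pcm.num_counters() == 0) return;  // setup failed or unsupported platform

      std::vector<std::pair<std::string, double>> deltas;
      pcm.Start();
      // ... region under measurement ...
      if (pcm.Stop(deltas)) {
        // deltas[i] = {counter name, end - start} for each valid counter
      }
    }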
diff --git a/contrib/restricted/google/benchmark/src/sleep.cc b/contrib/restricted/google/benchmark/src/sleep.cc
deleted file mode 100644
index ab59000f24..0000000000
--- a/contrib/restricted/google/benchmark/src/sleep.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2015 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "sleep.h"
-
-#include <cerrno>
-#include <cstdlib>
-#include <ctime>
-
-#include "internal_macros.h"
-
-#ifdef BENCHMARK_OS_WINDOWS
-#include <windows.h>
-#endif
-
-#ifdef BENCHMARK_OS_ZOS
-#include <unistd.h>
-#endif
-
-namespace benchmark {
-#ifdef BENCHMARK_OS_WINDOWS
-// Window's Sleep takes milliseconds argument.
-void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
-void SleepForSeconds(double seconds) {
- SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
-}
-#else // BENCHMARK_OS_WINDOWS
-void SleepForMicroseconds(int microseconds) {
-#ifdef BENCHMARK_OS_ZOS
- // z/OS does not support nanosleep. Instead call sleep() and then usleep() to
- // sleep for the remaining microseconds because usleep() will fail if its
- // argument is greater than 1000000.
- div_t sleepTime = div(microseconds, kNumMicrosPerSecond);
- int seconds = sleepTime.quot;
- while (seconds != 0) seconds = sleep(seconds);
- while (usleep(sleepTime.rem) == -1 && errno == EINTR)
- ;
-#else
- struct timespec sleep_time;
- sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
- sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
- while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
- ; // Ignore signals and wait for the full interval to elapse.
-#endif
-}
-
-void SleepForMilliseconds(int milliseconds) {
- SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
-}
-
-void SleepForSeconds(double seconds) {
- SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
-}
-#endif // BENCHMARK_OS_WINDOWS
-} // end namespace benchmark
diff --git a/contrib/restricted/google/benchmark/src/sleep.h b/contrib/restricted/google/benchmark/src/sleep.h
deleted file mode 100644
index f98551afe2..0000000000
--- a/contrib/restricted/google/benchmark/src/sleep.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef BENCHMARK_SLEEP_H_
-#define BENCHMARK_SLEEP_H_
-
-namespace benchmark {
-const int kNumMillisPerSecond = 1000;
-const int kNumMicrosPerMilli = 1000;
-const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
-const int kNumNanosPerMicro = 1000;
-const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
-
-void SleepForMilliseconds(int milliseconds);
-void SleepForSeconds(double seconds);
-} // end namespace benchmark
-
-#endif // BENCHMARK_SLEEP_H_
diff --git a/contrib/restricted/google/benchmark/src/statistics.cc b/contrib/restricted/google/benchmark/src/statistics.cc
index 5ba885ab00..c4b54b271f 100644
--- a/contrib/restricted/google/benchmark/src/statistics.cc
+++ b/contrib/restricted/google/benchmark/src/statistics.cc
@@ -89,9 +89,8 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
- auto error_count =
- std::count_if(reports.begin(), reports.end(),
- [](Run const& run) { return run.error_occurred; });
+ auto error_count = std::count_if(reports.begin(), reports.end(),
+ [](Run const& run) { return run.skipped; });
if (reports.size() - error_count < 2) {
// We don't report aggregated data if there was a single run.
@@ -133,7 +132,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
for (Run const& run : reports) {
BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
BM_CHECK_EQ(run_iterations, run.iterations);
- if (run.error_occurred) continue;
+ if (run.skipped) continue;
real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
// user counters
diff --git a/contrib/restricted/google/benchmark/src/statistics.h b/contrib/restricted/google/benchmark/src/statistics.h
index b0d2c05e72..6e5560e8f1 100644
--- a/contrib/restricted/google/benchmark/src/statistics.h
+++ b/contrib/restricted/google/benchmark/src/statistics.h
@@ -22,9 +22,10 @@
namespace benchmark {
-// Return a vector containing the mean, median and standard devation information
-// (and any user-specified info) for the specified list of reports. If 'reports'
-// contains less than two non-errored runs an empty vector is returned
+// Return a vector containing the mean, median and standard deviation
+// information (and any user-specified info) for the specified list of reports.
+// If 'reports' contains less than two non-errored runs an empty vector is
+// returned
BENCHMARK_EXPORT
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports);
diff --git a/contrib/restricted/google/benchmark/src/string_util.cc b/contrib/restricted/google/benchmark/src/string_util.cc
index b3196fc266..5e2d24a3cd 100644
--- a/contrib/restricted/google/benchmark/src/string_util.cc
+++ b/contrib/restricted/google/benchmark/src/string_util.cc
@@ -94,10 +94,10 @@ std::string ExponentToPrefix(int64_t exponent, bool iec) {
const char* array =
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
- if (iec)
+ if (iec) {
return array[index] + std::string("i");
- else
- return std::string(1, array[index]);
+ }
+ return std::string(1, array[index]);
}
std::string ToBinaryStringFullySpecified(double value, double threshold,
diff --git a/contrib/restricted/google/benchmark/src/sysinfo.cc b/contrib/restricted/google/benchmark/src/sysinfo.cc
index e763d5c481..80eece3ae7 100644
--- a/contrib/restricted/google/benchmark/src/sysinfo.cc
+++ b/contrib/restricted/google/benchmark/src/sysinfo.cc
@@ -46,6 +46,9 @@
#if defined(BENCHMARK_OS_QURT)
#error #include <qurt.h>
#endif
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+#include <pthread.h>
+#endif
#include <algorithm>
#include <array>
@@ -62,15 +65,17 @@
#include <limits>
#include <locale>
#include <memory>
+#include <random>
#include <sstream>
#include <utility>
+#include "benchmark/benchmark.h"
#include "check.h"
#include "cycleclock.h"
#include "internal_macros.h"
#include "log.h"
-#include "sleep.h"
#include "string_util.h"
+#include "timers.h"
namespace benchmark {
namespace {
@@ -423,19 +428,12 @@ std::string GetSystemName() {
#ifndef UNICODE
str = std::string(hostname, DWCOUNT);
#else
- std::vector<wchar_t> converted;
- // Find the length first.
- int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, hostname,
- DWCOUNT, converted.begin(), 0);
- // TODO: Report error from GetLastError()?
- if (len == 0) return std::string("");
- converted.reserve(len + 1);
-
- len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, hostname, DWCOUNT,
- converted.begin(), converted.size());
- // TODO: Report error from GetLastError()?
- if (len == 0) return std::string("");
- str = std::string(converted.data());
+ // `WideCharToMultiByte` returns `0` when conversion fails.
+ int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname,
+ DWCOUNT, NULL, 0, NULL, NULL);
+ str.resize(len);
+ WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0],
+ str.size(), NULL, NULL);
#endif
return str;
#elif defined(BENCHMARK_OS_QURT)
@@ -448,7 +446,7 @@ std::string GetSystemName() {
return str;
#else
#ifndef HOST_NAME_MAX
-#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined
+#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined
#define HOST_NAME_MAX 64
#elif defined(BENCHMARK_OS_NACL)
#define HOST_NAME_MAX 64
@@ -551,6 +549,80 @@ int GetNumCPUs() {
BENCHMARK_UNREACHABLE();
}
+class ThreadAffinityGuard final {
+ public:
+ ThreadAffinityGuard() : reset_affinity(SetAffinity()) {
+ if (!reset_affinity)
+ std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU "
+ "frequency may be incorrect."
+ << std::endl;
+ }
+
+ ~ThreadAffinityGuard() {
+ if (!reset_affinity) return;
+
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ int ret = pthread_setaffinity_np(self, sizeof(previous_affinity),
+ &previous_affinity);
+ if (ret == 0) return;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity);
+ if (ret != 0) return;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ PrintErrorAndDie("Failed to reset thread affinity");
+ }
+
+ ThreadAffinityGuard(ThreadAffinityGuard&&) = delete;
+ ThreadAffinityGuard(const ThreadAffinityGuard&) = delete;
+ ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete;
+ ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete;
+
+ private:
+ bool SetAffinity() {
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ int ret;
+ self = pthread_self();
+ ret = pthread_getaffinity_np(self, sizeof(previous_affinity),
+ &previous_affinity);
+ if (ret != 0) return false;
+
+ cpu_set_t affinity;
+ memcpy(&affinity, &previous_affinity, sizeof(affinity));
+
+ bool is_first_cpu = true;
+
+ for (int i = 0; i < CPU_SETSIZE; ++i)
+ if (CPU_ISSET(i, &affinity)) {
+ if (is_first_cpu)
+ is_first_cpu = false;
+ else
+ CPU_CLR(i, &affinity);
+ }
+
+ if (is_first_cpu) return false;
+
+ ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity);
+ return ret == 0;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ self = GetCurrentThread();
+ DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber();
+ previous_affinity = SetThreadAffinityMask(self, mask);
+ return previous_affinity != 0;
+#else
+ return false;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ }
+
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ pthread_t self;
+ cpu_set_t previous_affinity;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ HANDLE self;
+ DWORD_PTR previous_affinity;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ bool reset_affinity;
+};
+
double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
// Currently, scaling is only used on linux path here,
// suppress diagnostics about it being unused on other paths.
@@ -706,10 +778,39 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
return 1000000000;
#endif
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
- static constexpr int estimate_time_ms = 1000;
+
+ // Make sure to use the same cycle counter when starting and stopping the
+ // cycle timer. We just pin the current thread to a cpu in the previous
+ // affinity set.
+ ThreadAffinityGuard affinity_guard;
+
+ static constexpr double estimate_time_s = 1.0;
+ const double start_time = ChronoClockNow();
const auto start_ticks = cycleclock::Now();
- SleepForMilliseconds(estimate_time_ms);
- return static_cast<double>(cycleclock::Now() - start_ticks);
+
+ // Impose load instead of calling sleep() to make sure the cycle counter
+ // works.
+ using PRNG = std::minstd_rand;
+ using Result = PRNG::result_type;
+ PRNG rng(static_cast<Result>(start_ticks));
+
+ Result state = 0;
+
+ do {
+ static constexpr size_t batch_size = 10000;
+ rng.discard(batch_size);
+ state += rng();
+
+ } while (ChronoClockNow() - start_time < estimate_time_s);
+
+ DoNotOptimize(state);
+
+ const auto end_ticks = cycleclock::Now();
+ const double end_time = ChronoClockNow();
+
+ return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time);
+  // Reset the affinity of the current thread when the lifetime of
+  // affinity_guard ends.
}
std::vector<double> GetLoadAvg() {
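The frequency fallback above now returns a true ratio instead of a raw tick count over a fixed sleep:

    cycles_per_second = (end_ticks - start_ticks) / (end_time - start_time)

Worked through: if the busy loop runs for 1.002 s of wall time while the pinned core's counter advances by 3.206e9 ticks, the estimate is 3.206e9 / 1.002 ≈ 3.2 GHz. Under the old SleepForMilliseconds(1000) code, the same 2 ms of oversleep would have been folded silently into the returned tick count, and an unpinned thread could even have sampled two different cores' counters.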
diff --git a/contrib/restricted/google/benchmark/src/thread_manager.h b/contrib/restricted/google/benchmark/src/thread_manager.h
index 4680285089..819b3c44db 100644
--- a/contrib/restricted/google/benchmark/src/thread_manager.h
+++ b/contrib/restricted/google/benchmark/src/thread_manager.h
@@ -43,8 +43,8 @@ class ThreadManager {
double manual_time_used = 0;
int64_t complexity_n = 0;
std::string report_label_;
- std::string error_message_;
- bool has_error_ = false;
+ std::string skip_message_;
+ internal::Skipped skipped_ = internal::NotSkipped;
UserCounters counters;
};
GUARDED_BY(GetBenchmarkMutex()) Result results;
diff --git a/contrib/restricted/google/benchmark/src/timers.cc b/contrib/restricted/google/benchmark/src/timers.cc
index 0a4da83c19..042895d0d4 100644
--- a/contrib/restricted/google/benchmark/src/timers.cc
+++ b/contrib/restricted/google/benchmark/src/timers.cc
@@ -59,7 +59,6 @@
#include "check.h"
#include "log.h"
-#include "sleep.h"
#include "string_util.h"
namespace benchmark {
@@ -68,6 +67,9 @@ namespace benchmark {
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
+#if defined(__NVCOMPILER)
+#pragma diag_suppress declared_but_not_referenced
+#endif
namespace {
#if defined(BENCHMARK_OS_WINDOWS)
diff --git a/contrib/restricted/google/benchmark/test/string_util_gtest.cc b/contrib/restricted/google/benchmark/test/string_util_gtest.cc
index 698f2d43eb..8bfdb7a72c 100644
--- a/contrib/restricted/google/benchmark/test/string_util_gtest.cc
+++ b/contrib/restricted/google/benchmark/test/string_util_gtest.cc
@@ -2,6 +2,8 @@
// statistics_test - Unit tests for src/statistics.cc
//===---------------------------------------------------------------------===//
+#include <tuple>
+
#include "../src/internal_macros.h"
#include "../src/string_util.h"
#include "gtest/gtest.h"
@@ -63,7 +65,10 @@ TEST(StringUtilTest, stoul) {
EXPECT_EQ(4ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
- { ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); }
+ {
+ ASSERT_THROW(std::ignore = benchmark::stoul("this is a test"),
+ std::invalid_argument);
+ }
#endif
}
@@ -107,7 +112,10 @@ EXPECT_EQ(1ul, pos);
EXPECT_EQ(4ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
-{ ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); }
+{
+ ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"),
+ std::invalid_argument);
+}
#endif
}
@@ -137,7 +145,10 @@ EXPECT_EQ(1ul, pos);
EXPECT_EQ(8ul, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
-{ ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); }
+{
+ ASSERT_THROW(std::ignore = benchmark::stod("this is a test"),
+ std::invalid_argument);
+}
#endif
}
diff --git a/contrib/restricted/google/benchmark/test/ya.make b/contrib/restricted/google/benchmark/test/ya.make
index e5ca53e0d8..0106f3212f 100644
--- a/contrib/restricted/google/benchmark/test/ya.make
+++ b/contrib/restricted/google/benchmark/test/ya.make
@@ -20,6 +20,7 @@ CFLAGS(
-DBENCHMARK_STATIC_DEFINE
-DGTEST_LINKED_AS_SHARED_LIBRARY=1
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
)
diff --git a/contrib/restricted/google/benchmark/ya.make b/contrib/restricted/google/benchmark/ya.make
index 8ef295a7b5..886e082a5b 100644
--- a/contrib/restricted/google/benchmark/ya.make
+++ b/contrib/restricted/google/benchmark/ya.make
@@ -1,4 +1,4 @@
-# Generated by devtools/yamaker from nixpkgs 22.05.
+# Generated by devtools/yamaker from nixpkgs 22.11.
LIBRARY()
@@ -6,9 +6,9 @@ LICENSE(Apache-2.0)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(1.7.1)
+VERSION(1.8.0)
-ORIGINAL_SOURCE(https://github.com/google/benchmark/archive/v1.7.1.tar.gz)
+ORIGINAL_SOURCE(https://github.com/google/benchmark/archive/v1.8.0.tar.gz)
ADDINCL(
GLOBAL contrib/restricted/google/benchmark/include
@@ -22,10 +22,17 @@ NO_UTIL()
CFLAGS(
GLOBAL -DBENCHMARK_STATIC_DEFINE
-DHAVE_POSIX_REGEX
+ -DHAVE_PTHREAD_AFFINITY
-DHAVE_STD_REGEX
-DHAVE_STEADY_CLOCK
)
+IF (OS_LINUX)
+ CFLAGS(
+ -DBENCHMARK_HAS_PTHREAD_AFFINITY
+ )
+ENDIF()
+
SRCS(
src/benchmark.cc
src/benchmark_api_internal.cc
@@ -42,7 +49,6 @@ SRCS(
src/json_reporter.cc
src/perf_counters.cc
src/reporter.cc
- src/sleep.cc
src/statistics.cc
src/string_util.cc
src/sysinfo.cc