about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/restricted/google/benchmark/src
diff options
context:
space:
mode:
author: robot-piglet <robot-piglet@yandex-team.com> 2024-07-22 12:30:43 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2024-07-22 12:42:10 +0300
commit: 224f37eff4b81b528e56c22e48ab3ecf6500a593 (patch)
tree: eb45faabd3a008b08639c516caed25d5e974e3cc /contrib/restricted/google/benchmark/src
parent: 942d2ca60b764e6bd7dff28a3ef04fefacbd576d (diff)
download: ydb-224f37eff4b81b528e56c22e48ab3ecf6500a593.tar.gz
Intermediate changes
Diffstat (limited to 'contrib/restricted/google/benchmark/src')
-rw-r--r-- contrib/restricted/google/benchmark/src/benchmark.cc 4
-rw-r--r-- contrib/restricted/google/benchmark/src/benchmark_runner.cc 59
-rw-r--r-- contrib/restricted/google/benchmark/src/benchmark_runner.h 5
-rw-r--r-- contrib/restricted/google/benchmark/src/cycleclock.h 5
-rw-r--r-- contrib/restricted/google/benchmark/src/perf_counters.cc 3
-rw-r--r-- contrib/restricted/google/benchmark/src/sysinfo.cc 8
-rw-r--r-- contrib/restricted/google/benchmark/src/timers.cc 12
-rw-r--r-- contrib/restricted/google/benchmark/src/timers.h 29
8 files changed, 99 insertions, 26 deletions
diff --git a/contrib/restricted/google/benchmark/src/benchmark.cc b/contrib/restricted/google/benchmark/src/benchmark.cc
index 337bb3faa7..374c5141c9 100644
--- a/contrib/restricted/google/benchmark/src/benchmark.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark.cc
@@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) {
internal::memory_manager = manager;
}
+void RegisterProfilerManager(ProfilerManager* manager) {
+ internal::profiler_manager = manager;
+}
+
void AddCustomContext(const std::string& key, const std::string& value) {
if (internal::global_context == nullptr) {
internal::global_context = new std::map<std::string, std::string>();
diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.cc b/contrib/restricted/google/benchmark/src/benchmark_runner.cc
index 5714587196..f5032e94dd 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_runner.cc
+++ b/contrib/restricted/google/benchmark/src/benchmark_runner.cc
@@ -62,6 +62,8 @@ namespace internal {
MemoryManager* memory_manager = nullptr;
+ProfilerManager* profiler_manager = nullptr;
+
namespace {
static constexpr IterationCount kMaxIterations = 1000000000000;
@@ -403,6 +405,41 @@ void BenchmarkRunner::RunWarmUp() {
}
}
+MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
+ IterationCount memory_iterations) {
+ // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
+ // optional so we don't have to own the Result here.
+ // Can't do it now due to cxx03.
+ memory_results.push_back(MemoryManager::Result());
+ MemoryManager::Result* memory_result = &memory_results.back();
+ memory_manager->Start();
+ std::unique_ptr<internal::ThreadManager> manager;
+ manager.reset(new internal::ThreadManager(1));
+ b.Setup();
+ RunInThread(&b, memory_iterations, 0, manager.get(),
+ perf_counters_measurement_ptr);
+ manager->WaitForAllThreads();
+ manager.reset();
+ b.Teardown();
+ memory_manager->Stop(*memory_result);
+ return memory_result;
+}
+
+void BenchmarkRunner::RunProfilerManager() {
+ // TODO: Provide a way to specify the number of iterations.
+ IterationCount profile_iterations = 1;
+ std::unique_ptr<internal::ThreadManager> manager;
+ manager.reset(new internal::ThreadManager(1));
+ b.Setup();
+ profiler_manager->AfterSetupStart();
+ RunInThread(&b, profile_iterations, 0, manager.get(),
+ /*perf_counters_measurement_ptr=*/nullptr);
+ manager->WaitForAllThreads();
+ profiler_manager->BeforeTeardownStop();
+ manager.reset();
+ b.Teardown();
+}
+
void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");
@@ -447,28 +484,18 @@ void BenchmarkRunner::DoOneRepetition() {
"then we should have accepted the current iteration run.");
}
- // Oh, one last thing, we need to also produce the 'memory measurements'..
+ // Produce memory measurements if requested.
MemoryManager::Result* memory_result = nullptr;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
- // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
- // optional so we don't have to own the Result here.
- // Can't do it now due to cxx03.
- memory_results.push_back(MemoryManager::Result());
- memory_result = &memory_results.back();
// Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed.
memory_iterations = std::min<IterationCount>(16, iters);
- memory_manager->Start();
- std::unique_ptr<internal::ThreadManager> manager;
- manager.reset(new internal::ThreadManager(1));
- b.Setup();
- RunInThread(&b, memory_iterations, 0, manager.get(),
- perf_counters_measurement_ptr);
- manager->WaitForAllThreads();
- manager.reset();
- b.Teardown();
- memory_manager->Stop(*memory_result);
+ memory_result = RunMemoryManager(memory_iterations);
+ }
+
+ if (profiler_manager != nullptr) {
+ RunProfilerManager();
}
// Ok, now actually report.
diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.h b/contrib/restricted/google/benchmark/src/benchmark_runner.h
index db2fa04396..cd34d2d5bb 100644
--- a/contrib/restricted/google/benchmark/src/benchmark_runner.h
+++ b/contrib/restricted/google/benchmark/src/benchmark_runner.h
@@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters);
namespace internal {
extern MemoryManager* memory_manager;
+extern ProfilerManager* profiler_manager;
struct RunResults {
std::vector<BenchmarkReporter::Run> non_aggregates;
@@ -113,6 +114,10 @@ class BenchmarkRunner {
};
IterationResults DoNIterations();
+ MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations);
+
+ void RunProfilerManager();
+
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
bool ShouldReportIterationResults(const IterationResults& i) const;
diff --git a/contrib/restricted/google/benchmark/src/cycleclock.h b/contrib/restricted/google/benchmark/src/cycleclock.h
index 36aa8e3c76..0c7f0408a4 100644
--- a/contrib/restricted/google/benchmark/src/cycleclock.h
+++ b/contrib/restricted/google/benchmark/src/cycleclock.h
@@ -205,11 +205,12 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
"sub %0, zero, %0\n"
"and %1, %1, %0\n"
: "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
- return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
+ return static_cast<int64_t>((static_cast<uint64_t>(cycles_hi1) << 32) |
+ cycles_lo);
#else
uint64_t cycles;
asm volatile("rdtime %0" : "=r"(cycles));
- return cycles;
+ return static_cast<int64_t>(cycles);
#endif
#elif defined(__e2k__) || defined(__elbrus__)
struct timeval tv;
diff --git a/contrib/restricted/google/benchmark/src/perf_counters.cc b/contrib/restricted/google/benchmark/src/perf_counters.cc
index 3b204fd1cd..17f7c3200f 100644
--- a/contrib/restricted/google/benchmark/src/perf_counters.cc
+++ b/contrib/restricted/google/benchmark/src/perf_counters.cc
@@ -157,7 +157,8 @@ PerfCounters PerfCounters::Create(
attr.exclude_hv = true;
// Read all counters in a group in one read.
- attr.read_format = PERF_FORMAT_GROUP;
+ attr.read_format = PERF_FORMAT_GROUP; //| PERF_FORMAT_TOTAL_TIME_ENABLED |
+ // PERF_FORMAT_TOTAL_TIME_RUNNING;
int id = -1;
while (id < 0) {
diff --git a/contrib/restricted/google/benchmark/src/sysinfo.cc b/contrib/restricted/google/benchmark/src/sysinfo.cc
index 17746e124f..617d276e47 100644
--- a/contrib/restricted/google/benchmark/src/sysinfo.cc
+++ b/contrib/restricted/google/benchmark/src/sysinfo.cc
@@ -508,7 +508,8 @@ int GetNumCPUsImpl() {
int max_id = -1;
std::ifstream f("/proc/cpuinfo");
if (!f.is_open()) {
- PrintErrorAndDie("Failed to open /proc/cpuinfo");
+ std::cerr << "Failed to open /proc/cpuinfo\n";
+ return -1;
}
#if defined(__alpha__)
const std::string Key = "cpus detected";
@@ -557,9 +558,8 @@ int GetNumCPUsImpl() {
int GetNumCPUs() {
const int num_cpus = GetNumCPUsImpl();
if (num_cpus < 1) {
- PrintErrorAndDie(
- "Unable to extract number of CPUs. If your platform uses "
- "/proc/cpuinfo, custom support may need to be added.");
+ std::cerr << "Unable to extract number of CPUs. If your platform uses "
+ "/proc/cpuinfo, custom support may need to be added.\n";
}
return num_cpus;
}
diff --git a/contrib/restricted/google/benchmark/src/timers.cc b/contrib/restricted/google/benchmark/src/timers.cc
index c392649715..a0543fe3d1 100644
--- a/contrib/restricted/google/benchmark/src/timers.cc
+++ b/contrib/restricted/google/benchmark/src/timers.cc
@@ -126,8 +126,12 @@ double ProcessCPUUsage() {
return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed");
#elif defined(BENCHMARK_OS_QURT)
+ // Note that qurt_timer_get_ticks() is no longer documented as of SDK 5.3.0,
+ // and doesn't appear to work on at least some devices (eg Samsung S22),
+ // so let's use the actually-documented and apparently-equivalent
+ // qurt_sysclock_get_hw_ticks() call instead.
return static_cast<double>(
- qurt_timer_timetick_to_us(qurt_timer_get_ticks())) *
+ qurt_timer_timetick_to_us(qurt_sysclock_get_hw_ticks())) *
1.0e-6;
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
@@ -160,8 +164,12 @@ double ThreadCPUUsage() {
&user_time);
return MakeTime(kernel_time, user_time);
#elif defined(BENCHMARK_OS_QURT)
+ // Note that qurt_timer_get_ticks() is no longer documented as of SDK 5.3.0,
+ // and doesn't appear to work on at least some devices (eg Samsung S22),
+ // so let's use the actually-documented and apparently-equivalent
+ // qurt_sysclock_get_hw_ticks() call instead.
return static_cast<double>(
- qurt_timer_timetick_to_us(qurt_timer_get_ticks())) *
+ qurt_timer_timetick_to_us(qurt_sysclock_get_hw_ticks())) *
1.0e-6;
#elif defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
diff --git a/contrib/restricted/google/benchmark/src/timers.h b/contrib/restricted/google/benchmark/src/timers.h
index 65606ccd93..690086b36c 100644
--- a/contrib/restricted/google/benchmark/src/timers.h
+++ b/contrib/restricted/google/benchmark/src/timers.h
@@ -15,6 +15,29 @@ double ChildrenCPUUsage();
// Return the CPU usage of the current thread
double ThreadCPUUsage();
+#if defined(BENCHMARK_OS_QURT)
+
+// std::chrono::now() can return 0 on some Hexagon devices;
+// this reads the value of a 56-bit, 19.2MHz hardware counter
+// and converts it to seconds. Unlike std::chrono, this doesn't
+// return an absolute time, but since ChronoClockNow() is only used
+// to compute elapsed time, this shouldn't matter.
+struct QuRTClock {
+ typedef uint64_t rep;
+ typedef std::ratio<1, 19200000> period;
+ typedef std::chrono::duration<rep, period> duration;
+ typedef std::chrono::time_point<QuRTClock> time_point;
+ static const bool is_steady = false;
+
+ static time_point now() {
+ unsigned long long count;
+ asm volatile(" %0 = c31:30 " : "=r"(count));
+ return time_point(static_cast<duration>(count));
+ }
+};
+
+#else
+
#if defined(HAVE_STEADY_CLOCK)
template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
struct ChooseSteadyClock {
@@ -25,10 +48,14 @@ template <>
struct ChooseSteadyClock<false> {
typedef std::chrono::steady_clock type;
};
+#endif // HAVE_STEADY_CLOCK
+
#endif
struct ChooseClockType {
-#if defined(HAVE_STEADY_CLOCK)
+#if defined(BENCHMARK_OS_QURT)
+ typedef QuRTClock type;
+#elif defined(HAVE_STEADY_CLOCK)
typedef ChooseSteadyClock<>::type type;
#else
typedef std::chrono::high_resolution_clock type;