diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-07-22 12:30:43 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-07-22 12:42:10 +0300 |
commit | 224f37eff4b81b528e56c22e48ab3ecf6500a593 (patch) | |
tree | eb45faabd3a008b08639c516caed25d5e974e3cc /contrib/restricted/google/benchmark/src | |
parent | 942d2ca60b764e6bd7dff28a3ef04fefacbd576d (diff) | |
download | ydb-224f37eff4b81b528e56c22e48ab3ecf6500a593.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/restricted/google/benchmark/src')
8 files changed, 99 insertions, 26 deletions
diff --git a/contrib/restricted/google/benchmark/src/benchmark.cc b/contrib/restricted/google/benchmark/src/benchmark.cc index 337bb3faa7..374c5141c9 100644 --- a/contrib/restricted/google/benchmark/src/benchmark.cc +++ b/contrib/restricted/google/benchmark/src/benchmark.cc @@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) { internal::memory_manager = manager; } +void RegisterProfilerManager(ProfilerManager* manager) { + internal::profiler_manager = manager; +} + void AddCustomContext(const std::string& key, const std::string& value) { if (internal::global_context == nullptr) { internal::global_context = new std::map<std::string, std::string>(); diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.cc b/contrib/restricted/google/benchmark/src/benchmark_runner.cc index 5714587196..f5032e94dd 100644 --- a/contrib/restricted/google/benchmark/src/benchmark_runner.cc +++ b/contrib/restricted/google/benchmark/src/benchmark_runner.cc @@ -62,6 +62,8 @@ namespace internal { MemoryManager* memory_manager = nullptr; +ProfilerManager* profiler_manager = nullptr; + namespace { static constexpr IterationCount kMaxIterations = 1000000000000; @@ -403,6 +405,41 @@ void BenchmarkRunner::RunWarmUp() { } } +MemoryManager::Result* BenchmarkRunner::RunMemoryManager( + IterationCount memory_iterations) { + // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an + // optional so we don't have to own the Result here. + // Can't do it now due to cxx03. + memory_results.push_back(MemoryManager::Result()); + MemoryManager::Result* memory_result = &memory_results.back(); + memory_manager->Start(); + std::unique_ptr<internal::ThreadManager> manager; + manager.reset(new internal::ThreadManager(1)); + b.Setup(); + RunInThread(&b, memory_iterations, 0, manager.get(), + perf_counters_measurement_ptr); + manager->WaitForAllThreads(); + manager.reset(); + b.Teardown(); + memory_manager->Stop(*memory_result); + return memory_result; +} + +void BenchmarkRunner::RunProfilerManager() { + // TODO: Provide a way to specify the number of iterations. + IterationCount profile_iterations = 1; + std::unique_ptr<internal::ThreadManager> manager; + manager.reset(new internal::ThreadManager(1)); + b.Setup(); + profiler_manager->AfterSetupStart(); + RunInThread(&b, profile_iterations, 0, manager.get(), + /*perf_counters_measurement_ptr=*/nullptr); + manager->WaitForAllThreads(); + profiler_manager->BeforeTeardownStop(); + manager.reset(); + b.Teardown(); +} + void BenchmarkRunner::DoOneRepetition() { assert(HasRepeatsRemaining() && "Already done all repetitions?"); @@ -447,28 +484,18 @@ void BenchmarkRunner::DoOneRepetition() { "then we should have accepted the current iteration run."); } - // Oh, one last thing, we need to also produce the 'memory measurements'.. + // Produce memory measurements if requested. MemoryManager::Result* memory_result = nullptr; IterationCount memory_iterations = 0; if (memory_manager != nullptr) { - // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an - // optional so we don't have to own the Result here. - // Can't do it now due to cxx03. - memory_results.push_back(MemoryManager::Result()); - memory_result = &memory_results.back(); // Only run a few iterations to reduce the impact of one-time // allocations in benchmarks that are not properly managed. memory_iterations = std::min<IterationCount>(16, iters); - memory_manager->Start(); - std::unique_ptr<internal::ThreadManager> manager; - manager.reset(new internal::ThreadManager(1)); - b.Setup(); - RunInThread(&b, memory_iterations, 0, manager.get(), - perf_counters_measurement_ptr); - manager->WaitForAllThreads(); - manager.reset(); - b.Teardown(); - memory_manager->Stop(*memory_result); + memory_result = RunMemoryManager(memory_iterations); + } + + if (profiler_manager != nullptr) { + RunProfilerManager(); } // Ok, now actually report. diff --git a/contrib/restricted/google/benchmark/src/benchmark_runner.h b/contrib/restricted/google/benchmark/src/benchmark_runner.h index db2fa04396..cd34d2d5bb 100644 --- a/contrib/restricted/google/benchmark/src/benchmark_runner.h +++ b/contrib/restricted/google/benchmark/src/benchmark_runner.h @@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters); namespace internal { extern MemoryManager* memory_manager; +extern ProfilerManager* profiler_manager; struct RunResults { std::vector<BenchmarkReporter::Run> non_aggregates; @@ -113,6 +114,10 @@ class BenchmarkRunner { }; IterationResults DoNIterations(); + MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations); + + void RunProfilerManager(); + IterationCount PredictNumItersNeeded(const IterationResults& i) const; bool ShouldReportIterationResults(const IterationResults& i) const; diff --git a/contrib/restricted/google/benchmark/src/cycleclock.h b/contrib/restricted/google/benchmark/src/cycleclock.h index 36aa8e3c76..0c7f0408a4 100644 --- a/contrib/restricted/google/benchmark/src/cycleclock.h +++ b/contrib/restricted/google/benchmark/src/cycleclock.h @@ -205,11 +205,12 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { "sub %0, zero, %0\n" "and %1, %1, %0\n" : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1)); - return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo; + return static_cast<int64_t>((static_cast<uint64_t>(cycles_hi1) << 32) | + cycles_lo); #else uint64_t cycles; asm volatile("rdtime %0" : "=r"(cycles)); - return cycles; + return static_cast<int64_t>(cycles); #endif #elif defined(__e2k__) || defined(__elbrus__) struct timeval tv; diff --git a/contrib/restricted/google/benchmark/src/perf_counters.cc b/contrib/restricted/google/benchmark/src/perf_counters.cc index 3b204fd1cd..17f7c3200f 100644 --- a/contrib/restricted/google/benchmark/src/perf_counters.cc +++ b/contrib/restricted/google/benchmark/src/perf_counters.cc @@ -157,7 +157,8 @@ PerfCounters PerfCounters::Create( attr.exclude_hv = true; // Read all counters in a group in one read. - attr.read_format = PERF_FORMAT_GROUP; + attr.read_format = PERF_FORMAT_GROUP; //| PERF_FORMAT_TOTAL_TIME_ENABLED | + // PERF_FORMAT_TOTAL_TIME_RUNNING; int id = -1; while (id < 0) { diff --git a/contrib/restricted/google/benchmark/src/sysinfo.cc b/contrib/restricted/google/benchmark/src/sysinfo.cc index 17746e124f..617d276e47 100644 --- a/contrib/restricted/google/benchmark/src/sysinfo.cc +++ b/contrib/restricted/google/benchmark/src/sysinfo.cc @@ -508,7 +508,8 @@ int GetNumCPUsImpl() { int max_id = -1; std::ifstream f("/proc/cpuinfo"); if (!f.is_open()) { - PrintErrorAndDie("Failed to open /proc/cpuinfo"); + std::cerr << "Failed to open /proc/cpuinfo\n"; + return -1; } #if defined(__alpha__) const std::string Key = "cpus detected"; @@ -557,9 +558,8 @@ int GetNumCPUsImpl() { int GetNumCPUs() { const int num_cpus = GetNumCPUsImpl(); if (num_cpus < 1) { - PrintErrorAndDie( - "Unable to extract number of CPUs. If your platform uses " - "/proc/cpuinfo, custom support may need to be added."); + std::cerr << "Unable to extract number of CPUs. If your platform uses " + "/proc/cpuinfo, custom support may need to be added.\n"; } return num_cpus; } diff --git a/contrib/restricted/google/benchmark/src/timers.cc b/contrib/restricted/google/benchmark/src/timers.cc index c392649715..a0543fe3d1 100644 --- a/contrib/restricted/google/benchmark/src/timers.cc +++ b/contrib/restricted/google/benchmark/src/timers.cc @@ -126,8 +126,12 @@ double ProcessCPUUsage() { return MakeTime(kernel_time, user_time); DiagnoseAndExit("GetProccessTimes() failed"); #elif defined(BENCHMARK_OS_QURT) + // Note that qurt_timer_get_ticks() is no longer documented as of SDK 5.3.0, + // and doesn't appear to work on at least some devices (eg Samsung S22), + // so let's use the actually-documented and apparently-equivalent + // qurt_sysclock_get_hw_ticks() call instead. return static_cast<double>( - qurt_timer_timetick_to_us(qurt_timer_get_ticks())) * + qurt_timer_timetick_to_us(qurt_sysclock_get_hw_ticks())) * 1.0e-6; #elif defined(BENCHMARK_OS_EMSCRIPTEN) // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten. @@ -160,8 +164,12 @@ double ThreadCPUUsage() { &user_time); return MakeTime(kernel_time, user_time); #elif defined(BENCHMARK_OS_QURT) + // Note that qurt_timer_get_ticks() is no longer documented as of SDK 5.3.0, + // and doesn't appear to work on at least some devices (eg Samsung S22), + // so let's use the actually-documented and apparently-equivalent + // qurt_sysclock_get_hw_ticks() call instead. return static_cast<double>( - qurt_timer_timetick_to_us(qurt_timer_get_ticks())) * + qurt_timer_timetick_to_us(qurt_sysclock_get_hw_ticks())) * 1.0e-6; #elif defined(BENCHMARK_OS_MACOSX) // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. diff --git a/contrib/restricted/google/benchmark/src/timers.h b/contrib/restricted/google/benchmark/src/timers.h index 65606ccd93..690086b36c 100644 --- a/contrib/restricted/google/benchmark/src/timers.h +++ b/contrib/restricted/google/benchmark/src/timers.h @@ -15,6 +15,29 @@ double ChildrenCPUUsage(); // Return the CPU usage of the current thread double ThreadCPUUsage(); +#if defined(BENCHMARK_OS_QURT) + +// std::chrono::now() can return 0 on some Hexagon devices; +// this reads the value of a 56-bit, 19.2MHz hardware counter +// and converts it to seconds. Unlike std::chrono, this doesn't +// return an absolute time, but since ChronoClockNow() is only used +// to compute elapsed time, this shouldn't matter. +struct QuRTClock { + typedef uint64_t rep; + typedef std::ratio<1, 19200000> period; + typedef std::chrono::duration<rep, period> duration; + typedef std::chrono::time_point<QuRTClock> time_point; + static const bool is_steady = false; + + static time_point now() { + unsigned long long count; + asm volatile(" %0 = c31:30 " : "=r"(count)); + return time_point(static_cast<duration>(count)); + } +}; + +#else + #if defined(HAVE_STEADY_CLOCK) template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady> struct ChooseSteadyClock { @@ -25,10 +48,14 @@ template <> struct ChooseSteadyClock<false> { typedef std::chrono::steady_clock type; }; +#endif // HAVE_STEADY_CLOCK + #endif struct ChooseClockType { -#if defined(HAVE_STEADY_CLOCK) +#if defined(BENCHMARK_OS_QURT) + typedef QuRTClock type; +#elif defined(HAVE_STEADY_CLOCK) typedef ChooseSteadyClock<>::type type; #else typedef std::chrono::high_resolution_clock type; |