aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/execprofile/profile.cpp
diff options
context:
space:
mode:
authorAlexander Gololobov <davenger@yandex-team.com>2022-02-10 16:47:37 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:37 +0300
commit39608cdb86363c75ce55b2b9a69841c3b71f22cf (patch)
tree4ec132c1665bd4d68e3628aa18d937c70d32413b /library/cpp/execprofile/profile.cpp
parent54295b9bd4dc45c54d804084fd846d945148a7f0 (diff)
downloadydb-39608cdb86363c75ce55b2b9a69841c3b71f22cf.tar.gz
Restoring authorship annotation for Alexander Gololobov <davenger@yandex-team.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/execprofile/profile.cpp')
-rw-r--r--library/cpp/execprofile/profile.cpp586
1 files changed, 293 insertions, 293 deletions
diff --git a/library/cpp/execprofile/profile.cpp b/library/cpp/execprofile/profile.cpp
index d05de20203..19003a9a51 100644
--- a/library/cpp/execprofile/profile.cpp
+++ b/library/cpp/execprofile/profile.cpp
@@ -1,190 +1,190 @@
#include <util/system/defaults.h>
-#include "profile.h"
-
+#include "profile.h"
+
#if defined(_unix_) && !defined(_bionic_) && !defined(_cygwin_)
-
-#include <signal.h>
-#include <sys/time.h>
-#include <sys/resource.h>
+
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#if defined(_darwin_)
#include <sys/ucontext.h>
#else
-#include <ucontext.h>
+#include <ucontext.h>
#endif
#include <dlfcn.h>
-#include <util/system/platform.h>
+#include <util/system/platform.h>
#include <util/generic/hash.h>
#include <util/generic/map.h>
#include <util/generic/noncopyable.h>
#include <util/generic/algorithm.h>
#include <util/generic/vector.h>
-#include <util/stream/file.h>
-#include <util/string/util.h>
-#include <util/system/datetime.h>
-
-// This class sets SIGPROF handler and captures instruction pointer in it.
+#include <util/stream/file.h>
+#include <util/string/util.h>
+#include <util/system/datetime.h>
+
+// This class sets SIGPROF handler and captures instruction pointer in it.
class TExecutionSampler : TNonCopyable {
-public:
+public:
typedef TVector<std::pair<void*, size_t>> TSampleVector;
-
- struct TStats {
- ui64 SavedSamples;
- ui64 DroppedSamples;
- ui64 SearchSkipCount;
- };
-
- // NOTE: There is no synchronization here as the instance is supposed to be
- // created on the main thread.
+
+ struct TStats {
+ ui64 SavedSamples;
+ ui64 DroppedSamples;
+ ui64 SearchSkipCount;
+ };
+
+ // NOTE: There is no synchronization here as the instance is supposed to be
+ // created on the main thread.
static TExecutionSampler* Instance() {
if (SInstance == nullptr) {
- SInstance = new TExecutionSampler();
- }
-
- return SInstance;
- }
-
+ SInstance = new TExecutionSampler();
+ }
+
+ return SInstance;
+ }
+
void Start() {
- // Set signal handler
- struct sigaction sa;
- sa.sa_sigaction = ProfilerSignalHandler;
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_SIGINFO;
- if (sigaction(SIGPROF, &sa, &OldSignalHandler) != 0)
- return;
-
- // Set interval timer
- itimerval tv;
- tv.it_interval.tv_sec = tv.it_value.tv_sec = 0;
- tv.it_interval.tv_usec = tv.it_value.tv_usec = SAMPLE_INTERVAL;
- setitimer(ITIMER_PROF, &tv, &OldTimerValue);
-
- Started = true;
- }
-
+ // Set signal handler
+ struct sigaction sa;
+ sa.sa_sigaction = ProfilerSignalHandler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGPROF, &sa, &OldSignalHandler) != 0)
+ return;
+
+ // Set interval timer
+ itimerval tv;
+ tv.it_interval.tv_sec = tv.it_value.tv_sec = 0;
+ tv.it_interval.tv_usec = tv.it_value.tv_usec = SAMPLE_INTERVAL;
+ setitimer(ITIMER_PROF, &tv, &OldTimerValue);
+
+ Started = true;
+ }
+
void Stop(TSampleVector& sampleVector, TStats& stats) {
- // Reset signal handler and timer
- if (Started) {
+ // Reset signal handler and timer
+ if (Started) {
setitimer(ITIMER_PROF, &OldTimerValue, nullptr);
- sleep(1);
- }
-
- WaitForWriteFlag();
-
- if (Started) {
+ sleep(1);
+ }
+
+ WaitForWriteFlag();
+
+ if (Started) {
sigaction(SIGPROF, &OldSignalHandler, nullptr);
- Started = false;
- }
-
- TExecutionSampler::TSampleVector hits;
- hits.reserve(Samples);
- for (size_t i = 0; i < SZ; ++i) {
+ Started = false;
+ }
+
+ TExecutionSampler::TSampleVector hits;
+ hits.reserve(Samples);
+ for (size_t i = 0; i < SZ; ++i) {
if (Ips[i].first != nullptr) {
- hits.push_back(Ips[i]);
- }
- }
- stats.SavedSamples = Samples;
- stats.DroppedSamples = AtomicGet(DroppedSamples);
- stats.SearchSkipCount = SearchSkipCount;
- AtomicUnlock(&WriteFlag);
-
+ hits.push_back(Ips[i]);
+ }
+ }
+ stats.SavedSamples = Samples;
+ stats.DroppedSamples = AtomicGet(DroppedSamples);
+ stats.SearchSkipCount = SearchSkipCount;
+ AtomicUnlock(&WriteFlag);
+
Sort(hits.begin(), hits.end(), TCompareFirst());
-
- sampleVector.swap(hits);
- }
-
+
+ sampleVector.swap(hits);
+ }
+
void ResetStats() {
- WaitForWriteFlag();
- Clear();
- AtomicUnlock(&WriteFlag);
- }
-
-private:
+ WaitForWriteFlag();
+ Clear();
+ AtomicUnlock(&WriteFlag);
+ }
+
+private:
static const size_t SZ = 2 * 1024 * 1024; // size of the hash table
// inserts work faster if it's a power of 2
static const int SAMPLE_INTERVAL = 1000; // in microseconds
-
- struct TCompareFirst {
+
+ struct TCompareFirst {
bool operator()(const std::pair<void*, size_t>& a, const std::pair<void*, size_t>& b) const {
- return a.first < b.first;
- }
- };
-
- TExecutionSampler()
- : Started(false)
- , Ips(SZ)
- , WriteFlag(0)
- , DroppedSamples(0)
- , Samples(0)
- , UniqueSamples(0)
- , SearchSkipCount(0)
- {
- }
-
+ return a.first < b.first;
+ }
+ };
+
+ TExecutionSampler()
+ : Started(false)
+ , Ips(SZ)
+ , WriteFlag(0)
+ , DroppedSamples(0)
+ , Samples(0)
+ , UniqueSamples(0)
+ , SearchSkipCount(0)
+ {
+ }
+
~TExecutionSampler() = default;
-
- // Signal handler is not allowed to do anything that can deadlock with activity
- // on the thread to which the signal is delivered or corrupt data structures that
- // were in process of update.
- // One such thing is memory allocation. That's why a fixed size vector is
- // preallocated at start.
+
+ // Signal handler is not allowed to do anything that can deadlock with activity
+ // on the thread to which the signal is delivered or corrupt data structures that
+ // were in process of update.
+ // One such thing is memory allocation. That's why a fixed size vector is
+ // preallocated at start.
static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
- (void)info;
- if (signal != SIGPROF) {
- return;
- }
-
- ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context);
+ (void)info;
+ if (signal != SIGPROF) {
+ return;
+ }
+
+ ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context);
Y_ASSERT(SInstance != nullptr);
-
- SInstance->CaptureIP(GetIp(&ucontext->uc_mcontext));
- }
-
+
+ SInstance->CaptureIP(GetIp(&ucontext->uc_mcontext));
+ }
+
void WaitForWriteFlag() {
- // Wait for write flag to be reset
- ui32 delay = 100;
- while (!AtomicTryLock(&WriteFlag)) {
- usleep(delay);
- delay += delay;
+ // Wait for write flag to be reset
+ ui32 delay = 100;
+ while (!AtomicTryLock(&WriteFlag)) {
+ usleep(delay);
+ delay += delay;
delay = Min(delay, (ui32)5000);
- }
- }
-
+ }
+ }
+
void CaptureIP(void* rip) {
- // Check if the handler on another thread is in the process of adding a sample
- // If this is the case, we just drop the current sample as this should happen
- // rarely.
- if (AtomicTryLock(&WriteFlag)) {
- AddSample(rip);
- AtomicUnlock(&WriteFlag);
- } else {
+ // Check if the handler on another thread is in the process of adding a sample
+ // If this is the case, we just drop the current sample as this should happen
+ // rarely.
+ if (AtomicTryLock(&WriteFlag)) {
+ AddSample(rip);
+ AtomicUnlock(&WriteFlag);
+ } else {
AtomicIncrement(DroppedSamples);
- }
- }
-
- // Hash function applied to the addresses
+ }
+ }
+
+ // Hash function applied to the addresses
static inline ui32 Hash(void* key) {
- return ((size_t)key + (size_t)key / SZ) % SZ;
- }
-
- // Get instruction pointer from the context
+ return ((size_t)key + (size_t)key / SZ) % SZ;
+ }
+
+ // Get instruction pointer from the context
static inline void* GetIp(const mcontext_t* mctx) {
-#if defined _freebsd_
+#if defined _freebsd_
#if defined _64_
- return (void*)mctx->mc_rip;
+ return (void*)mctx->mc_rip;
#else
- return (void*)mctx->mc_eip;
+ return (void*)mctx->mc_eip;
#endif
-#elif defined _linux_
+#elif defined _linux_
#if defined _64_
#if defined(_arm_)
return (void*)mctx->pc;
#else
- return (void*)mctx->gregs[REG_RIP];
+ return (void*)mctx->gregs[REG_RIP];
#endif
#else
- return (void*)mctx->gregs[REG_EIP];
+ return (void*)mctx->gregs[REG_EIP];
#endif
#elif defined _darwin_
#if defined _64_
@@ -199,67 +199,67 @@ private:
#else
return (void*)(*mctx)->__ss.__eip;
#endif
+#endif
#endif
-#endif
- }
-
+ }
+
inline bool AddSample(void* key) {
- ui32 slot = Hash(key);
- ui32 prevSlot = (slot - 1) % SZ;
-
- while (key != Ips[slot].first && !IsSlotEmpty(slot) && slot != prevSlot) {
- slot = (slot + 1) % SZ;
- SearchSkipCount++;
- }
-
- if (key == Ips[slot].first) {
- // increment the count
- Ips[slot].second++;
- ++Samples;
- } else if (InsertsAllowed()) {
- // add new sample and set the count to 1
- Ips[slot].first = key;
- Ips[slot].second = 1;
- ++UniqueSamples;
- ++Samples;
- } else {
- // don't insert new sample if the search is becoming too slow
+ ui32 slot = Hash(key);
+ ui32 prevSlot = (slot - 1) % SZ;
+
+ while (key != Ips[slot].first && !IsSlotEmpty(slot) && slot != prevSlot) {
+ slot = (slot + 1) % SZ;
+ SearchSkipCount++;
+ }
+
+ if (key == Ips[slot].first) {
+ // increment the count
+ Ips[slot].second++;
+ ++Samples;
+ } else if (InsertsAllowed()) {
+ // add new sample and set the count to 1
+ Ips[slot].first = key;
+ Ips[slot].second = 1;
+ ++UniqueSamples;
+ ++Samples;
+ } else {
+ // don't insert new sample if the search is becoming too slow
AtomicIncrement(DroppedSamples);
- return false;
- }
-
- return true;
- }
-
+ return false;
+ }
+
+ return true;
+ }
+
inline bool IsSlotEmpty(ui32 slot) const {
return Ips[slot].first == nullptr;
- }
-
+ }
+
inline bool InsertsAllowed() const {
- return UniqueSamples < SZ / 2;
- }
-
- void
+ return UniqueSamples < SZ / 2;
+ }
+
+ void
Clear() {
Y_ASSERT(WriteFlag == 1);
-
- for (size_t i = 0; i < SZ; ++i) {
+
+ for (size_t i = 0; i < SZ; ++i) {
Ips[i] = std::make_pair((void*)nullptr, (size_t)0);
- }
- Samples = 0;
- AtomicSet(DroppedSamples, 0);
- UniqueSamples = 0;
- SearchSkipCount = 0;
- }
-
+ }
+ Samples = 0;
+ AtomicSet(DroppedSamples, 0);
+ UniqueSamples = 0;
+ SearchSkipCount = 0;
+ }
+
bool Started;
struct sigaction OldSignalHandler;
itimerval OldTimerValue;
-
+
TVector<std::pair<void*, size_t>>
Ips; // The hash table storing addresses and their hitcounts
-
- // TODO: on a big multiproc cache line false sharing by the flag and count might become an issue
+
+ // TODO: on a big multiproc cache line false sharing by the flag and count might become an issue
TAtomic WriteFlag; // Is used to syncronize access to the hash table
TAtomic DroppedSamples; // "dropped sample" count will show how many times
// a sample was dropped either because of write conflict
@@ -267,87 +267,87 @@ private:
ui64 Samples; // Saved samples count
ui64 UniqueSamples; // Number of unique addresses
ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions
-
- static TExecutionSampler* SInstance;
-};
-
-// Performs analysis of samples captured by TExecutionSampler
-class TSampleAnalyser : TNonCopyable {
-public:
- TSampleAnalyser(TExecutionSampler::TSampleVector& samples, const TExecutionSampler::TStats& stats, bool putTimeStamps = false)
- : Samples()
- , Stats(stats)
- , PutTimestamps(putTimeStamps)
- {
- Samples.swap(samples);
- }
-
+
+ static TExecutionSampler* SInstance;
+};
+
+// Performs analysis of samples captured by TExecutionSampler
+class TSampleAnalyser : TNonCopyable {
+public:
+ TSampleAnalyser(TExecutionSampler::TSampleVector& samples, const TExecutionSampler::TStats& stats, bool putTimeStamps = false)
+ : Samples()
+ , Stats(stats)
+ , PutTimestamps(putTimeStamps)
+ {
+ Samples.swap(samples);
+ }
+
~TSampleAnalyser() = default;
-
- void Analyze(FILE* out) const;
-
-private:
- TExecutionSampler::TSampleVector Samples;
+
+ void Analyze(FILE* out) const;
+
+private:
+ TExecutionSampler::TSampleVector Samples;
TExecutionSampler::TStats Stats;
- bool PutTimestamps;
-};
-
+ bool PutTimestamps;
+};
+
void TSampleAnalyser::Analyze(FILE* out) const {
fprintf(out, "samples: %" PRIu64 " unique: %" PRIu64 " dropped: %" PRIu64 " searchskips: %" PRIu64 "\n",
(ui64)Stats.SavedSamples, (ui64)Samples.size(),
(ui64)Stats.DroppedSamples, (ui64)Stats.SearchSkipCount);
-
- fprintf(out, "\nSamples:\n");
- size_t funcCnt = 0;
- void* prevModBase = (void*)-1;
- void* prevFunc = (void*)-1;
- for (size_t i = 0; i < Samples.size(); ++i) {
- // print cycle count once in a while to estimate time consumed by
- // dumping the samples
- if (PutTimestamps && (i % 1000 == 0)) {
- ui64 tm = GetCycleCount();
+
+ fprintf(out, "\nSamples:\n");
+ size_t funcCnt = 0;
+ void* prevModBase = (void*)-1;
+ void* prevFunc = (void*)-1;
+ for (size_t i = 0; i < Samples.size(); ++i) {
+ // print cycle count once in a while to estimate time consumed by
+ // dumping the samples
+ if (PutTimestamps && (i % 1000 == 0)) {
+ ui64 tm = GetCycleCount();
fprintf(out, "TM: %" PRIu64 "\n", tm);
- }
-
- Dl_info addrInfo;
- if (dladdr(Samples[i].first, &addrInfo)) {
- if (addrInfo.dli_fbase != prevModBase || addrInfo.dli_saddr != prevFunc) {
+ }
+
+ Dl_info addrInfo;
+ if (dladdr(Samples[i].first, &addrInfo)) {
+ if (addrInfo.dli_fbase != prevModBase || addrInfo.dli_saddr != prevFunc) {
fprintf(out, "Func\t%" PRISZT "\t%p\t%p\t%s\t%s\n",
funcCnt,
addrInfo.dli_fbase,
addrInfo.dli_saddr,
addrInfo.dli_fname,
addrInfo.dli_sname);
- prevModBase = addrInfo.dli_fbase;
- prevFunc = addrInfo.dli_saddr;
- ++funcCnt;
- }
- } else {
- fprintf(out, "[dladdr failed]\n");
- }
+ prevModBase = addrInfo.dli_fbase;
+ prevFunc = addrInfo.dli_saddr;
+ ++funcCnt;
+ }
+ } else {
+ fprintf(out, "[dladdr failed]\n");
+ }
fprintf(out, "%" PRISZT "\t%p\t%lu\n", i, Samples[i].first, Samples[i].second);
- }
-}
-
+ }
+}
+
TExecutionSampler* TExecutionSampler::SInstance = nullptr;
-
-// Starts capturing execution samples
-void BeginProfiling() {
- TExecutionSampler::Instance()->Start();
-}
-
-// Resets captured execution samples
-void ResetProfile() {
- TExecutionSampler::Instance()->ResetStats();
-}
-
-void DumpRUsage(FILE* out) {
- rusage ru;
- int e = getrusage(RUSAGE_SELF, &ru);
- if (e != 0)
- return;
-
- fprintf(out,
+
+// Starts capturing execution samples
+void BeginProfiling() {
+ TExecutionSampler::Instance()->Start();
+}
+
+// Resets captured execution samples
+void ResetProfile() {
+ TExecutionSampler::Instance()->ResetStats();
+}
+
+void DumpRUsage(FILE* out) {
+ rusage ru;
+ int e = getrusage(RUSAGE_SELF, &ru);
+ if (e != 0)
+ return;
+
+ fprintf(out,
"user time: %lf\n"
"system time: %lf\n"
"max RSS: %ld\n"
@@ -372,46 +372,46 @@ void DumpRUsage(FILE* out) {
ru.ru_msgsnd, ru.ru_msgrcv,
ru.ru_nsignals,
ru.ru_nvcsw, ru.ru_nivcsw);
-}
-
-// Pauses capturing execution samples and dumps them to the file
-// Samples are not cleared so that profiling can be continued by calling BeginProfiling()
-// or it can be started from scratch by calling ResetProfile() and then BeginProfiling()
-void EndProfiling(FILE* out) {
- DumpRUsage(out);
-
- TExecutionSampler::TSampleVector samples;
- TExecutionSampler::TStats stats;
- TExecutionSampler::Instance()->Stop(samples, stats);
-
- TSampleAnalyser analyzer(samples, stats);
- analyzer.Analyze(out);
-}
-
-void EndProfiling() {
- static unsigned cnt = 0;
- char nameBuf[256];
- snprintf(nameBuf, sizeof(nameBuf), "./%s.%d.%u.profile", getprogname(), (int)getpid(), cnt);
- FILE* out = fopen(nameBuf, "a");
- EndProfiling(out);
- fclose(out);
- ++cnt;
-}
-
+}
+
+// Pauses capturing execution samples and dumps them to the file
+// Samples are not cleared so that profiling can be continued by calling BeginProfiling()
+// or it can be started from scratch by calling ResetProfile() and then BeginProfiling()
+void EndProfiling(FILE* out) {
+ DumpRUsage(out);
+
+ TExecutionSampler::TSampleVector samples;
+ TExecutionSampler::TStats stats;
+ TExecutionSampler::Instance()->Stop(samples, stats);
+
+ TSampleAnalyser analyzer(samples, stats);
+ analyzer.Analyze(out);
+}
+
+void EndProfiling() {
+ static unsigned cnt = 0;
+ char nameBuf[256];
+ snprintf(nameBuf, sizeof(nameBuf), "./%s.%d.%u.profile", getprogname(), (int)getpid(), cnt);
+ FILE* out = fopen(nameBuf, "a");
+ EndProfiling(out);
+ fclose(out);
+ ++cnt;
+}
+
#else
-
-// NOTE: not supported on Windows
-
-void BeginProfiling() {
-}
-
-void ResetProfile() {
-}
-
+
+// NOTE: not supported on Windows
+
+void BeginProfiling() {
+}
+
+void ResetProfile() {
+}
+
void EndProfiling(FILE*) {
-}
-
-void EndProfiling() {
-}
-
+}
+
+void EndProfiling() {
+}
+
#endif