diff options
author | Alexander Gololobov <davenger@yandex-team.com> | 2022-02-10 16:47:38 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:38 +0300 |
commit | fccc62e9bfdce9be2fe7e0f23479da3a5512211a (patch) | |
tree | c0748b5dcbade83af788c0abfa89c0383d6b779c /library/cpp/execprofile/profile.cpp | |
parent | 39608cdb86363c75ce55b2b9a69841c3b71f22cf (diff) | |
download | ydb-fccc62e9bfdce9be2fe7e0f23479da3a5512211a.tar.gz |
Restoring authorship annotation for Alexander Gololobov <davenger@yandex-team.com>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/execprofile/profile.cpp')
-rw-r--r-- | library/cpp/execprofile/profile.cpp | 586 |
1 files changed, 293 insertions, 293 deletions
diff --git a/library/cpp/execprofile/profile.cpp b/library/cpp/execprofile/profile.cpp index 19003a9a51..d05de20203 100644 --- a/library/cpp/execprofile/profile.cpp +++ b/library/cpp/execprofile/profile.cpp @@ -1,190 +1,190 @@ #include <util/system/defaults.h> -#include "profile.h" - +#include "profile.h" + #if defined(_unix_) && !defined(_bionic_) && !defined(_cygwin_) - -#include <signal.h> -#include <sys/time.h> -#include <sys/resource.h> + +#include <signal.h> +#include <sys/time.h> +#include <sys/resource.h> #if defined(_darwin_) #include <sys/ucontext.h> #else -#include <ucontext.h> +#include <ucontext.h> #endif #include <dlfcn.h> -#include <util/system/platform.h> +#include <util/system/platform.h> #include <util/generic/hash.h> #include <util/generic/map.h> #include <util/generic/noncopyable.h> #include <util/generic/algorithm.h> #include <util/generic/vector.h> -#include <util/stream/file.h> -#include <util/string/util.h> -#include <util/system/datetime.h> - -// This class sets SIGPROF handler and captures instruction pointer in it. +#include <util/stream/file.h> +#include <util/string/util.h> +#include <util/system/datetime.h> + +// This class sets SIGPROF handler and captures instruction pointer in it. class TExecutionSampler : TNonCopyable { -public: +public: typedef TVector<std::pair<void*, size_t>> TSampleVector; - - struct TStats { - ui64 SavedSamples; - ui64 DroppedSamples; - ui64 SearchSkipCount; - }; - - // NOTE: There is no synchronization here as the instance is supposed to be - // created on the main thread. + + struct TStats { + ui64 SavedSamples; + ui64 DroppedSamples; + ui64 SearchSkipCount; + }; + + // NOTE: There is no synchronization here as the instance is supposed to be + // created on the main thread. static TExecutionSampler* Instance() { if (SInstance == nullptr) { - SInstance = new TExecutionSampler(); - } - - return SInstance; - } - + SInstance = new TExecutionSampler(); + } + + return SInstance; + } + void Start() { - // Set signal handler - struct sigaction sa; - sa.sa_sigaction = ProfilerSignalHandler; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGPROF, &sa, &OldSignalHandler) != 0) - return; - - // Set interval timer - itimerval tv; - tv.it_interval.tv_sec = tv.it_value.tv_sec = 0; - tv.it_interval.tv_usec = tv.it_value.tv_usec = SAMPLE_INTERVAL; - setitimer(ITIMER_PROF, &tv, &OldTimerValue); - - Started = true; - } - + // Set signal handler + struct sigaction sa; + sa.sa_sigaction = ProfilerSignalHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGPROF, &sa, &OldSignalHandler) != 0) + return; + + // Set interval timer + itimerval tv; + tv.it_interval.tv_sec = tv.it_value.tv_sec = 0; + tv.it_interval.tv_usec = tv.it_value.tv_usec = SAMPLE_INTERVAL; + setitimer(ITIMER_PROF, &tv, &OldTimerValue); + + Started = true; + } + void Stop(TSampleVector& sampleVector, TStats& stats) { - // Reset signal handler and timer - if (Started) { + // Reset signal handler and timer + if (Started) { setitimer(ITIMER_PROF, &OldTimerValue, nullptr); - sleep(1); - } - - WaitForWriteFlag(); - - if (Started) { + sleep(1); + } + + WaitForWriteFlag(); + + if (Started) { sigaction(SIGPROF, &OldSignalHandler, nullptr); - Started = false; - } - - TExecutionSampler::TSampleVector hits; - hits.reserve(Samples); - for (size_t i = 0; i < SZ; ++i) { + Started = false; + } + + TExecutionSampler::TSampleVector hits; + hits.reserve(Samples); + for (size_t i = 0; i < SZ; ++i) { if (Ips[i].first != nullptr) { - hits.push_back(Ips[i]); - } - } - stats.SavedSamples = Samples; - stats.DroppedSamples = AtomicGet(DroppedSamples); - stats.SearchSkipCount = SearchSkipCount; - AtomicUnlock(&WriteFlag); - + hits.push_back(Ips[i]); + } + } + stats.SavedSamples = Samples; + stats.DroppedSamples = AtomicGet(DroppedSamples); + stats.SearchSkipCount = SearchSkipCount; + AtomicUnlock(&WriteFlag); + Sort(hits.begin(), hits.end(), TCompareFirst()); - - sampleVector.swap(hits); - } - + + sampleVector.swap(hits); + } + void ResetStats() { - WaitForWriteFlag(); - Clear(); - AtomicUnlock(&WriteFlag); - } - -private: + WaitForWriteFlag(); + Clear(); + AtomicUnlock(&WriteFlag); + } + +private: static const size_t SZ = 2 * 1024 * 1024; // size of the hash table // inserts work faster if it's a power of 2 static const int SAMPLE_INTERVAL = 1000; // in microseconds - - struct TCompareFirst { + + struct TCompareFirst { bool operator()(const std::pair<void*, size_t>& a, const std::pair<void*, size_t>& b) const { - return a.first < b.first; - } - }; - - TExecutionSampler() - : Started(false) - , Ips(SZ) - , WriteFlag(0) - , DroppedSamples(0) - , Samples(0) - , UniqueSamples(0) - , SearchSkipCount(0) - { - } - + return a.first < b.first; + } + }; + + TExecutionSampler() + : Started(false) + , Ips(SZ) + , WriteFlag(0) + , DroppedSamples(0) + , Samples(0) + , UniqueSamples(0) + , SearchSkipCount(0) + { + } + ~TExecutionSampler() = default; - - // Signal handler is not allowed to do anything that can deadlock with activity - // on the thread to which the signal is delivered or corrupt data structures that - // were in process of update. - // One such thing is memory allocation. That's why a fixed size vector is - // preallocated at start. + + // Signal handler is not allowed to do anything that can deadlock with activity + // on the thread to which the signal is delivered or corrupt data structures that + // were in process of update. + // One such thing is memory allocation. That's why a fixed size vector is + // preallocated at start. static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) { - (void)info; - if (signal != SIGPROF) { - return; - } - - ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context); + (void)info; + if (signal != SIGPROF) { + return; + } + + ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context); Y_ASSERT(SInstance != nullptr); - - SInstance->CaptureIP(GetIp(&ucontext->uc_mcontext)); - } - + + SInstance->CaptureIP(GetIp(&ucontext->uc_mcontext)); + } + void WaitForWriteFlag() { - // Wait for write flag to be reset - ui32 delay = 100; - while (!AtomicTryLock(&WriteFlag)) { - usleep(delay); - delay += delay; + // Wait for write flag to be reset + ui32 delay = 100; + while (!AtomicTryLock(&WriteFlag)) { + usleep(delay); + delay += delay; delay = Min(delay, (ui32)5000); - } - } - + } + } + void CaptureIP(void* rip) { - // Check if the handler on another thread is in the process of adding a sample - // If this is the case, we just drop the current sample as this should happen - // rarely. - if (AtomicTryLock(&WriteFlag)) { - AddSample(rip); - AtomicUnlock(&WriteFlag); - } else { + // Check if the handler on another thread is in the process of adding a sample + // If this is the case, we just drop the current sample as this should happen + // rarely. + if (AtomicTryLock(&WriteFlag)) { + AddSample(rip); + AtomicUnlock(&WriteFlag); + } else { AtomicIncrement(DroppedSamples); - } - } - - // Hash function applied to the addresses + } + } + + // Hash function applied to the addresses static inline ui32 Hash(void* key) { - return ((size_t)key + (size_t)key / SZ) % SZ; - } - - // Get instruction pointer from the context + return ((size_t)key + (size_t)key / SZ) % SZ; + } + + // Get instruction pointer from the context static inline void* GetIp(const mcontext_t* mctx) { -#if defined _freebsd_ +#if defined _freebsd_ #if defined _64_ - return (void*)mctx->mc_rip; + return (void*)mctx->mc_rip; #else - return (void*)mctx->mc_eip; + return (void*)mctx->mc_eip; #endif -#elif defined _linux_ +#elif defined _linux_ #if defined _64_ #if defined(_arm_) return (void*)mctx->pc; #else - return (void*)mctx->gregs[REG_RIP]; + return (void*)mctx->gregs[REG_RIP]; #endif #else - return (void*)mctx->gregs[REG_EIP]; + return (void*)mctx->gregs[REG_EIP]; #endif #elif defined _darwin_ #if defined _64_ @@ -199,67 +199,67 @@ private: #else return (void*)(*mctx)->__ss.__eip; #endif -#endif #endif - } - +#endif + } + inline bool AddSample(void* key) { - ui32 slot = Hash(key); - ui32 prevSlot = (slot - 1) % SZ; - - while (key != Ips[slot].first && !IsSlotEmpty(slot) && slot != prevSlot) { - slot = (slot + 1) % SZ; - SearchSkipCount++; - } - - if (key == Ips[slot].first) { - // increment the count - Ips[slot].second++; - ++Samples; - } else if (InsertsAllowed()) { - // add new sample and set the count to 1 - Ips[slot].first = key; - Ips[slot].second = 1; - ++UniqueSamples; - ++Samples; - } else { - // don't insert new sample if the search is becoming too slow + ui32 slot = Hash(key); + ui32 prevSlot = (slot - 1) % SZ; + + while (key != Ips[slot].first && !IsSlotEmpty(slot) && slot != prevSlot) { + slot = (slot + 1) % SZ; + SearchSkipCount++; + } + + if (key == Ips[slot].first) { + // increment the count + Ips[slot].second++; + ++Samples; + } else if (InsertsAllowed()) { + // add new sample and set the count to 1 + Ips[slot].first = key; + Ips[slot].second = 1; + ++UniqueSamples; + ++Samples; + } else { + // don't insert new sample if the search is becoming too slow AtomicIncrement(DroppedSamples); - return false; - } - - return true; - } - + return false; + } + + return true; + } + inline bool IsSlotEmpty(ui32 slot) const { return Ips[slot].first == nullptr; - } - + } + inline bool InsertsAllowed() const { - return UniqueSamples < SZ / 2; - } - - void + return UniqueSamples < SZ / 2; + } + + void Clear() { Y_ASSERT(WriteFlag == 1); - - for (size_t i = 0; i < SZ; ++i) { + + for (size_t i = 0; i < SZ; ++i) { Ips[i] = std::make_pair((void*)nullptr, (size_t)0); - } - Samples = 0; - AtomicSet(DroppedSamples, 0); - UniqueSamples = 0; - SearchSkipCount = 0; - } - + } + Samples = 0; + AtomicSet(DroppedSamples, 0); + UniqueSamples = 0; + SearchSkipCount = 0; + } + bool Started; struct sigaction OldSignalHandler; itimerval OldTimerValue; - + TVector<std::pair<void*, size_t>> Ips; // The hash table storing addresses and their hitcounts - - // TODO: on a big multiproc cache line false sharing by the flag and count might become an issue + + // TODO: on a big multiproc cache line false sharing by the flag and count might become an issue TAtomic WriteFlag; // Is used to syncronize access to the hash table TAtomic DroppedSamples; // "dropped sample" count will show how many times // a sample was dropped either because of write conflict @@ -267,87 +267,87 @@ private: ui64 Samples; // Saved samples count ui64 UniqueSamples; // Number of unique addresses ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions - - static TExecutionSampler* SInstance; -}; - -// Performs analysis of samples captured by TExecutionSampler -class TSampleAnalyser : TNonCopyable { -public: - TSampleAnalyser(TExecutionSampler::TSampleVector& samples, const TExecutionSampler::TStats& stats, bool putTimeStamps = false) - : Samples() - , Stats(stats) - , PutTimestamps(putTimeStamps) - { - Samples.swap(samples); - } - + + static TExecutionSampler* SInstance; +}; + +// Performs analysis of samples captured by TExecutionSampler +class TSampleAnalyser : TNonCopyable { +public: + TSampleAnalyser(TExecutionSampler::TSampleVector& samples, const TExecutionSampler::TStats& stats, bool putTimeStamps = false) + : Samples() + , Stats(stats) + , PutTimestamps(putTimeStamps) + { + Samples.swap(samples); + } + ~TSampleAnalyser() = default; - - void Analyze(FILE* out) const; - -private: - TExecutionSampler::TSampleVector Samples; + + void Analyze(FILE* out) const; + +private: + TExecutionSampler::TSampleVector Samples; TExecutionSampler::TStats Stats; - bool PutTimestamps; -}; - + bool PutTimestamps; +}; + void TSampleAnalyser::Analyze(FILE* out) const { fprintf(out, "samples: %" PRIu64 " unique: %" PRIu64 " dropped: %" PRIu64 " searchskips: %" PRIu64 "\n", (ui64)Stats.SavedSamples, (ui64)Samples.size(), (ui64)Stats.DroppedSamples, (ui64)Stats.SearchSkipCount); - - fprintf(out, "\nSamples:\n"); - size_t funcCnt = 0; - void* prevModBase = (void*)-1; - void* prevFunc = (void*)-1; - for (size_t i = 0; i < Samples.size(); ++i) { - // print cycle count once in a while to estimate time consumed by - // dumping the samples - if (PutTimestamps && (i % 1000 == 0)) { - ui64 tm = GetCycleCount(); + + fprintf(out, "\nSamples:\n"); + size_t funcCnt = 0; + void* prevModBase = (void*)-1; + void* prevFunc = (void*)-1; + for (size_t i = 0; i < Samples.size(); ++i) { + // print cycle count once in a while to estimate time consumed by + // dumping the samples + if (PutTimestamps && (i % 1000 == 0)) { + ui64 tm = GetCycleCount(); fprintf(out, "TM: %" PRIu64 "\n", tm); - } - - Dl_info addrInfo; - if (dladdr(Samples[i].first, &addrInfo)) { - if (addrInfo.dli_fbase != prevModBase || addrInfo.dli_saddr != prevFunc) { + } + + Dl_info addrInfo; + if (dladdr(Samples[i].first, &addrInfo)) { + if (addrInfo.dli_fbase != prevModBase || addrInfo.dli_saddr != prevFunc) { fprintf(out, "Func\t%" PRISZT "\t%p\t%p\t%s\t%s\n", funcCnt, addrInfo.dli_fbase, addrInfo.dli_saddr, addrInfo.dli_fname, addrInfo.dli_sname); - prevModBase = addrInfo.dli_fbase; - prevFunc = addrInfo.dli_saddr; - ++funcCnt; - } - } else { - fprintf(out, "[dladdr failed]\n"); - } + prevModBase = addrInfo.dli_fbase; + prevFunc = addrInfo.dli_saddr; + ++funcCnt; + } + } else { + fprintf(out, "[dladdr failed]\n"); + } fprintf(out, "%" PRISZT "\t%p\t%lu\n", i, Samples[i].first, Samples[i].second); - } -} - + } +} + TExecutionSampler* TExecutionSampler::SInstance = nullptr; - -// Starts capturing execution samples -void BeginProfiling() { - TExecutionSampler::Instance()->Start(); -} - -// Resets captured execution samples -void ResetProfile() { - TExecutionSampler::Instance()->ResetStats(); -} - -void DumpRUsage(FILE* out) { - rusage ru; - int e = getrusage(RUSAGE_SELF, &ru); - if (e != 0) - return; - - fprintf(out, + +// Starts capturing execution samples +void BeginProfiling() { + TExecutionSampler::Instance()->Start(); +} + +// Resets captured execution samples +void ResetProfile() { + TExecutionSampler::Instance()->ResetStats(); +} + +void DumpRUsage(FILE* out) { + rusage ru; + int e = getrusage(RUSAGE_SELF, &ru); + if (e != 0) + return; + + fprintf(out, "user time: %lf\n" "system time: %lf\n" "max RSS: %ld\n" @@ -372,46 +372,46 @@ void DumpRUsage(FILE* out) { ru.ru_msgsnd, ru.ru_msgrcv, ru.ru_nsignals, ru.ru_nvcsw, ru.ru_nivcsw); -} - -// Pauses capturing execution samples and dumps them to the file -// Samples are not cleared so that profiling can be continued by calling BeginProfiling() -// or it can be started from scratch by calling ResetProfile() and then BeginProfiling() -void EndProfiling(FILE* out) { - DumpRUsage(out); - - TExecutionSampler::TSampleVector samples; - TExecutionSampler::TStats stats; - TExecutionSampler::Instance()->Stop(samples, stats); - - TSampleAnalyser analyzer(samples, stats); - analyzer.Analyze(out); -} - -void EndProfiling() { - static unsigned cnt = 0; - char nameBuf[256]; - snprintf(nameBuf, sizeof(nameBuf), "./%s.%d.%u.profile", getprogname(), (int)getpid(), cnt); - FILE* out = fopen(nameBuf, "a"); - EndProfiling(out); - fclose(out); - ++cnt; -} - +} + +// Pauses capturing execution samples and dumps them to the file +// Samples are not cleared so that profiling can be continued by calling BeginProfiling() +// or it can be started from scratch by calling ResetProfile() and then BeginProfiling() +void EndProfiling(FILE* out) { + DumpRUsage(out); + + TExecutionSampler::TSampleVector samples; + TExecutionSampler::TStats stats; + TExecutionSampler::Instance()->Stop(samples, stats); + + TSampleAnalyser analyzer(samples, stats); + analyzer.Analyze(out); +} + +void EndProfiling() { + static unsigned cnt = 0; + char nameBuf[256]; + snprintf(nameBuf, sizeof(nameBuf), "./%s.%d.%u.profile", getprogname(), (int)getpid(), cnt); + FILE* out = fopen(nameBuf, "a"); + EndProfiling(out); + fclose(out); + ++cnt; +} + #else - -// NOTE: not supported on Windows - -void BeginProfiling() { -} - -void ResetProfile() { -} - + +// NOTE: not supported on Windows + +void BeginProfiling() { +} + +void ResetProfile() { +} + void EndProfiling(FILE*) { -} - -void EndProfiling() { -} - +} + +void EndProfiling() { +} + #endif |