diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/execprofile | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/execprofile')
-rw-r--r-- | library/cpp/execprofile/annotate_profile.pl | 360 | ||||
-rw-r--r-- | library/cpp/execprofile/autostart/start.cpp | 15 | ||||
-rw-r--r-- | library/cpp/execprofile/autostart/ya.make | 13 | ||||
-rw-r--r-- | library/cpp/execprofile/profile.cpp | 417 | ||||
-rw-r--r-- | library/cpp/execprofile/profile.h | 17 | ||||
-rw-r--r-- | library/cpp/execprofile/ya.make | 9 |
6 files changed, 831 insertions, 0 deletions
diff --git a/library/cpp/execprofile/annotate_profile.pl b/library/cpp/execprofile/annotate_profile.pl new file mode 100644 index 00000000000..1a8c5d65a15 --- /dev/null +++ b/library/cpp/execprofile/annotate_profile.pl @@ -0,0 +1,360 @@ +#!/usr/bin/env perl + +# +# Takes profile file as an input and prints out annotated disassmebly +# Usage: +# ./annotate_profile.pl <binary_name> <profile_name> +# + + +# Function to draw bar of the specified length filled up to specified length +sub DRAW_BAR($$) { + my ($length, $filled) = @_; + my $bar = ""; + --$filled; + while ($filled > 0) { + $bar = $bar . "X"; + $length--; + $filled--; + } + while ($length > 0) { + $bar = $bar . " "; + $length--; + } + return $bar; +} + +my $curFunc = ""; +my $curModule = ""; +my $allHits = 0; +my %moduleHits; +my %funcModule; +my %funcHits; +my %funcHottestCount; +my %funcStart; +my %funcEnd; +my %funcNames; +my %funcBaseAddrs; +my %funcSizes; +my %addrHits; +my %addrFunc; +my %moduleBaseAddr; +my @funcSortByAddr; +my %demangledNames; +my %srcLineHits; +my %srcFileHits; + +# Demagles C++ function name +sub DEMANGLE($) { + my ($name) = @_; + if (exists $demangledNames{$name}) { + return $demangledNames{$name}; + } + if ($name =~ /^_Z/) { + my $cmd = "c++filt -p \'$name\' |"; + open(my $RES, $cmd ) || die "No c++filt"; + my $demangled_name = <$RES>; + chomp($demangled_name); + close $RES; + if (length($demangled_name) !=0) { + $name = $demangled_name; + } + } + return $name; +} + +# Saves function info +sub AddFunc($$$$$) +{ + my ($func, $bin_file, $baseAddr, $size, $name) = @_; + $funcModule{$func} = $bin_file; + $funcBaseAddrs{$func} = $baseAddr; + # A function with the same base address can be mentioned multiple times with different sizes (0, and non-0, WTF??) + if ((! exists $funcSizes{$func}) || ($funcSizes{$func} < $size)) { + $funcSizes{$func} = $size; + } + $funcNames{$func} = $name; + $funcStart{$func} = $func; +# printf "%08x\t%08x\t%016x\t%s\t%s\n", +# $funcBaseAddrs{$func}, $funcSizes{$func}, $moduleBaseAddr, $funcModule{$func}, $funcNames{$func}; +} + +# Reads list of all functions in a module +sub ReadFunctionList($$) { + my ($bin_file, $moduleBaseAddr) = @_; + if (! -e $bin_file) { + return; + } + my $readelf_cmd = "readelf -W -s $bin_file |"; +# print "$readelf_cmd\n"; + my $IN_FILE; + open($IN_FILE, $readelf_cmd) || die "couldn't open the file!"; + while (my $line = <$IN_FILE>) { + chomp($line); + # " 33: 00000000000a0fc0 433 FUNC GLOBAL DEFAULT 10 getipnodebyaddr@@FBSD_1.0" + if ($line =~ m/^\s*\d+:\s+([0-9a-fA-F]+)\s+(\d+)\s+FUNC\s+\w+\s+DEFAULT\s+\d+\s+(.*)$/) { + # Read function info + my $name = $3; + my $baseAddr = hex($1) + $moduleBaseAddr; + my $func = $baseAddr; + my $size = $2; + AddFunc($func, $bin_file, $baseAddr, $size, $name); + } + } + close($IN_FILE); + @funcSortByAddr = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcBaseAddrs; +# printf "%016x\t%s\t%d\n", $moduleBaseAddr, $bin_file, $#funcSortByAddr+1; +} + +# Reads the profile and attributes address hits to the functions +sub ReadSamples() { + # First pass saves all samples in a hash-table + my $samples_file = $ARGV[1]; + my $IN_FILE; + open($IN_FILE, $samples_file)|| die "couldn't open the file!"; + my $curFuncInd = 0; + my $curFunc = 0; + my $curFuncBegin = 0; + my $curFuncEnd = 0; + my $curModule = ""; + my $curModuleBase = 0; + my $read_samples = 0; + my $samplesStarted = 0; + while (my $line = <$IN_FILE>) { + chomp($line); + + if ($line =~ m/^samples:\s+(\d+)\s+unique:\s+(\d+)\s+dropped:\s+(\d+)\s+searchskips:\s+(\d+)$/) { + $total_samples = $1; + $unique_samples = $2; + $dropped_samples = $3; + $search_skips = $4; + next; + } + + if ($line =~ m/^Samples:$/) { + $samplesStarted = 1; + next; + } elsif (!$samplesStarted) { + print "$line\n"; + next; + } + +# print "$line\n"; + if ($line =~ m/^Func\t\d+/) { + # "Func 2073 0x803323000 0x803332fd0 /lib/libthr.so.3 pthread_cond_init" + my @cols = split(/\t/, $line); + $curModule = $cols[4]; + $curModuleBase = hex($cols[2]); + if (0x400000 == $curModuleBase) { + $curModuleBase = 0; + } + $curFunc = hex($cols[3]); + if (! exists $moduleBaseAddr{$curModule}) { + $moduleBaseAddr{$curModule} = $curModuleBase; + ReadFunctionList($curModule, $curModuleBase); + } + if (! exists $funcNames{$curFunc}) { + my $name = sprintf("unknown_0x%08x", $curFunc); + AddFunc($curFunc, $curModule, $curFunc, 0, $name); + } + } elsif ($line =~ m/^\d+\t0x([0-9,a-f,A-F]+)\t(\d+)/) { + # Read one sample for the current function + $read_samples++; + my $addr = hex($1); +# print "$addr\n"; + if ($addr >= $curFuncEnd) { + # Find the function the current address belongs to + while ($curFuncInd <= $#funcSortByAddr) { + my $f = $funcSortByAddr[$curFuncInd]; + my $begin = $funcBaseAddrs{$f}; + my $end = $funcBaseAddrs{$f} + $funcSizes{$f}; + if ($begin <= $addr and $addr < $end) { + $curFunc = $f; + $funcStart{$curFunc} = $addr; + $curFuncBegin = $begin; + $curFuncEnd = $end; + last; + } elsif ($addr < $begin) { +# printf "X3: func:%08x\tname:%s\tbase:%08x\tsize:%08x\t%s\nline:%s\n", +# $curFunc, $funcNames{$curFunc}, $funcBaseAddrs{$curFunc}, $funcSizes{$curFunc}, $curModule, $line; + last; + } + ++$curFuncInd; + } + } + + $funcHits{$curFunc} += $2; + if ($funcHottestCount{$curFunc} < $2) { + $funcHottestCount{$curFunc} = $2; + } + $addrHits{$addr} = $2; + $addrFunc{$addr} = $curFunc; + $funcEnd{$curFunc} = $addr; + $allHits += $2; + $moduleHits{$curModule} += $2; + +# printf "%08x\t%08x\t%08x\t%08x\t%s\n", $addr, $curFunc, $curFuncBegin, $curFuncEnd, $funcNames{$curFunc}; + } + } + close($IN_FILE); + + printf "\nsamples: %d unique: %d dropped: %d searchskips: %d\n", $total_samples, $unique_samples, $dropped_samples, $search_skips; + if ($read_samples != $unique_samples) { + printf "\n-----------------------------------------------------------------------------------------------------\n"; + printf "!!!!WARNING: read %d samples, expected %d samples, profiling results might be not acqurate!!!!", $read_samples, $unique_samples; + printf "\n-----------------------------------------------------------------------------------------------------\n"; + } +} + +# Dumps module stats +sub DumpModules() { + # Sort functions by hit counts and dump the list + my @modules = sort {$a <=> $b } keys %moduleHits; + for (my $i = 0; $i <= $#modules; ++$i) { + my $m = $modules[$i]; + my $cnt = $moduleHits{$m}; + my $perc = 100.0 * $cnt / $allHits; + printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $m; + } +} + +# Dumps top N hot functions +sub DumpHotFunc($) { + my ($maxCnt) = @_; + # Sort functions by hit counts and dump the list + my @hotFunc = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits; +# print $#hotFunc; + for (my $i = 0; $i <= $#hotFunc && $i < $maxCnt; ++$i) { + my $f = $hotFunc[$i]; + my $cnt = $funcHits{$f}; + my $perc = 100.0 * $cnt / $allHits; + printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), DEMANGLE($funcNames{$f}); + } +} + +# Dumps top N hotspots (hot addresses) +sub DumpHotSpots($) { + my ($maxCnt) = @_; + # Sort addresses by hit counts and dump the list + my @hotSpots = sort {$addrHits{$b} <=> $addrHits{$a} } keys %addrHits; + for (my $i = 0; $i <= $#hotSpots && $i < $maxCnt; ++$i) { + my $s = $hotSpots[$i]; + my $cnt = $addrHits{$s}; + my $perc = 100.0 * $cnt / $allHits; + my $f = $addrFunc{$s}; + my $fname = $funcNames{$f}; + printf "%12d\t%6.2f%% |%s 0x%016x\t%s + 0x%x\n", + $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $s, DEMANGLE($fname), $s - $funcBaseAddrs{$f}; + } +} + +# Adds hit informations to a disassembly line +sub ANNOTATE_DISASSM($$$$) { + my ($address, $disassm, $max_hit_count, $func_hit_count) = @_; + my $hit_count = $addrHits{$address}; + my $perc = sprintf("% 7.2f%%", 100*$hit_count/$func_hit_count); + $address = sprintf("% 8x", $address); + print $address . " " . $hit_count . "\t" . $perc . " |" . + DRAW_BAR(20, 20*$hit_count/$max_hit_count) . "\t" . $disassm . "\n"; +} + +# Dumps annotated disassembly of the specified function (actually not the whole function but +# just the addresses between the first and last hit) +sub DumpDisasm($) { + my ($name) = @_; + if (exists $funcStart{$name} && exists $funcEnd{$name} && $funcStart{$name}!=0) { + my $module = $funcModule{$name}; + my $modBase = $moduleBaseAddr{$module}; + my $start_address = $funcStart{$name} - $modBase; + my $stop_address = $funcEnd{$name} - $modBase + 1; +# print " " . $funcStart{$name} . " " . $funcEnd{$name} . " $modBase ---"; + my $max_hit_count = $funcHits{$name}; + my $objdump_cmd = "objdump -C -d -l --start-address=" . $start_address . + " --stop-address=" . $stop_address . " " . $module . " |"; + if ($stop_address - $start_address < 10000000) { # don't try to disaassemble more than 10MB, because most likely it's a bug +# print STDERR $objdump_cmd . "\n"; + open(my $OBJDUMP, $objdump_cmd) || die "No objdump"; + my $srcLine = "func# ". $name; + my $srcFile = $module; + while (my $objdump_line = <$OBJDUMP>) { + # filter disassembly lines + if ($objdump_line =~ /^Disassembly of section/) { + } elsif ($objdump_line =~ m/^\s*([0-9,a-f,A-F]+):\s*(.*)/) { + my $addr = hex($1); + my $hit_count = $addrHits{$addr}; + if ($hit_count > 0) { + $srcLineHits{$srcLine} += $hit_count; + $srcFileHits{$srcFile} += $hit_count; + } + ANNOTATE_DISASSM($addr + $modBase, $2, $funcHottestCount{$name}, $max_hit_count); + } elsif ($objdump_line =~ m/^(\/.*):(\d+)$/) { + $srcLine = $objdump_line; + $srcFile = $1; + chomp($srcLine); + print $objdump_line; + } else { + print $objdump_line; + } + } + close $OBJDUMP; + } + } +} + +# Dumps disassemlby for top N hot functions +sub DumpFuncDissasm($) { + (my $maxCnt) = @_; + my @funcs = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits; + print $#funcs . "\n"; + for (my $i = 0; $i <= $#funcs && $i < $maxCnt; ++$i) { + my $f = $funcs[$i]; + print "\n--------------------------------------------------------------------------------------------------------------\n"; + printf "hits:%d\t%7.2f%%\tbase:%08x\tstart:%08x\tend:%08x\t%s\n", + $funcHits{$f}, 100*$funcHits{$f}/$allHits, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, DEMANGLE($funcNames{$f}); + print "--------------------------------------------------------------------------------------------------------------\n"; + DumpDisasm($f); + } +} + +sub DumpSrcFiles($) { + (my $maxCnt) = @_; + my @srcFiles = sort {$srcFileHits{$b} <=> $srcFileHits{$a} } keys %srcFileHits; + for (my $i = 0; $i <= $#srcFiles && $i < $maxCnt; ++$i) { + my $f = $srcFiles[$i]; + my $cnt = $srcFileHits{$f}; + printf "%12d\t%6.2f%% |%s %s\n", $cnt, 100*$cnt/$allHits, DRAW_BAR(20, 20*$cnt/$allHits), $f; + } +} + +sub DumpSrcLines($) { + (my $maxCnt) = @_; + my @srcLines = sort {$srcLineHits{$b} <=> $srcLineHits{$a} } keys %srcLineHits; + for (my $i = 0; $i <= $#srcLines && $i < $maxCnt; ++$i) { + my $l = $srcLines[$i]; + my $cnt = $srcLineHits{$l}; + printf "%12d\t%6.2f%% |%s %s\n", $cnt, 100*$cnt/$allHits, DRAW_BAR(20, 20*$cnt/$allHits), $l; + } +} + +ReadFunctionList($ARGV[0], 0); +ReadSamples(); +print "\nModules:\n"; +DumpModules(); +print "\nHot functions:\n"; +DumpHotFunc(100); +print "\nHotspots:\n"; +DumpHotSpots(100); +DumpFuncDissasm(100); +print "\nHot src files:\n"; +DumpSrcFiles(100); +print "\nHot src lines:\n"; +DumpSrcLines(100); + +# my @funcs = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcHits; +# printf "%d\n", $#funcs; +# for (my $i = 0; $i <= $#funcs; ++$i) { +# my $f = $funcs[$i]; +# printf "%s\t%d\tbase:%08x\tstart:%08x\tend:%08x\t%s\n", +# $funcNames{$f}, $funcHits{$f}, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, $funcModule{$f}; +# #DumpDisasm($f); +# } diff --git a/library/cpp/execprofile/autostart/start.cpp b/library/cpp/execprofile/autostart/start.cpp new file mode 100644 index 00000000000..4fffb03ae1c --- /dev/null +++ b/library/cpp/execprofile/autostart/start.cpp @@ -0,0 +1,15 @@ +#include <library/cpp/execprofile/profile.h> + +namespace { + struct TInit { + inline TInit() { + BeginProfiling(); + } + + inline ~TInit() { + EndProfiling(); + } + }; + + const TInit initer; +} diff --git a/library/cpp/execprofile/autostart/ya.make b/library/cpp/execprofile/autostart/ya.make new file mode 100644 index 00000000000..a5b61f075f9 --- /dev/null +++ b/library/cpp/execprofile/autostart/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +OWNER(pg) + +PEERDIR( + library/cpp/execprofile +) + +SRCS( + GLOBAL start.cpp +) + +END() diff --git a/library/cpp/execprofile/profile.cpp b/library/cpp/execprofile/profile.cpp new file mode 100644 index 00000000000..d05de20203f --- /dev/null +++ b/library/cpp/execprofile/profile.cpp @@ -0,0 +1,417 @@ +#include <util/system/defaults.h> + +#include "profile.h" + +#if defined(_unix_) && !defined(_bionic_) && !defined(_cygwin_) + +#include <signal.h> +#include <sys/time.h> +#include <sys/resource.h> +#if defined(_darwin_) +#include <sys/ucontext.h> +#else +#include <ucontext.h> +#endif +#include <dlfcn.h> + +#include <util/system/platform.h> +#include <util/generic/hash.h> +#include <util/generic/map.h> +#include <util/generic/noncopyable.h> +#include <util/generic/algorithm.h> +#include <util/generic/vector.h> +#include <util/stream/file.h> +#include <util/string/util.h> +#include <util/system/datetime.h> + +// This class sets SIGPROF handler and captures instruction pointer in it. +class TExecutionSampler : TNonCopyable { +public: + typedef TVector<std::pair<void*, size_t>> TSampleVector; + + struct TStats { + ui64 SavedSamples; + ui64 DroppedSamples; + ui64 SearchSkipCount; + }; + + // NOTE: There is no synchronization here as the instance is supposed to be + // created on the main thread. + static TExecutionSampler* Instance() { + if (SInstance == nullptr) { + SInstance = new TExecutionSampler(); + } + + return SInstance; + } + + void Start() { + // Set signal handler + struct sigaction sa; + sa.sa_sigaction = ProfilerSignalHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGPROF, &sa, &OldSignalHandler) != 0) + return; + + // Set interval timer + itimerval tv; + tv.it_interval.tv_sec = tv.it_value.tv_sec = 0; + tv.it_interval.tv_usec = tv.it_value.tv_usec = SAMPLE_INTERVAL; + setitimer(ITIMER_PROF, &tv, &OldTimerValue); + + Started = true; + } + + void Stop(TSampleVector& sampleVector, TStats& stats) { + // Reset signal handler and timer + if (Started) { + setitimer(ITIMER_PROF, &OldTimerValue, nullptr); + sleep(1); + } + + WaitForWriteFlag(); + + if (Started) { + sigaction(SIGPROF, &OldSignalHandler, nullptr); + Started = false; + } + + TExecutionSampler::TSampleVector hits; + hits.reserve(Samples); + for (size_t i = 0; i < SZ; ++i) { + if (Ips[i].first != nullptr) { + hits.push_back(Ips[i]); + } + } + stats.SavedSamples = Samples; + stats.DroppedSamples = AtomicGet(DroppedSamples); + stats.SearchSkipCount = SearchSkipCount; + AtomicUnlock(&WriteFlag); + + Sort(hits.begin(), hits.end(), TCompareFirst()); + + sampleVector.swap(hits); + } + + void ResetStats() { + WaitForWriteFlag(); + Clear(); + AtomicUnlock(&WriteFlag); + } + +private: + static const size_t SZ = 2 * 1024 * 1024; // size of the hash table + // inserts work faster if it's a power of 2 + static const int SAMPLE_INTERVAL = 1000; // in microseconds + + struct TCompareFirst { + bool operator()(const std::pair<void*, size_t>& a, const std::pair<void*, size_t>& b) const { + return a.first < b.first; + } + }; + + TExecutionSampler() + : Started(false) + , Ips(SZ) + , WriteFlag(0) + , DroppedSamples(0) + , Samples(0) + , UniqueSamples(0) + , SearchSkipCount(0) + { + } + + ~TExecutionSampler() = default; + + // Signal handler is not allowed to do anything that can deadlock with activity + // on the thread to which the signal is delivered or corrupt data structures that + // were in process of update. + // One such thing is memory allocation. That's why a fixed size vector is + // preallocated at start. + static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) { + (void)info; + if (signal != SIGPROF) { + return; + } + + ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context); + Y_ASSERT(SInstance != nullptr); + + SInstance->CaptureIP(GetIp(&ucontext->uc_mcontext)); + } + + void WaitForWriteFlag() { + // Wait for write flag to be reset + ui32 delay = 100; + while (!AtomicTryLock(&WriteFlag)) { + usleep(delay); + delay += delay; + delay = Min(delay, (ui32)5000); + } + } + + void CaptureIP(void* rip) { + // Check if the handler on another thread is in the process of adding a sample + // If this is the case, we just drop the current sample as this should happen + // rarely. + if (AtomicTryLock(&WriteFlag)) { + AddSample(rip); + AtomicUnlock(&WriteFlag); + } else { + AtomicIncrement(DroppedSamples); + } + } + + // Hash function applied to the addresses + static inline ui32 Hash(void* key) { + return ((size_t)key + (size_t)key / SZ) % SZ; + } + + // Get instruction pointer from the context + static inline void* GetIp(const mcontext_t* mctx) { +#if defined _freebsd_ +#if defined _64_ + return (void*)mctx->mc_rip; +#else + return (void*)mctx->mc_eip; +#endif +#elif defined _linux_ +#if defined _64_ +#if defined(_arm_) + return (void*)mctx->pc; +#else + return (void*)mctx->gregs[REG_RIP]; +#endif +#else + return (void*)mctx->gregs[REG_EIP]; +#endif +#elif defined _darwin_ +#if defined _64_ +#if defined(_arm_) + return (void*)(*mctx)->__ss.__pc; +#else + return (void*)(*mctx)->__ss.__rip; +#endif +#else +#if defined(__IOS__) + return (void*)(*mctx)->__ss.__pc; +#else + return (void*)(*mctx)->__ss.__eip; +#endif +#endif +#endif + } + + inline bool AddSample(void* key) { + ui32 slot = Hash(key); + ui32 prevSlot = (slot - 1) % SZ; + + while (key != Ips[slot].first && !IsSlotEmpty(slot) && slot != prevSlot) { + slot = (slot + 1) % SZ; + SearchSkipCount++; + } + + if (key == Ips[slot].first) { + // increment the count + Ips[slot].second++; + ++Samples; + } else if (InsertsAllowed()) { + // add new sample and set the count to 1 + Ips[slot].first = key; + Ips[slot].second = 1; + ++UniqueSamples; + ++Samples; + } else { + // don't insert new sample if the search is becoming too slow + AtomicIncrement(DroppedSamples); + return false; + } + + return true; + } + + inline bool IsSlotEmpty(ui32 slot) const { + return Ips[slot].first == nullptr; + } + + inline bool InsertsAllowed() const { + return UniqueSamples < SZ / 2; + } + + void + Clear() { + Y_ASSERT(WriteFlag == 1); + + for (size_t i = 0; i < SZ; ++i) { + Ips[i] = std::make_pair((void*)nullptr, (size_t)0); + } + Samples = 0; + AtomicSet(DroppedSamples, 0); + UniqueSamples = 0; + SearchSkipCount = 0; + } + + bool Started; + struct sigaction OldSignalHandler; + itimerval OldTimerValue; + + TVector<std::pair<void*, size_t>> + Ips; // The hash table storing addresses and their hitcounts + + // TODO: on a big multiproc cache line false sharing by the flag and count might become an issue + TAtomic WriteFlag; // Is used to syncronize access to the hash table + TAtomic DroppedSamples; // "dropped sample" count will show how many times + // a sample was dropped either because of write conflict + // or because of the hash table had become too filled up + ui64 Samples; // Saved samples count + ui64 UniqueSamples; // Number of unique addresses + ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions + + static TExecutionSampler* SInstance; +}; + +// Performs analysis of samples captured by TExecutionSampler +class TSampleAnalyser : TNonCopyable { +public: + TSampleAnalyser(TExecutionSampler::TSampleVector& samples, const TExecutionSampler::TStats& stats, bool putTimeStamps = false) + : Samples() + , Stats(stats) + , PutTimestamps(putTimeStamps) + { + Samples.swap(samples); + } + + ~TSampleAnalyser() = default; + + void Analyze(FILE* out) const; + +private: + TExecutionSampler::TSampleVector Samples; + TExecutionSampler::TStats Stats; + bool PutTimestamps; +}; + +void TSampleAnalyser::Analyze(FILE* out) const { + fprintf(out, "samples: %" PRIu64 " unique: %" PRIu64 " dropped: %" PRIu64 " searchskips: %" PRIu64 "\n", + (ui64)Stats.SavedSamples, (ui64)Samples.size(), + (ui64)Stats.DroppedSamples, (ui64)Stats.SearchSkipCount); + + fprintf(out, "\nSamples:\n"); + size_t funcCnt = 0; + void* prevModBase = (void*)-1; + void* prevFunc = (void*)-1; + for (size_t i = 0; i < Samples.size(); ++i) { + // print cycle count once in a while to estimate time consumed by + // dumping the samples + if (PutTimestamps && (i % 1000 == 0)) { + ui64 tm = GetCycleCount(); + fprintf(out, "TM: %" PRIu64 "\n", tm); + } + + Dl_info addrInfo; + if (dladdr(Samples[i].first, &addrInfo)) { + if (addrInfo.dli_fbase != prevModBase || addrInfo.dli_saddr != prevFunc) { + fprintf(out, "Func\t%" PRISZT "\t%p\t%p\t%s\t%s\n", + funcCnt, + addrInfo.dli_fbase, + addrInfo.dli_saddr, + addrInfo.dli_fname, + addrInfo.dli_sname); + prevModBase = addrInfo.dli_fbase; + prevFunc = addrInfo.dli_saddr; + ++funcCnt; + } + } else { + fprintf(out, "[dladdr failed]\n"); + } + fprintf(out, "%" PRISZT "\t%p\t%lu\n", i, Samples[i].first, Samples[i].second); + } +} + +TExecutionSampler* TExecutionSampler::SInstance = nullptr; + +// Starts capturing execution samples +void BeginProfiling() { + TExecutionSampler::Instance()->Start(); +} + +// Resets captured execution samples +void ResetProfile() { + TExecutionSampler::Instance()->ResetStats(); +} + +void DumpRUsage(FILE* out) { + rusage ru; + int e = getrusage(RUSAGE_SELF, &ru); + if (e != 0) + return; + + fprintf(out, + "user time: %lf\n" + "system time: %lf\n" + "max RSS: %ld\n" + "shared text: %ld\n" + "unshared data: %ld\n" + "unshared stack: %ld\n" + "page reclaims: %ld\n" + "page faults: %ld\n" + "swaps: %ld\n" + "block input ops: %ld\n" + "block output ops: %ld\n" + "msg sent: %ld\n" + "msg received: %ld\n" + "signals: %ld\n" + "voluntary ctx switches: %ld\n" + "involuntary ctx switches: %ld\n\n", + ru.ru_utime.tv_sec + (ru.ru_utime.tv_usec * 0.000001), + ru.ru_stime.tv_sec + (ru.ru_stime.tv_usec * 0.000001), + ru.ru_maxrss, ru.ru_ixrss, ru.ru_idrss, ru.ru_isrss, + ru.ru_minflt, ru.ru_majflt, ru.ru_nswap, + ru.ru_inblock, ru.ru_oublock, + ru.ru_msgsnd, ru.ru_msgrcv, + ru.ru_nsignals, + ru.ru_nvcsw, ru.ru_nivcsw); +} + +// Pauses capturing execution samples and dumps them to the file +// Samples are not cleared so that profiling can be continued by calling BeginProfiling() +// or it can be started from scratch by calling ResetProfile() and then BeginProfiling() +void EndProfiling(FILE* out) { + DumpRUsage(out); + + TExecutionSampler::TSampleVector samples; + TExecutionSampler::TStats stats; + TExecutionSampler::Instance()->Stop(samples, stats); + + TSampleAnalyser analyzer(samples, stats); + analyzer.Analyze(out); +} + +void EndProfiling() { + static unsigned cnt = 0; + char nameBuf[256]; + snprintf(nameBuf, sizeof(nameBuf), "./%s.%d.%u.profile", getprogname(), (int)getpid(), cnt); + FILE* out = fopen(nameBuf, "a"); + EndProfiling(out); + fclose(out); + ++cnt; +} + +#else + +// NOTE: not supported on Windows + +void BeginProfiling() { +} + +void ResetProfile() { +} + +void EndProfiling(FILE*) { +} + +void EndProfiling() { +} + +#endif diff --git a/library/cpp/execprofile/profile.h b/library/cpp/execprofile/profile.h new file mode 100644 index 00000000000..ccb88666565 --- /dev/null +++ b/library/cpp/execprofile/profile.h @@ -0,0 +1,17 @@ +#pragma once + +#include <stdio.h> + +// Starts capturing execution samples +void BeginProfiling(); + +// Resets captured execution samples +void ResetProfile(); + +// Pauses capturing execution samples and dumps them to the file +// Samples are not cleared so that profiling can be continued by calling BeginProfiling() +// or it can be started from scratch by calling ResetProfile() and then BeginProfiling() +void EndProfiling(FILE* out); + +// Dumps the profile to default file (basename.pid.N.profile) +void EndProfiling(); diff --git a/library/cpp/execprofile/ya.make b/library/cpp/execprofile/ya.make new file mode 100644 index 00000000000..9d202ac4eb2 --- /dev/null +++ b/library/cpp/execprofile/ya.make @@ -0,0 +1,9 @@ +OWNER(g:cpp-contrib) + +LIBRARY() + +SRCS( + profile.cpp +) + +END() |