aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/execprofile
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/execprofile
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/execprofile')
-rw-r--r--library/cpp/execprofile/annotate_profile.pl360
-rw-r--r--library/cpp/execprofile/autostart/start.cpp15
-rw-r--r--library/cpp/execprofile/autostart/ya.make13
-rw-r--r--library/cpp/execprofile/profile.cpp417
-rw-r--r--library/cpp/execprofile/profile.h17
-rw-r--r--library/cpp/execprofile/ya.make9
6 files changed, 831 insertions, 0 deletions
diff --git a/library/cpp/execprofile/annotate_profile.pl b/library/cpp/execprofile/annotate_profile.pl
new file mode 100644
index 00000000000..1a8c5d65a15
--- /dev/null
+++ b/library/cpp/execprofile/annotate_profile.pl
@@ -0,0 +1,360 @@
+#!/usr/bin/env perl
+
+#
+# Takes profile file as an input and prints out annotated disassmebly
+# Usage:
+# ./annotate_profile.pl <binary_name> <profile_name>
+#
+
+
+# Function to draw bar of the specified length filled up to specified length
+sub DRAW_BAR($$) {
+ my ($length, $filled) = @_;
+ my $bar = "";
+ --$filled;
+ while ($filled > 0) {
+ $bar = $bar . "X";
+ $length--;
+ $filled--;
+ }
+ while ($length > 0) {
+ $bar = $bar . " ";
+ $length--;
+ }
+ return $bar;
+}
+
+my $curFunc = "";
+my $curModule = "";
+my $allHits = 0;
+my %moduleHits;
+my %funcModule;
+my %funcHits;
+my %funcHottestCount;
+my %funcStart;
+my %funcEnd;
+my %funcNames;
+my %funcBaseAddrs;
+my %funcSizes;
+my %addrHits;
+my %addrFunc;
+my %moduleBaseAddr;
+my @funcSortByAddr;
+my %demangledNames;
+my %srcLineHits;
+my %srcFileHits;
+
+# Demagles C++ function name
+sub DEMANGLE($) {
+ my ($name) = @_;
+ if (exists $demangledNames{$name}) {
+ return $demangledNames{$name};
+ }
+ if ($name =~ /^_Z/) {
+ my $cmd = "c++filt -p \'$name\' |";
+ open(my $RES, $cmd ) || die "No c++filt";
+ my $demangled_name = <$RES>;
+ chomp($demangled_name);
+ close $RES;
+ if (length($demangled_name) !=0) {
+ $name = $demangled_name;
+ }
+ }
+ return $name;
+}
+
+# Saves function info
+sub AddFunc($$$$$)
+{
+ my ($func, $bin_file, $baseAddr, $size, $name) = @_;
+ $funcModule{$func} = $bin_file;
+ $funcBaseAddrs{$func} = $baseAddr;
+ # A function with the same base address can be mentioned multiple times with different sizes (0, and non-0, WTF??)
+ if ((! exists $funcSizes{$func}) || ($funcSizes{$func} < $size)) {
+ $funcSizes{$func} = $size;
+ }
+ $funcNames{$func} = $name;
+ $funcStart{$func} = $func;
+# printf "%08x\t%08x\t%016x\t%s\t%s\n",
+# $funcBaseAddrs{$func}, $funcSizes{$func}, $moduleBaseAddr, $funcModule{$func}, $funcNames{$func};
+}
+
+# Reads list of all functions in a module
+sub ReadFunctionList($$) {
+ my ($bin_file, $moduleBaseAddr) = @_;
+ if (! -e $bin_file) {
+ return;
+ }
+ my $readelf_cmd = "readelf -W -s $bin_file |";
+# print "$readelf_cmd\n";
+ my $IN_FILE;
+ open($IN_FILE, $readelf_cmd) || die "couldn't open the file!";
+ while (my $line = <$IN_FILE>) {
+ chomp($line);
+ # " 33: 00000000000a0fc0 433 FUNC GLOBAL DEFAULT 10 getipnodebyaddr@@FBSD_1.0"
+ if ($line =~ m/^\s*\d+:\s+([0-9a-fA-F]+)\s+(\d+)\s+FUNC\s+\w+\s+DEFAULT\s+\d+\s+(.*)$/) {
+ # Read function info
+ my $name = $3;
+ my $baseAddr = hex($1) + $moduleBaseAddr;
+ my $func = $baseAddr;
+ my $size = $2;
+ AddFunc($func, $bin_file, $baseAddr, $size, $name);
+ }
+ }
+ close($IN_FILE);
+ @funcSortByAddr = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcBaseAddrs;
+# printf "%016x\t%s\t%d\n", $moduleBaseAddr, $bin_file, $#funcSortByAddr+1;
+}
+
+# Reads the profile and attributes address hits to the functions
+sub ReadSamples() {
+ # First pass saves all samples in a hash-table
+ my $samples_file = $ARGV[1];
+ my $IN_FILE;
+ open($IN_FILE, $samples_file)|| die "couldn't open the file!";
+ my $curFuncInd = 0;
+ my $curFunc = 0;
+ my $curFuncBegin = 0;
+ my $curFuncEnd = 0;
+ my $curModule = "";
+ my $curModuleBase = 0;
+ my $read_samples = 0;
+ my $samplesStarted = 0;
+ while (my $line = <$IN_FILE>) {
+ chomp($line);
+
+ if ($line =~ m/^samples:\s+(\d+)\s+unique:\s+(\d+)\s+dropped:\s+(\d+)\s+searchskips:\s+(\d+)$/) {
+ $total_samples = $1;
+ $unique_samples = $2;
+ $dropped_samples = $3;
+ $search_skips = $4;
+ next;
+ }
+
+ if ($line =~ m/^Samples:$/) {
+ $samplesStarted = 1;
+ next;
+ } elsif (!$samplesStarted) {
+ print "$line\n";
+ next;
+ }
+
+# print "$line\n";
+ if ($line =~ m/^Func\t\d+/) {
+ # "Func 2073 0x803323000 0x803332fd0 /lib/libthr.so.3 pthread_cond_init"
+ my @cols = split(/\t/, $line);
+ $curModule = $cols[4];
+ $curModuleBase = hex($cols[2]);
+ if (0x400000 == $curModuleBase) {
+ $curModuleBase = 0;
+ }
+ $curFunc = hex($cols[3]);
+ if (! exists $moduleBaseAddr{$curModule}) {
+ $moduleBaseAddr{$curModule} = $curModuleBase;
+ ReadFunctionList($curModule, $curModuleBase);
+ }
+ if (! exists $funcNames{$curFunc}) {
+ my $name = sprintf("unknown_0x%08x", $curFunc);
+ AddFunc($curFunc, $curModule, $curFunc, 0, $name);
+ }
+ } elsif ($line =~ m/^\d+\t0x([0-9,a-f,A-F]+)\t(\d+)/) {
+ # Read one sample for the current function
+ $read_samples++;
+ my $addr = hex($1);
+# print "$addr\n";
+ if ($addr >= $curFuncEnd) {
+ # Find the function the current address belongs to
+ while ($curFuncInd <= $#funcSortByAddr) {
+ my $f = $funcSortByAddr[$curFuncInd];
+ my $begin = $funcBaseAddrs{$f};
+ my $end = $funcBaseAddrs{$f} + $funcSizes{$f};
+ if ($begin <= $addr and $addr < $end) {
+ $curFunc = $f;
+ $funcStart{$curFunc} = $addr;
+ $curFuncBegin = $begin;
+ $curFuncEnd = $end;
+ last;
+ } elsif ($addr < $begin) {
+# printf "X3: func:%08x\tname:%s\tbase:%08x\tsize:%08x\t%s\nline:%s\n",
+# $curFunc, $funcNames{$curFunc}, $funcBaseAddrs{$curFunc}, $funcSizes{$curFunc}, $curModule, $line;
+ last;
+ }
+ ++$curFuncInd;
+ }
+ }
+
+ $funcHits{$curFunc} += $2;
+ if ($funcHottestCount{$curFunc} < $2) {
+ $funcHottestCount{$curFunc} = $2;
+ }
+ $addrHits{$addr} = $2;
+ $addrFunc{$addr} = $curFunc;
+ $funcEnd{$curFunc} = $addr;
+ $allHits += $2;
+ $moduleHits{$curModule} += $2;
+
+# printf "%08x\t%08x\t%08x\t%08x\t%s\n", $addr, $curFunc, $curFuncBegin, $curFuncEnd, $funcNames{$curFunc};
+ }
+ }
+ close($IN_FILE);
+
+ printf "\nsamples: %d unique: %d dropped: %d searchskips: %d\n", $total_samples, $unique_samples, $dropped_samples, $search_skips;
+ if ($read_samples != $unique_samples) {
+ printf "\n-----------------------------------------------------------------------------------------------------\n";
+ printf "!!!!WARNING: read %d samples, expected %d samples, profiling results might be not acqurate!!!!", $read_samples, $unique_samples;
+ printf "\n-----------------------------------------------------------------------------------------------------\n";
+ }
+}
+
+# Dumps module stats
+sub DumpModules() {
+ # Sort functions by hit counts and dump the list
+ my @modules = sort {$a <=> $b } keys %moduleHits;
+ for (my $i = 0; $i <= $#modules; ++$i) {
+ my $m = $modules[$i];
+ my $cnt = $moduleHits{$m};
+ my $perc = 100.0 * $cnt / $allHits;
+ printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $m;
+ }
+}
+
+# Dumps top N hot functions
+sub DumpHotFunc($) {
+ my ($maxCnt) = @_;
+ # Sort functions by hit counts and dump the list
+ my @hotFunc = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
+# print $#hotFunc;
+ for (my $i = 0; $i <= $#hotFunc && $i < $maxCnt; ++$i) {
+ my $f = $hotFunc[$i];
+ my $cnt = $funcHits{$f};
+ my $perc = 100.0 * $cnt / $allHits;
+ printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), DEMANGLE($funcNames{$f});
+ }
+}
+
+# Dumps top N hotspots (hot addresses)
+sub DumpHotSpots($) {
+ my ($maxCnt) = @_;
+ # Sort addresses by hit counts and dump the list
+ my @hotSpots = sort {$addrHits{$b} <=> $addrHits{$a} } keys %addrHits;
+ for (my $i = 0; $i <= $#hotSpots && $i < $maxCnt; ++$i) {
+ my $s = $hotSpots[$i];
+ my $cnt = $addrHits{$s};
+ my $perc = 100.0 * $cnt / $allHits;
+ my $f = $addrFunc{$s};
+ my $fname = $funcNames{$f};
+ printf "%12d\t%6.2f%% |%s 0x%016x\t%s + 0x%x\n",
+ $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $s, DEMANGLE($fname), $s - $funcBaseAddrs{$f};
+ }
+}
+
+# Adds hit informations to a disassembly line
+sub ANNOTATE_DISASSM($$$$) {
+ my ($address, $disassm, $max_hit_count, $func_hit_count) = @_;
+ my $hit_count = $addrHits{$address};
+ my $perc = sprintf("% 7.2f%%", 100*$hit_count/$func_hit_count);
+ $address = sprintf("% 8x", $address);
+ print $address . " " . $hit_count . "\t" . $perc . " |" .
+ DRAW_BAR(20, 20*$hit_count/$max_hit_count) . "\t" . $disassm . "\n";
+}
+
+# Dumps annotated disassembly of the specified function (actually not the whole function but
+# just the addresses between the first and last hit)
+sub DumpDisasm($) {
+ my ($name) = @_;
+ if (exists $funcStart{$name} && exists $funcEnd{$name} && $funcStart{$name}!=0) {
+ my $module = $funcModule{$name};
+ my $modBase = $moduleBaseAddr{$module};
+ my $start_address = $funcStart{$name} - $modBase;
+ my $stop_address = $funcEnd{$name} - $modBase + 1;
+# print " " . $funcStart{$name} . " " . $funcEnd{$name} . " $modBase ---";
+ my $max_hit_count = $funcHits{$name};
+ my $objdump_cmd = "objdump -C -d -l --start-address=" . $start_address .
+ " --stop-address=" . $stop_address . " " . $module . " |";
+ if ($stop_address - $start_address < 10000000) { # don't try to disaassemble more than 10MB, because most likely it's a bug
+# print STDERR $objdump_cmd . "\n";
+ open(my $OBJDUMP, $objdump_cmd) || die "No objdump";
+ my $srcLine = "func# ". $name;
+ my $srcFile = $module;
+ while (my $objdump_line = <$OBJDUMP>) {
+ # filter disassembly lines
+ if ($objdump_line =~ /^Disassembly of section/) {
+ } elsif ($objdump_line =~ m/^\s*([0-9,a-f,A-F]+):\s*(.*)/) {
+ my $addr = hex($1);
+ my $hit_count = $addrHits{$addr};
+ if ($hit_count > 0) {
+ $srcLineHits{$srcLine} += $hit_count;
+ $srcFileHits{$srcFile} += $hit_count;
+ }
+ ANNOTATE_DISASSM($addr + $modBase, $2, $funcHottestCount{$name}, $max_hit_count);
+ } elsif ($objdump_line =~ m/^(\/.*):(\d+)$/) {
+ $srcLine = $objdump_line;
+ $srcFile = $1;
+ chomp($srcLine);
+ print $objdump_line;
+ } else {
+ print $objdump_line;
+ }
+ }
+ close $OBJDUMP;
+ }
+ }
+}
+
+# Dumps disassemlby for top N hot functions
+sub DumpFuncDissasm($) {
+ (my $maxCnt) = @_;
+ my @funcs = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
+ print $#funcs . "\n";
+ for (my $i = 0; $i <= $#funcs && $i < $maxCnt; ++$i) {
+ my $f = $funcs[$i];
+ print "\n--------------------------------------------------------------------------------------------------------------\n";
+ printf "hits:%d\t%7.2f%%\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
+ $funcHits{$f}, 100*$funcHits{$f}/$allHits, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, DEMANGLE($funcNames{$f});
+ print "--------------------------------------------------------------------------------------------------------------\n";
+ DumpDisasm($f);
+ }
+}
+
+sub DumpSrcFiles($) {
+ (my $maxCnt) = @_;
+ my @srcFiles = sort {$srcFileHits{$b} <=> $srcFileHits{$a} } keys %srcFileHits;
+ for (my $i = 0; $i <= $#srcFiles && $i < $maxCnt; ++$i) {
+ my $f = $srcFiles[$i];
+ my $cnt = $srcFileHits{$f};
+ printf "%12d\t%6.2f%% |%s %s\n", $cnt, 100*$cnt/$allHits, DRAW_BAR(20, 20*$cnt/$allHits), $f;
+ }
+}
+
+sub DumpSrcLines($) {
+ (my $maxCnt) = @_;
+ my @srcLines = sort {$srcLineHits{$b} <=> $srcLineHits{$a} } keys %srcLineHits;
+ for (my $i = 0; $i <= $#srcLines && $i < $maxCnt; ++$i) {
+ my $l = $srcLines[$i];
+ my $cnt = $srcLineHits{$l};
+ printf "%12d\t%6.2f%% |%s %s\n", $cnt, 100*$cnt/$allHits, DRAW_BAR(20, 20*$cnt/$allHits), $l;
+ }
+}
+
+ReadFunctionList($ARGV[0], 0);
+ReadSamples();
+print "\nModules:\n";
+DumpModules();
+print "\nHot functions:\n";
+DumpHotFunc(100);
+print "\nHotspots:\n";
+DumpHotSpots(100);
+DumpFuncDissasm(100);
+print "\nHot src files:\n";
+DumpSrcFiles(100);
+print "\nHot src lines:\n";
+DumpSrcLines(100);
+
+# my @funcs = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcHits;
+# printf "%d\n", $#funcs;
+# for (my $i = 0; $i <= $#funcs; ++$i) {
+# my $f = $funcs[$i];
+# printf "%s\t%d\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
+# $funcNames{$f}, $funcHits{$f}, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, $funcModule{$f};
+# #DumpDisasm($f);
+# }
diff --git a/library/cpp/execprofile/autostart/start.cpp b/library/cpp/execprofile/autostart/start.cpp
new file mode 100644
index 00000000000..4fffb03ae1c
--- /dev/null
+++ b/library/cpp/execprofile/autostart/start.cpp
@@ -0,0 +1,15 @@
+#include <library/cpp/execprofile/profile.h>
+
+namespace {
+ struct TInit {
+ inline TInit() {
+ BeginProfiling();
+ }
+
+ inline ~TInit() {
+ EndProfiling();
+ }
+ };
+
+ const TInit initer;
+}
diff --git a/library/cpp/execprofile/autostart/ya.make b/library/cpp/execprofile/autostart/ya.make
new file mode 100644
index 00000000000..a5b61f075f9
--- /dev/null
+++ b/library/cpp/execprofile/autostart/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+OWNER(pg)
+
+PEERDIR(
+ library/cpp/execprofile
+)
+
+SRCS(
+ GLOBAL start.cpp
+)
+
+END()
diff --git a/library/cpp/execprofile/profile.cpp b/library/cpp/execprofile/profile.cpp
new file mode 100644
index 00000000000..d05de20203f
--- /dev/null
+++ b/library/cpp/execprofile/profile.cpp
@@ -0,0 +1,417 @@
+#include <util/system/defaults.h>
+
+#include "profile.h"
+
+#if defined(_unix_) && !defined(_bionic_) && !defined(_cygwin_)
+
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#if defined(_darwin_)
+#include <sys/ucontext.h>
+#else
+#include <ucontext.h>
+#endif
+#include <dlfcn.h>
+
+#include <util/system/platform.h>
+#include <util/generic/hash.h>
+#include <util/generic/map.h>
+#include <util/generic/noncopyable.h>
+#include <util/generic/algorithm.h>
+#include <util/generic/vector.h>
+#include <util/stream/file.h>
+#include <util/string/util.h>
+#include <util/system/datetime.h>
+
+// This class sets SIGPROF handler and captures instruction pointer in it.
+class TExecutionSampler : TNonCopyable {
+public:
+ typedef TVector<std::pair<void*, size_t>> TSampleVector;
+
+ struct TStats {
+ ui64 SavedSamples;
+ ui64 DroppedSamples;
+ ui64 SearchSkipCount;
+ };
+
+ // NOTE: There is no synchronization here as the instance is supposed to be
+ // created on the main thread.
+ static TExecutionSampler* Instance() {
+ if (SInstance == nullptr) {
+ SInstance = new TExecutionSampler();
+ }
+
+ return SInstance;
+ }
+
+ void Start() {
+ // Set signal handler
+ struct sigaction sa;
+ sa.sa_sigaction = ProfilerSignalHandler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGPROF, &sa, &OldSignalHandler) != 0)
+ return;
+
+ // Set interval timer
+ itimerval tv;
+ tv.it_interval.tv_sec = tv.it_value.tv_sec = 0;
+ tv.it_interval.tv_usec = tv.it_value.tv_usec = SAMPLE_INTERVAL;
+ setitimer(ITIMER_PROF, &tv, &OldTimerValue);
+
+ Started = true;
+ }
+
+ void Stop(TSampleVector& sampleVector, TStats& stats) {
+ // Reset signal handler and timer
+ if (Started) {
+ setitimer(ITIMER_PROF, &OldTimerValue, nullptr);
+ sleep(1);
+ }
+
+ WaitForWriteFlag();
+
+ if (Started) {
+ sigaction(SIGPROF, &OldSignalHandler, nullptr);
+ Started = false;
+ }
+
+ TExecutionSampler::TSampleVector hits;
+ hits.reserve(Samples);
+ for (size_t i = 0; i < SZ; ++i) {
+ if (Ips[i].first != nullptr) {
+ hits.push_back(Ips[i]);
+ }
+ }
+ stats.SavedSamples = Samples;
+ stats.DroppedSamples = AtomicGet(DroppedSamples);
+ stats.SearchSkipCount = SearchSkipCount;
+ AtomicUnlock(&WriteFlag);
+
+ Sort(hits.begin(), hits.end(), TCompareFirst());
+
+ sampleVector.swap(hits);
+ }
+
+ void ResetStats() {
+ WaitForWriteFlag();
+ Clear();
+ AtomicUnlock(&WriteFlag);
+ }
+
+private:
+ static const size_t SZ = 2 * 1024 * 1024; // size of the hash table
+ // inserts work faster if it's a power of 2
+ static const int SAMPLE_INTERVAL = 1000; // in microseconds
+
+ struct TCompareFirst {
+ bool operator()(const std::pair<void*, size_t>& a, const std::pair<void*, size_t>& b) const {
+ return a.first < b.first;
+ }
+ };
+
+ TExecutionSampler()
+ : Started(false)
+ , Ips(SZ)
+ , WriteFlag(0)
+ , DroppedSamples(0)
+ , Samples(0)
+ , UniqueSamples(0)
+ , SearchSkipCount(0)
+ {
+ }
+
+ ~TExecutionSampler() = default;
+
+ // Signal handler is not allowed to do anything that can deadlock with activity
+ // on the thread to which the signal is delivered or corrupt data structures that
+ // were in process of update.
+ // One such thing is memory allocation. That's why a fixed size vector is
+ // preallocated at start.
+ static void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
+ (void)info;
+ if (signal != SIGPROF) {
+ return;
+ }
+
+ ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context);
+ Y_ASSERT(SInstance != nullptr);
+
+ SInstance->CaptureIP(GetIp(&ucontext->uc_mcontext));
+ }
+
+ void WaitForWriteFlag() {
+ // Wait for write flag to be reset
+ ui32 delay = 100;
+ while (!AtomicTryLock(&WriteFlag)) {
+ usleep(delay);
+ delay += delay;
+ delay = Min(delay, (ui32)5000);
+ }
+ }
+
+ void CaptureIP(void* rip) {
+ // Check if the handler on another thread is in the process of adding a sample
+ // If this is the case, we just drop the current sample as this should happen
+ // rarely.
+ if (AtomicTryLock(&WriteFlag)) {
+ AddSample(rip);
+ AtomicUnlock(&WriteFlag);
+ } else {
+ AtomicIncrement(DroppedSamples);
+ }
+ }
+
+ // Hash function applied to the addresses
+ static inline ui32 Hash(void* key) {
+ return ((size_t)key + (size_t)key / SZ) % SZ;
+ }
+
+ // Get instruction pointer from the context
+ static inline void* GetIp(const mcontext_t* mctx) {
+#if defined _freebsd_
+#if defined _64_
+ return (void*)mctx->mc_rip;
+#else
+ return (void*)mctx->mc_eip;
+#endif
+#elif defined _linux_
+#if defined _64_
+#if defined(_arm_)
+ return (void*)mctx->pc;
+#else
+ return (void*)mctx->gregs[REG_RIP];
+#endif
+#else
+ return (void*)mctx->gregs[REG_EIP];
+#endif
+#elif defined _darwin_
+#if defined _64_
+#if defined(_arm_)
+ return (void*)(*mctx)->__ss.__pc;
+#else
+ return (void*)(*mctx)->__ss.__rip;
+#endif
+#else
+#if defined(__IOS__)
+ return (void*)(*mctx)->__ss.__pc;
+#else
+ return (void*)(*mctx)->__ss.__eip;
+#endif
+#endif
+#endif
+ }
+
+ inline bool AddSample(void* key) {
+ ui32 slot = Hash(key);
+ ui32 prevSlot = (slot - 1) % SZ;
+
+ while (key != Ips[slot].first && !IsSlotEmpty(slot) && slot != prevSlot) {
+ slot = (slot + 1) % SZ;
+ SearchSkipCount++;
+ }
+
+ if (key == Ips[slot].first) {
+ // increment the count
+ Ips[slot].second++;
+ ++Samples;
+ } else if (InsertsAllowed()) {
+ // add new sample and set the count to 1
+ Ips[slot].first = key;
+ Ips[slot].second = 1;
+ ++UniqueSamples;
+ ++Samples;
+ } else {
+ // don't insert new sample if the search is becoming too slow
+ AtomicIncrement(DroppedSamples);
+ return false;
+ }
+
+ return true;
+ }
+
+ inline bool IsSlotEmpty(ui32 slot) const {
+ return Ips[slot].first == nullptr;
+ }
+
+ inline bool InsertsAllowed() const {
+ return UniqueSamples < SZ / 2;
+ }
+
+ void
+ Clear() {
+ Y_ASSERT(WriteFlag == 1);
+
+ for (size_t i = 0; i < SZ; ++i) {
+ Ips[i] = std::make_pair((void*)nullptr, (size_t)0);
+ }
+ Samples = 0;
+ AtomicSet(DroppedSamples, 0);
+ UniqueSamples = 0;
+ SearchSkipCount = 0;
+ }
+
+ bool Started;
+ struct sigaction OldSignalHandler;
+ itimerval OldTimerValue;
+
+ TVector<std::pair<void*, size_t>>
+ Ips; // The hash table storing addresses and their hitcounts
+
+ // TODO: on a big multiproc cache line false sharing by the flag and count might become an issue
+ TAtomic WriteFlag; // Is used to syncronize access to the hash table
+ TAtomic DroppedSamples; // "dropped sample" count will show how many times
+ // a sample was dropped either because of write conflict
+ // or because of the hash table had become too filled up
+ ui64 Samples; // Saved samples count
+ ui64 UniqueSamples; // Number of unique addresses
+ ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions
+
+ static TExecutionSampler* SInstance;
+};
+
+// Performs analysis of samples captured by TExecutionSampler
+class TSampleAnalyser : TNonCopyable {
+public:
+ TSampleAnalyser(TExecutionSampler::TSampleVector& samples, const TExecutionSampler::TStats& stats, bool putTimeStamps = false)
+ : Samples()
+ , Stats(stats)
+ , PutTimestamps(putTimeStamps)
+ {
+ Samples.swap(samples);
+ }
+
+ ~TSampleAnalyser() = default;
+
+ void Analyze(FILE* out) const;
+
+private:
+ TExecutionSampler::TSampleVector Samples;
+ TExecutionSampler::TStats Stats;
+ bool PutTimestamps;
+};
+
+void TSampleAnalyser::Analyze(FILE* out) const {
+ fprintf(out, "samples: %" PRIu64 " unique: %" PRIu64 " dropped: %" PRIu64 " searchskips: %" PRIu64 "\n",
+ (ui64)Stats.SavedSamples, (ui64)Samples.size(),
+ (ui64)Stats.DroppedSamples, (ui64)Stats.SearchSkipCount);
+
+ fprintf(out, "\nSamples:\n");
+ size_t funcCnt = 0;
+ void* prevModBase = (void*)-1;
+ void* prevFunc = (void*)-1;
+ for (size_t i = 0; i < Samples.size(); ++i) {
+ // print cycle count once in a while to estimate time consumed by
+ // dumping the samples
+ if (PutTimestamps && (i % 1000 == 0)) {
+ ui64 tm = GetCycleCount();
+ fprintf(out, "TM: %" PRIu64 "\n", tm);
+ }
+
+ Dl_info addrInfo;
+ if (dladdr(Samples[i].first, &addrInfo)) {
+ if (addrInfo.dli_fbase != prevModBase || addrInfo.dli_saddr != prevFunc) {
+ fprintf(out, "Func\t%" PRISZT "\t%p\t%p\t%s\t%s\n",
+ funcCnt,
+ addrInfo.dli_fbase,
+ addrInfo.dli_saddr,
+ addrInfo.dli_fname,
+ addrInfo.dli_sname);
+ prevModBase = addrInfo.dli_fbase;
+ prevFunc = addrInfo.dli_saddr;
+ ++funcCnt;
+ }
+ } else {
+ fprintf(out, "[dladdr failed]\n");
+ }
+ fprintf(out, "%" PRISZT "\t%p\t%lu\n", i, Samples[i].first, Samples[i].second);
+ }
+}
+
+TExecutionSampler* TExecutionSampler::SInstance = nullptr;
+
+// Starts capturing execution samples
+void BeginProfiling() {
+ TExecutionSampler::Instance()->Start();
+}
+
+// Resets captured execution samples
+void ResetProfile() {
+ TExecutionSampler::Instance()->ResetStats();
+}
+
+void DumpRUsage(FILE* out) {
+ rusage ru;
+ int e = getrusage(RUSAGE_SELF, &ru);
+ if (e != 0)
+ return;
+
+ fprintf(out,
+ "user time: %lf\n"
+ "system time: %lf\n"
+ "max RSS: %ld\n"
+ "shared text: %ld\n"
+ "unshared data: %ld\n"
+ "unshared stack: %ld\n"
+ "page reclaims: %ld\n"
+ "page faults: %ld\n"
+ "swaps: %ld\n"
+ "block input ops: %ld\n"
+ "block output ops: %ld\n"
+ "msg sent: %ld\n"
+ "msg received: %ld\n"
+ "signals: %ld\n"
+ "voluntary ctx switches: %ld\n"
+ "involuntary ctx switches: %ld\n\n",
+ ru.ru_utime.tv_sec + (ru.ru_utime.tv_usec * 0.000001),
+ ru.ru_stime.tv_sec + (ru.ru_stime.tv_usec * 0.000001),
+ ru.ru_maxrss, ru.ru_ixrss, ru.ru_idrss, ru.ru_isrss,
+ ru.ru_minflt, ru.ru_majflt, ru.ru_nswap,
+ ru.ru_inblock, ru.ru_oublock,
+ ru.ru_msgsnd, ru.ru_msgrcv,
+ ru.ru_nsignals,
+ ru.ru_nvcsw, ru.ru_nivcsw);
+}
+
+// Pauses capturing execution samples and dumps them to the file
+// Samples are not cleared so that profiling can be continued by calling BeginProfiling()
+// or it can be started from scratch by calling ResetProfile() and then BeginProfiling()
+void EndProfiling(FILE* out) {
+ DumpRUsage(out);
+
+ TExecutionSampler::TSampleVector samples;
+ TExecutionSampler::TStats stats;
+ TExecutionSampler::Instance()->Stop(samples, stats);
+
+ TSampleAnalyser analyzer(samples, stats);
+ analyzer.Analyze(out);
+}
+
+void EndProfiling() {
+ static unsigned cnt = 0;
+ char nameBuf[256];
+ snprintf(nameBuf, sizeof(nameBuf), "./%s.%d.%u.profile", getprogname(), (int)getpid(), cnt);
+ FILE* out = fopen(nameBuf, "a");
+ EndProfiling(out);
+ fclose(out);
+ ++cnt;
+}
+
+#else
+
+// NOTE: not supported on Windows
+
+void BeginProfiling() {
+}
+
+void ResetProfile() {
+}
+
+void EndProfiling(FILE*) {
+}
+
+void EndProfiling() {
+}
+
+#endif
diff --git a/library/cpp/execprofile/profile.h b/library/cpp/execprofile/profile.h
new file mode 100644
index 00000000000..ccb88666565
--- /dev/null
+++ b/library/cpp/execprofile/profile.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <stdio.h>
+
+// Starts capturing execution samples
+void BeginProfiling();
+
+// Resets captured execution samples
+void ResetProfile();
+
+// Pauses capturing execution samples and dumps them to the file
+// Samples are not cleared so that profiling can be continued by calling BeginProfiling()
+// or it can be started from scratch by calling ResetProfile() and then BeginProfiling()
+void EndProfiling(FILE* out);
+
+// Dumps the profile to default file (basename.pid.N.profile)
+void EndProfiling();
diff --git a/library/cpp/execprofile/ya.make b/library/cpp/execprofile/ya.make
new file mode 100644
index 00000000000..9d202ac4eb2
--- /dev/null
+++ b/library/cpp/execprofile/ya.make
@@ -0,0 +1,9 @@
+OWNER(g:cpp-contrib)
+
+LIBRARY()
+
+SRCS(
+ profile.cpp
+)
+
+END()