aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/execprofile/annotate_profile.pl
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/execprofile/annotate_profile.pl
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/execprofile/annotate_profile.pl')
-rw-r--r--library/cpp/execprofile/annotate_profile.pl360
1 files changed, 360 insertions, 0 deletions
diff --git a/library/cpp/execprofile/annotate_profile.pl b/library/cpp/execprofile/annotate_profile.pl
new file mode 100644
index 0000000000..1a8c5d65a1
--- /dev/null
+++ b/library/cpp/execprofile/annotate_profile.pl
@@ -0,0 +1,360 @@
+#!/usr/bin/env perl
+
+#
+# Takes a profile file as input and prints out annotated disassembly
+# Usage:
+# ./annotate_profile.pl <binary_name> <profile_name>
+#
+
+
+# Renders a text bar: a run of "X" characters followed by space padding.
+#   $length - nominal width of the bar in characters
+#   $filled - fill level; one is subtracted first, then one "X" is emitted
+#             for every step the remainder stays above zero
+# Each "X" consumes one unit of $length; whatever is left is padded with spaces.
+# Returns the bar as a string.
+sub DRAW_BAR($$) {
+    my ($length, $filled) = @_;
+    my $bar = "";
+    # Filled part
+    for ($filled -= 1; $filled > 0; $filled -= 1) {
+        $bar .= "X";
+        $length -= 1;
+    }
+    # Padding part
+    $bar .= " " while $length-- > 0;
+    return $bar;
+}
+
+# ---- File-scoped profile state shared by the subs below ----
+my ($curFunc, $curModule) = ("", "");
+my $allHits = 0;                # sum of the hit counts of all samples read
+
+# Keyed by module path
+my (%moduleHits, %moduleBaseAddr);
+
+# Keyed by function id (the function's base address)
+my (%funcModule, %funcHits, %funcHottestCount);
+my (%funcStart, %funcEnd);      # first / last sampled address of the function
+my (%funcNames, %funcBaseAddrs, %funcSizes);
+
+# Keyed by sample address
+my (%addrHits, %addrFunc);
+
+my @funcSortByAddr;             # function ids ordered by base address
+my %demangledNames;             # mangled name -> demangled name, consulted by DEMANGLE
+
+# Keyed by source line / source file as reported by objdump
+my (%srcLineHits, %srcFileHits);
+
+# Demangles a C++ function name.
+#   $name - symbol name; only names starting with "_Z" are passed to c++filt
+# Returns the demangled name, or $name unchanged when it is not mangled or
+# c++filt produced no output.
+# Results for mangled names are remembered in %demangledNames so that c++filt
+# runs at most once per distinct symbol.
+# Dies with "No c++filt" if c++filt cannot be started.
+sub DEMANGLE($) {
+    my ($name) = @_;
+    if (exists $demangledNames{$name}) {
+        return $demangledNames{$name};
+    }
+    if ($name !~ /^_Z/) {
+        return $name;
+    }
+    # List-form pipe open: the symbol is handed to c++filt as a single argument
+    # without going through a shell, so quotes or other metacharacters in the
+    # symbol cannot alter the command.
+    open(my $RES, '-|', 'c++filt', '-p', $name) || die "No c++filt";
+    my $demangled_name = <$RES>;
+    close $RES;
+    my $result = $name;
+    if (defined $demangled_name) {
+        chomp($demangled_name);
+        if (length($demangled_name) != 0) {
+            $result = $demangled_name;
+        }
+    }
+    $demangledNames{$name} = $result;
+    return $result;
+}
+
+# Records one function in the per-function tables.
+#   $func     - key identifying the function (callers pass its address)
+#   $bin_file - module the function belongs to
+#   $baseAddr - base address of the function
+#   $size     - size of the function; the largest size seen for $func wins
+#   $name     - symbol name
+# Also initialises $funcStart{$func} to $func.
+sub AddFunc($$$$$)
+{
+    my ($func, $bin_file, $baseAddr, $size, $name) = @_;
+    # The same base address may be reported several times with different sizes
+    # (both 0 and non-0), so an already recorded size is only ever raised.
+    unless (exists $funcSizes{$func} && $funcSizes{$func} >= $size) {
+        $funcSizes{$func} = $size;
+    }
+    $funcNames{$func} = $name;
+    $funcModule{$func} = $bin_file;
+    $funcBaseAddrs{$func} = $baseAddr;
+    $funcStart{$func} = $func;
+#    printf "%08x\t%08x\t%016x\t%s\t%s\n",
+#        $funcBaseAddrs{$func}, $funcSizes{$func}, $moduleBaseAddr, $funcModule{$func}, $funcNames{$func};
+}
+
+# Reads the list of all functions in a module.
+#   $bin_file       - path of the binary / shared object to inspect
+#   $moduleBaseAddr - value added to every symbol address from the symbol table
+# Runs "readelf -W -s" on the module, registers every FUNC symbol with DEFAULT
+# visibility and a numeric section index through AddFunc, and then rebuilds
+# @funcSortByAddr (all known functions ordered by base address).
+# Returns without doing anything when $bin_file does not exist.
+# Dies if readelf cannot be started.
+sub ReadFunctionList($$) {
+    my ($bin_file, $moduleBaseAddr) = @_;
+    if (! -e $bin_file) {
+        return;
+    }
+    # List-form pipe open: the path reaches readelf as one argument without a
+    # shell in between, so spaces or metacharacters in it are harmless.
+    my $IN_FILE;
+    open($IN_FILE, '-|', 'readelf', '-W', '-s', $bin_file) || die "couldn't open the file!";
+    while (my $line = <$IN_FILE>) {
+        chomp($line);
+        # "    33: 00000000000a0fc0   433 FUNC    GLOBAL DEFAULT   10 getipnodebyaddr@@FBSD_1.0"
+        if ($line =~ m/^\s*\d+:\s+([0-9a-fA-F]+)\s+(\d+)\s+FUNC\s+\w+\s+DEFAULT\s+\d+\s+(.*)$/) {
+            # Read function info
+            my $name = $3;
+            my $size = $2;
+            my $baseAddr = hex($1) + $moduleBaseAddr;
+            # The function is keyed by its own base address
+            AddFunc($baseAddr, $bin_file, $baseAddr, $size, $name);
+        }
+    }
+    close($IN_FILE);
+    @funcSortByAddr = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcBaseAddrs;
+#    printf "%016x\t%s\t%d\n", $moduleBaseAddr, $bin_file, $#funcSortByAddr+1;
+}
+
+# Reads the profile named by $ARGV[1] and attributes address hits to functions.
+#
+# A "samples: N unique: N dropped: N searchskips: N" line sets the summary counters
+# that are printed at the end.  Other lines before the "Samples:" marker are echoed
+# to stdout.  After the marker two kinds of lines are handled:
+#   "Func\t<n>\t<module base>\t<func addr>\t<module path>..." selects the current
+#       module and function; the module's symbol table is loaded the first time
+#       the module is seen;
+#   "<n>\t0x<addr>\t<count>" is one sample.  It is credited to the function whose
+#       [base, base + size) range contains <addr>, otherwise to the current function.
+# Fills %funcHits, %funcHottestCount, %addrHits, %addrFunc, %funcStart, %funcEnd,
+# %moduleHits and $allHits, and prints a warning when the number of sample lines
+# read differs from the "unique" counter.
+# Dies if the profile cannot be opened.
+sub ReadSamples() {
+    my $samples_file = $ARGV[1];
+    my $IN_FILE;
+    # Three-argument open: the name is taken literally and never parsed for
+    # mode characters or pipe symbols.
+    open($IN_FILE, '<', $samples_file) || die "couldn't open the file!";
+    # Summary counters from the profile header (remain 0 when the header is absent)
+    my ($total_samples, $unique_samples, $dropped_samples, $search_skips) = (0, 0, 0, 0);
+    my $curFuncInd = 0;     # cursor into @funcSortByAddr; it only moves forward
+    my $curFunc = 0;
+    my $curFuncBegin = 0;
+    my $curFuncEnd = 0;
+    my $curModule = "";
+    my $curModuleBase = 0;
+    my $read_samples = 0;
+    my $samplesStarted = 0;
+    while (my $line = <$IN_FILE>) {
+        chomp($line);
+
+        if ($line =~ m/^samples:\s+(\d+)\s+unique:\s+(\d+)\s+dropped:\s+(\d+)\s+searchskips:\s+(\d+)$/) {
+            ($total_samples, $unique_samples, $dropped_samples, $search_skips) = ($1, $2, $3, $4);
+            next;
+        }
+
+        if ($line =~ m/^Samples:$/) {
+            $samplesStarted = 1;
+            next;
+        } elsif (!$samplesStarted) {
+            print "$line\n";
+            next;
+        }
+
+#        print "$line\n";
+        if ($line =~ m/^Func\t\d+/) {
+            # "Func 2073 0x803323000 0x803332fd0 /lib/libthr.so.3 pthread_cond_init"
+            my @cols = split(/\t/, $line);
+            $curModule = $cols[4];
+            $curModuleBase = hex($cols[2]);
+            # A module base of 0x400000 is treated as 0, i.e. symbol addresses
+            # of that module are used as they are
+            if (0x400000 == $curModuleBase) {
+                $curModuleBase = 0;
+            }
+            $curFunc = hex($cols[3]);
+            if (! exists $moduleBaseAddr{$curModule}) {
+                $moduleBaseAddr{$curModule} = $curModuleBase;
+                ReadFunctionList($curModule, $curModuleBase);
+            }
+            if (! exists $funcNames{$curFunc}) {
+                # No symbol for this address: register a placeholder of size 0
+                my $name = sprintf("unknown_0x%08x", $curFunc);
+                AddFunc($curFunc, $curModule, $curFunc, 0, $name);
+            }
+        } elsif ($line =~ m/^\d+\t0x([0-9a-fA-F]+)\t(\d+)/) {
+            # Read one sample for the current function
+            my $addr = hex($1);
+            my $hits = $2;
+            $read_samples++;
+#            print "$addr\n";
+            if ($addr >= $curFuncEnd) {
+                # Find the function the current address belongs to
+                while ($curFuncInd <= $#funcSortByAddr) {
+                    my $f = $funcSortByAddr[$curFuncInd];
+                    my $begin = $funcBaseAddrs{$f};
+                    my $end = $funcBaseAddrs{$f} + $funcSizes{$f};
+                    if ($begin <= $addr and $addr < $end) {
+                        $curFunc = $f;
+                        $funcStart{$curFunc} = $addr;
+                        $curFuncBegin = $begin;
+                        $curFuncEnd = $end;
+                        last;
+                    } elsif ($addr < $begin) {
+                        # The address lies in a gap before the next known function:
+                        # keep the function selected so far
+#                        printf "X3: func:%08x\tname:%s\tbase:%08x\tsize:%08x\t%s\nline:%s\n",
+#                            $curFunc, $funcNames{$curFunc}, $funcBaseAddrs{$curFunc}, $funcSizes{$curFunc}, $curModule, $line;
+                        last;
+                    }
+                    ++$curFuncInd;
+                }
+            }
+
+            $funcHits{$curFunc} += $hits;
+            if ($funcHottestCount{$curFunc} < $hits) {
+                $funcHottestCount{$curFunc} = $hits;
+            }
+            $addrHits{$addr} = $hits;
+            $addrFunc{$addr} = $curFunc;
+            $funcEnd{$curFunc} = $addr;
+            $allHits += $hits;
+            $moduleHits{$curModule} += $hits;
+
+#            printf "%08x\t%08x\t%08x\t%08x\t%s\n", $addr, $curFunc, $curFuncBegin, $curFuncEnd, $funcNames{$curFunc};
+        }
+    }
+    close($IN_FILE);
+
+    printf "\nsamples: %d unique: %d dropped: %d searchskips: %d\n", $total_samples, $unique_samples, $dropped_samples, $search_skips;
+    if ($read_samples != $unique_samples) {
+        printf "\n-----------------------------------------------------------------------------------------------------\n";
+        printf "!!!!WARNING: read %d samples, expected %d samples, profiling results might be not acqurate!!!!", $read_samples, $unique_samples;
+        printf "\n-----------------------------------------------------------------------------------------------------\n";
+    }
+}
+
+# Dumps module stats: one line per module with its hit count, its share of all
+# hits and a 20-character bar, hottest module first.
+sub DumpModules() {
+    # Order modules by hit count, highest first; equal counts are ordered by
+    # module name so the output is stable.  (Comparing the module names
+    # themselves with <=> would treat every pair as equal.)
+    my @modules = sort { $moduleHits{$b} <=> $moduleHits{$a} or $a cmp $b } keys %moduleHits;
+    foreach my $m (@modules) {
+        my $cnt = $moduleHits{$m};
+        my $perc = 100.0 * $cnt / $allHits;
+        printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $m;
+    }
+}
+
+# Prints the hottest functions, one per line: hit count, share of all hits,
+# a 20-character bar and the demangled function name.
+#   $maxCnt - maximum number of functions to print
+sub DumpHotFunc($) {
+    my ($maxCnt) = @_;
+    # Function ids ordered by descending hit count
+    my @ranked = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
+    my $printed = 0;
+    foreach my $func (@ranked) {
+        last unless $printed < $maxCnt;
+        $printed++;
+        my $hits = $funcHits{$func};
+        my $share = 100.0 * $hits / $allHits;
+        printf "%12d\t%6.2f%% |%s %s\n", $hits, $share, DRAW_BAR(20, 20*$hits/$allHits), DEMANGLE($funcNames{$func});
+    }
+}
+
+# Prints the hottest addresses (hotspots), one per line: hit count, share of all
+# hits, a 20-character bar, the address, and the owning function's demangled
+# name plus the offset of the address from that function's base.
+#   $maxCnt - maximum number of addresses to print
+sub DumpHotSpots($) {
+    my ($maxCnt) = @_;
+    # Sampled addresses ordered by descending hit count
+    my @ranked = sort {$addrHits{$b} <=> $addrHits{$a} } keys %addrHits;
+    my $printed = 0;
+    foreach my $spot (@ranked) {
+        last unless $printed < $maxCnt;
+        $printed++;
+        my $hits = $addrHits{$spot};
+        my $share = 100.0 * $hits / $allHits;
+        my $owner = $addrFunc{$spot};
+        my $ownerName = $funcNames{$owner};
+        printf "%12d\t%6.2f%% |%s 0x%016x\t%s + 0x%x\n",
+            $hits, $share, DRAW_BAR(20, 20*$hits/$allHits), $spot, DEMANGLE($ownerName), $spot - $funcBaseAddrs{$owner};
+    }
+}
+
+# Prints one disassembly line prefixed with its hit information.
+#   $address        - address of the instruction, used as the key into %addrHits
+#   $disassm        - disassembly text for that address
+#   $max_hit_count  - hit count that corresponds to a full bar
+#   $func_hit_count - hit count that corresponds to 100%
+# Output: address (hex), hit count, percentage, a 20-character bar, disassembly.
+sub ANNOTATE_DISASSM($$$$) {
+    my ($address, $disassm, $max_hit_count, $func_hit_count) = @_;
+    my $hit_count = $addrHits{$address};
+    my $share = sprintf("% 7.2f%%", 100*$hit_count/$func_hit_count);
+    my $hexAddress = sprintf("% 8x", $address);
+    my $bar = DRAW_BAR(20, 20*$hit_count/$max_hit_count);
+    print join("", $hexAddress, " ", $hit_count, "\t", $share, " |", $bar, "\t", $disassm, "\n");
+}
+
+# Dumps annotated disassembly of the specified function (actually not the whole
+# function but just the addresses between the first and last hit).
+#   $name - function id, the key used in %funcStart / %funcEnd / %funcHits
+# Runs objdump on the function's module for the module-relative address range,
+# prints every instruction line through ANNOTATE_DISASSM and passes the other
+# lines through, except "Disassembly of section" lines, which are dropped.
+# While doing so it accumulates hits per source line (%srcLineHits) and per
+# source file (%srcFileHits).  Until objdump reports a source location, hits
+# are booked under "func# <id>" and the module path.
+# Nothing is printed when the function has no recorded start/end, when its start
+# is 0, or when the range is 10000000 bytes or larger.
+# Dies if objdump cannot be started.
+sub DumpDisasm($) {
+    my ($name) = @_;
+    if (exists $funcStart{$name} && exists $funcEnd{$name} && $funcStart{$name}!=0) {
+        my $module = $funcModule{$name};
+        my $modBase = $moduleBaseAddr{$module};
+        # objdump works with module-relative addresses
+        my $start_address = $funcStart{$name} - $modBase;
+        my $stop_address = $funcEnd{$name} - $modBase + 1;
+#        print "    " . $funcStart{$name} . " " . $funcEnd{$name} . " $modBase ---";
+        my $func_hit_count = $funcHits{$name};
+        if ($stop_address - $start_address < 10000000) { # don't try to disassemble more than 10MB, because most likely it's a bug
+            # List-form pipe open: the module path reaches objdump as a single
+            # argument without a shell in between.
+            open(my $OBJDUMP, '-|', 'objdump', '-C', '-d', '-l',
+                 "--start-address=" . $start_address,
+                 "--stop-address=" . $stop_address,
+                 $module) || die "No objdump";
+            my $srcLine = "func# ". $name;
+            my $srcFile = $module;
+            while (my $objdump_line = <$OBJDUMP>) {
+                # filter disassembly lines
+                if ($objdump_line =~ /^Disassembly of section/) {
+                } elsif ($objdump_line =~ m/^\s*([0-9a-fA-F]+):\s*(.*)/) {
+                    my $disassm = $2;
+                    # %addrHits is keyed by the absolute address seen in the
+                    # profile, so the module base has to be added back before
+                    # the lookup
+                    my $abs_addr = hex($1) + $modBase;
+                    my $hit_count = $addrHits{$abs_addr} || 0;
+                    if ($hit_count > 0) {
+                        $srcLineHits{$srcLine} += $hit_count;
+                        $srcFileHits{$srcFile} += $hit_count;
+                    }
+                    ANNOTATE_DISASSM($abs_addr, $disassm, $funcHottestCount{$name}, $func_hit_count);
+                } elsif ($objdump_line =~ m/^(\/.*):(\d+)$/) {
+                    # "/path/to/file:line" - source location of the following instructions
+                    $srcLine = $objdump_line;
+                    $srcFile = $1;
+                    chomp($srcLine);
+                    print $objdump_line;
+                } else {
+                    print $objdump_line;
+                }
+            }
+            close $OBJDUMP;
+        }
+    }
+}
+
+# Dumps disassembly for the top N hot functions.
+#   $maxCnt - maximum number of functions to dump
+# First prints the index of the last entry in the ranked function list, then,
+# for each function, a header (hits, share of all hits, base / first-hit /
+# last-hit addresses, demangled name) followed by the output of DumpDisasm.
+sub DumpFuncDissasm($) {
+    my ($maxCnt) = @_;
+    my $separator = "--------------------------------------------------------------------------------------------------------------\n";
+    # Function ids ordered by descending hit count
+    my @ranked = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
+    print $#ranked . "\n";
+    my $printed = 0;
+    foreach my $func (@ranked) {
+        last unless $printed < $maxCnt;
+        $printed++;
+        print "\n" . $separator;
+        printf "hits:%d\t%7.2f%%\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
+            $funcHits{$func}, 100*$funcHits{$func}/$allHits, $funcBaseAddrs{$func}, $funcStart{$func}, $funcEnd{$func}, DEMANGLE($funcNames{$func});
+        print $separator;
+        DumpDisasm($func);
+    }
+}
+
+# Prints the hottest source files, one per line: hit count, share of all hits,
+# a 20-character bar and the file name.
+#   $maxCnt - maximum number of files to print
+sub DumpSrcFiles($) {
+    my ($maxCnt) = @_;
+    # Source files ordered by descending hit count
+    my @ranked = sort {$srcFileHits{$b} <=> $srcFileHits{$a} } keys %srcFileHits;
+    my $printed = 0;
+    foreach my $file (@ranked) {
+        last unless $printed < $maxCnt;
+        $printed++;
+        my $hits = $srcFileHits{$file};
+        printf "%12d\t%6.2f%% |%s %s\n", $hits, 100*$hits/$allHits, DRAW_BAR(20, 20*$hits/$allHits), $file;
+    }
+}
+
+# Prints the hottest source lines, one per line: hit count, share of all hits,
+# a 20-character bar and the source line label.
+#   $maxCnt - maximum number of source lines to print
+sub DumpSrcLines($) {
+    my ($maxCnt) = @_;
+    # Source line labels ordered by descending hit count
+    my @ranked = sort {$srcLineHits{$b} <=> $srcLineHits{$a} } keys %srcLineHits;
+    my $printed = 0;
+    foreach my $label (@ranked) {
+        last unless $printed < $maxCnt;
+        $printed++;
+        my $hits = $srcLineHits{$label};
+        printf "%12d\t%6.2f%% |%s %s\n", $hits, 100*$hits/$allHits, DRAW_BAR(20, 20*$hits/$allHits), $label;
+    }
+}
+
+# ---- Main program ----
+# Both arguments are required; fail early with a usage message instead of the
+# generic "couldn't open the file!" raised later by ReadSamples.
+if (@ARGV < 2) {
+    die "Usage: $0 <binary_name> <profile_name>\n";
+}
+
+# Load the symbols of the profiled binary (module base 0), then the samples
+ReadFunctionList($ARGV[0], 0);
+ReadSamples();
+
+# Reports.  DumpFuncDissasm has to run before the source file / line reports
+# because the source hit tables are filled while the disassembly is dumped.
+print "\nModules:\n";
+DumpModules();
+print "\nHot functions:\n";
+DumpHotFunc(100);
+print "\nHotspots:\n";
+DumpHotSpots(100);
+DumpFuncDissasm(100);
+print "\nHot src files:\n";
+DumpSrcFiles(100);
+print "\nHot src lines:\n";
+DumpSrcLines(100);
+
+# my @funcs = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcHits;
+# printf "%d\n", $#funcs;
+# for (my $i = 0; $i <= $#funcs; ++$i) {
+# my $f = $funcs[$i];
+# printf "%s\t%d\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
+# $funcNames{$f}, $funcHits{$f}, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, $funcModule{$f};
+# #DumpDisasm($f);
+# }