#!/usr/bin/env perl
#
# Takes profile file as an input and prints out annotated disassmebly
# Usage:
# ./annotate_profile.pl <binary_name> <profile_name>
#
# Function to draw bar of the specified length filled up to specified length
sub DRAW_BAR($$) {
my ($length, $filled) = @_;
my $bar = "";
--$filled;
while ($filled > 0) {
$bar = $bar . "X";
$length--;
$filled--;
}
while ($length > 0) {
$bar = $bar . " ";
$length--;
}
return $bar;
}
my $curFunc = "";
my $curModule = "";
my $allHits = 0;
my %moduleHits;
my %funcModule;
my %funcHits;
my %funcHottestCount;
my %funcStart;
my %funcEnd;
my %funcNames;
my %funcBaseAddrs;
my %funcSizes;
my %addrHits;
my %addrFunc;
my %moduleBaseAddr;
my @funcSortByAddr;
my %demangledNames;
my %srcLineHits;
my %srcFileHits;
# Demagles C++ function name
sub DEMANGLE($) {
my ($name) = @_;
if (exists $demangledNames{$name}) {
return $demangledNames{$name};
}
if ($name =~ /^_Z/) {
my $cmd = "c++filt -p \'$name\' |";
open(my $RES, $cmd ) || die "No c++filt";
my $demangled_name = <$RES>;
chomp($demangled_name);
close $RES;
if (length($demangled_name) !=0) {
$name = $demangled_name;
}
}
return $name;
}
# Saves function info
sub AddFunc($$$$$)
{
my ($func, $bin_file, $baseAddr, $size, $name) = @_;
$funcModule{$func} = $bin_file;
$funcBaseAddrs{$func} = $baseAddr;
# A function with the same base address can be mentioned multiple times with different sizes (0, and non-0, WTF??)
if ((! exists $funcSizes{$func}) || ($funcSizes{$func} < $size)) {
$funcSizes{$func} = $size;
}
$funcNames{$func} = $name;
$funcStart{$func} = $func;
# printf "%08x\t%08x\t%016x\t%s\t%s\n",
# $funcBaseAddrs{$func}, $funcSizes{$func}, $moduleBaseAddr, $funcModule{$func}, $funcNames{$func};
}
# Reads list of all functions in a module
sub ReadFunctionList($$) {
my ($bin_file, $moduleBaseAddr) = @_;
if (! -e $bin_file) {
return;
}
my $readelf_cmd = "readelf -W -s $bin_file |";
# print "$readelf_cmd\n";
my $IN_FILE;
open($IN_FILE, $readelf_cmd) || die "couldn't open the file!";
while (my $line = <$IN_FILE>) {
chomp($line);
# " 33: 00000000000a0fc0 433 FUNC GLOBAL DEFAULT 10 getipnodebyaddr@@FBSD_1.0"
if ($line =~ m/^\s*\d+:\s+([0-9a-fA-F]+)\s+(\d+)\s+FUNC\s+\w+\s+DEFAULT\s+\d+\s+(.*)$/) {
# Read function info
my $name = $3;
my $baseAddr = hex($1) + $moduleBaseAddr;
my $func = $baseAddr;
my $size = $2;
AddFunc($func, $bin_file, $baseAddr, $size, $name);
}
}
close($IN_FILE);
@funcSortByAddr = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcBaseAddrs;
# printf "%016x\t%s\t%d\n", $moduleBaseAddr, $bin_file, $#funcSortByAddr+1;
}
# Reads the profile and attributes address hits to the functions
sub ReadSamples() {
# First pass saves all samples in a hash-table
my $samples_file = $ARGV[1];
my $IN_FILE;
open($IN_FILE, $samples_file)|| die "couldn't open the file!";
my $curFuncInd = 0;
my $curFunc = 0;
my $curFuncBegin = 0;
my $curFuncEnd = 0;
my $curModule = "";
my $curModuleBase = 0;
my $read_samples = 0;
my $samplesStarted = 0;
while (my $line = <$IN_FILE>) {
chomp($line);
if ($line =~ m/^samples:\s+(\d+)\s+unique:\s+(\d+)\s+dropped:\s+(\d+)\s+searchskips:\s+(\d+)$/) {
$total_samples = $1;
$unique_samples = $2;
$dropped_samples = $3;
$search_skips = $4;
next;
}
if ($line =~ m/^Samples:$/) {
$samplesStarted = 1;
next;
} elsif (!$samplesStarted) {
print "$line\n";
next;
}
# print "$line\n";
if ($line =~ m/^Func\t\d+/) {
# "Func 2073 0x803323000 0x803332fd0 /lib/libthr.so.3 pthread_cond_init"
my @cols = split(/\t/, $line);
$curModule = $cols[4];
$curModuleBase = hex($cols[2]);
if (0x400000 == $curModuleBase) {
$curModuleBase = 0;
}
$curFunc = hex($cols[3]);
if (! exists $moduleBaseAddr{$curModule}) {
$moduleBaseAddr{$curModule} = $curModuleBase;
ReadFunctionList($curModule, $curModuleBase);
}
if (! exists $funcNames{$curFunc}) {
my $name = sprintf("unknown_0x%08x", $curFunc);
AddFunc($curFunc, $curModule, $curFunc, 0, $name);
}
} elsif ($line =~ m/^\d+\t0x([0-9,a-f,A-F]+)\t(\d+)/) {
# Read one sample for the current function
$read_samples++;
my $addr = hex($1);
# print "$addr\n";
if ($addr >= $curFuncEnd) {
# Find the function the current address belongs to
while ($curFuncInd <= $#funcSortByAddr) {
my $f = $funcSortByAddr[$curFuncInd];
my $begin = $funcBaseAddrs{$f};
my $end = $funcBaseAddrs{$f} + $funcSizes{$f};
if ($begin <= $addr and $addr < $end) {
$curFunc = $f;
$funcStart{$curFunc} = $addr;
$curFuncBegin = $begin;
$curFuncEnd = $end;
last;
} elsif ($addr < $begin) {
# printf "X3: func:%08x\tname:%s\tbase:%08x\tsize:%08x\t%s\nline:%s\n",
# $curFunc, $funcNames{$curFunc}, $funcBaseAddrs{$curFunc}, $funcSizes{$curFunc}, $curModule, $line;
last;
}
++$curFuncInd;
}
}
$funcHits{$curFunc} += $2;
if ($funcHottestCount{$curFunc} < $2) {
$funcHottestCount{$curFunc} = $2;
}
$addrHits{$addr} = $2;
$addrFunc{$addr} = $curFunc;
$funcEnd{$curFunc} = $addr;
$allHits += $2;
$moduleHits{$curModule} += $2;
# printf "%08x\t%08x\t%08x\t%08x\t%s\n", $addr, $curFunc, $curFuncBegin, $curFuncEnd, $funcNames{$curFunc};
}
}
close($IN_FILE);
printf "\nsamples: %d unique: %d dropped: %d searchskips: %d\n", $total_samples, $unique_samples, $dropped_samples, $search_skips;
if ($read_samples != $unique_samples) {
printf "\n-----------------------------------------------------------------------------------------------------\n";
printf "!!!!WARNING: read %d samples, expected %d samples, profiling results might be not acqurate!!!!", $read_samples, $unique_samples;
printf "\n-----------------------------------------------------------------------------------------------------\n";
}
}
# Dumps module stats
sub DumpModules() {
# Sort functions by hit counts and dump the list
my @modules = sort {$a <=> $b } keys %moduleHits;
for (my $i = 0; $i <= $#modules; ++$i) {
my $m = $modules[$i];
my $cnt = $moduleHits{$m};
my $perc = 100.0 * $cnt / $allHits;
printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $m;
}
}
# Dumps top N hot functions
sub DumpHotFunc($) {
my ($maxCnt) = @_;
# Sort functions by hit counts and dump the list
my @hotFunc = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
# print $#hotFunc;
for (my $i = 0; $i <= $#hotFunc && $i < $maxCnt; ++$i) {
my $f = $hotFunc[$i];
my $cnt = $funcHits{$f};
my $perc = 100.0 * $cnt / $allHits;
printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), DEMANGLE($funcNames{$f});
}
}
# Dumps top N hotspots (hot addresses)
sub DumpHotSpots($) {
my ($maxCnt) = @_;
# Sort addresses by hit counts and dump the list
my @hotSpots = sort {$addrHits{$b} <=> $addrHits{$a} } keys %addrHits;
for (my $i = 0; $i <= $#hotSpots && $i < $maxCnt; ++$i) {
my $s = $hotSpots[$i];
my $cnt = $addrHits{$s};
my $perc = 100.0 * $cnt / $allHits;
my $f = $addrFunc{$s};
my $fname = $funcNames{$f};
printf "%12d\t%6.2f%% |%s 0x%016x\t%s + 0x%x\n",
$cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $s, DEMANGLE($fname), $s - $funcBaseAddrs{$f};
}
}
# Adds hit informations to a disassembly line
sub ANNOTATE_DISASSM($$$$) {
my ($address, $disassm, $max_hit_count, $func_hit_count) = @_;
my $hit_count = $addrHits{$address};
my $perc = sprintf("% 7.2f%%", 100*$hit_count/$func_hit_count);
$address = sprintf("% 8x", $address);
print $address . " " . $hit_count . "\t" . $perc . " |" .
DRAW_BAR(20, 20*$hit_count/$max_hit_count) . "\t" . $disassm . "\n";
}
# Dumps annotated disassembly of the specified function (actually not the whole function but
# just the addresses between the first and last hit)
sub DumpDisasm($) {
my ($name) = @_;
if (exists $funcStart{$name} && exists $funcEnd{$name} && $funcStart{$name}!=0) {
my $module = $funcModule{$name};
my $modBase = $moduleBaseAddr{$module};
my $start_address = $funcStart{$name} - $modBase;
my $stop_address = $funcEnd{$name} - $modBase + 1;
# print " " . $funcStart{$name} . " " . $funcEnd{$name} . " $modBase ---";
my $max_hit_count = $funcHits{$name};
my $objdump_cmd = "objdump -C -d -l --start-address=" . $start_address .
" --stop-address=" . $stop_address . " " . $module . " |";
if ($stop_address - $start_address < 10000000) { # don't try to disaassemble more than 10MB, because most likely it's a bug
# print STDERR $objdump_cmd . "\n";
open(my $OBJDUMP, $objdump_cmd) || die "No objdump";
my $srcLine = "func# ". $name;
my $srcFile = $module;
while (my $objdump_line = <$OBJDUMP>) {
# filter disassembly lines
if ($objdump_line =~ /^Disassembly of section/) {
} elsif ($objdump_line =~ m/^\s*([0-9,a-f,A-F]+):\s*(.*)/) {
my $addr = hex($1);
my $hit_count = $addrHits{$addr};
if ($hit_count > 0) {
$srcLineHits{$srcLine} += $hit_count;
$srcFileHits{$srcFile} += $hit_count;
}
ANNOTATE_DISASSM($addr + $modBase, $2, $funcHottestCount{$name}, $max_hit_count);
} elsif ($objdump_line =~ m/^(\/.*):(\d+)$/) {
$srcLine = $objdump_line;
$srcFile = $1;
chomp($srcLine);
print $objdump_line;
} else {
print $objdump_line;
}
}
close $OBJDUMP;
}
}
}
# Dumps disassemlby for top N hot functions
sub DumpFuncDissasm($) {
(my $maxCnt) = @_;
my @funcs = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
print $#funcs . "\n";
for (my $i = 0; $i <= $#funcs && $i < $maxCnt; ++$i) {
my $f = $funcs[$i];
print "\n--------------------------------------------------------------------------------------------------------------\n";
printf "hits:%d\t%7.2f%%\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
$funcHits{$f}, 100*$funcHits{$f}/$allHits, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, DEMANGLE($funcNames{$f});
print "--------------------------------------------------------------------------------------------------------------\n";
DumpDisasm($f);
}
}
sub DumpSrcFiles($) {
(my $maxCnt) = @_;
my @srcFiles = sort {$srcFileHits{$b} <=> $srcFileHits{$a} } keys %srcFileHits;
for (my $i = 0; $i <= $#srcFiles && $i < $maxCnt; ++$i) {
my $f = $srcFiles[$i];
my $cnt = $srcFileHits{$f};
printf "%12d\t%6.2f%% |%s %s\n", $cnt, 100*$cnt/$allHits, DRAW_BAR(20, 20*$cnt/$allHits), $f;
}
}
sub DumpSrcLines($) {
(my $maxCnt) = @_;
my @srcLines = sort {$srcLineHits{$b} <=> $srcLineHits{$a} } keys %srcLineHits;
for (my $i = 0; $i <= $#srcLines && $i < $maxCnt; ++$i) {
my $l = $srcLines[$i];
my $cnt = $srcLineHits{$l};
printf "%12d\t%6.2f%% |%s %s\n", $cnt, 100*$cnt/$allHits, DRAW_BAR(20, 20*$cnt/$allHits), $l;
}
}
ReadFunctionList($ARGV[0], 0);
ReadSamples();
print "\nModules:\n";
DumpModules();
print "\nHot functions:\n";
DumpHotFunc(100);
print "\nHotspots:\n";
DumpHotSpots(100);
DumpFuncDissasm(100);
print "\nHot src files:\n";
DumpSrcFiles(100);
print "\nHot src lines:\n";
DumpSrcLines(100);
# my @funcs = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcHits;
# printf "%d\n", $#funcs;
# for (my $i = 0; $i <= $#funcs; ++$i) {
# my $f = $funcs[$i];
# printf "%s\t%d\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
# $funcNames{$f}, $funcHits{$f}, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, $funcModule{$f};
# #DumpDisasm($f);
# }