diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/lfalloc/alloc_profiler/stackcollect.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/lfalloc/alloc_profiler/stackcollect.cpp')
-rw-r--r-- | library/cpp/lfalloc/alloc_profiler/stackcollect.cpp | 332 |
1 files changed, 332 insertions, 0 deletions
diff --git a/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp b/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp new file mode 100644 index 0000000000..fded4e2fd1 --- /dev/null +++ b/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp @@ -0,0 +1,332 @@ +#include "stackcollect.h" + +#include "profiler.h" + +#include <util/generic/algorithm.h> +#include <util/generic/vector.h> +#include <util/stream/format.h> +#include <util/stream/str.h> +#include <util/string/cast.h> +#include <util/string/printf.h> +#include <util/system/backtrace.h> +#include <util/system/spinlock.h> +#include <util/system/yassert.h> + + +namespace NAllocProfiler { + +//////////////////////////////////////////////////////////////////////////////// + +template <typename T> +class TStackCollector: private TNonCopyable { +public: + struct TFrameInfo { + int PrevInd; + void* Addr; + int Tag; + T Stats; + + void Clear() + { + PrevInd = 0; + Addr = nullptr; + Tag = 0; + Stats.Clear(); + } + }; + +private: + static const size_t STACKS_HASH_MAP_SIZE = 256 * 1024; + TFrameInfo Frames[STACKS_HASH_MAP_SIZE]; + + ui64 Samples; // Saved samples count + ui64 UniqueSamples; // Number of unique addresses + ui64 UsedSlots; // Number of occupied slots in the hashtable + ui64 DroppedSamples; // Number of unsaved addresses + ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions + + TAdaptiveLock Lock; + +public: + TStackCollector() + { + Clear(); + } + + int AddStack(void** stack, size_t frameCount, int tag) + { + Y_ASSERT(frameCount > 0); + + int prevInd = -1; + with_lock (Lock) { + for (int i = frameCount - 1; i >= 0; --i) { + prevInd = AddFrame(stack[i], prevInd, ((i == 0) ? tag : 0), (i == 0)); + if (prevInd == -1) { + break; + } + } + } + return prevInd; + } + + T& GetStats(int stackId) + { + Y_ASSERT(stackId >= 0 && (size_t)stackId < Y_ARRAY_SIZE(Frames)); + Y_ASSERT(!IsSlotEmpty(stackId)); + + return Frames[stackId].Stats; + } + + const TFrameInfo* GetFrames() const + { + return Frames; + } + + size_t GetFramesCount() const + { + return Y_ARRAY_SIZE(Frames); + } + + void BackTrace(const TFrameInfo* stack, TStackVec<void*, 64>& frames) const + { + frames.clear(); + for (size_t i = 0; i < 100; ++i) { + frames.push_back(stack->Addr); + int prevInd = stack->PrevInd; + if (prevInd == -1) { + break; + } + stack = &Frames[prevInd]; + } + } + + void Clear() + { + for (auto& frame: Frames) { + frame.Clear(); + } + + Samples = 0; + DroppedSamples = 0; + UniqueSamples = 0; + UsedSlots = 0; + SearchSkipCount = 0; + } + +private: + // Hash function applied to the addresses + static ui32 Hash(void* addr, int prevInd, int tag) + { + return (((size_t)addr + ((size_t)addr / STACKS_HASH_MAP_SIZE)) + prevInd + tag) % STACKS_HASH_MAP_SIZE; + } + + static bool EqualFrame(const TFrameInfo& frame, void* addr, int prevInd, int tag) + { + return (frame.Addr == addr && frame.PrevInd == prevInd && frame.Tag == tag); + } + + bool IsSlotEmpty(ui32 slot) const + { + return Frames[slot].Addr == 0; + } + + bool InsertsAllowed() const + { + return UsedSlots < STACKS_HASH_MAP_SIZE / 2; + } + + // returns the index in the hashmap + int AddFrame(void* addr, int prevFrameIndex, int tag, bool last) + { + ui32 slot = Hash(addr, prevFrameIndex, tag); + ui32 prevSlot = (slot - 1) % STACKS_HASH_MAP_SIZE; + + while (!EqualFrame(Frames[slot], addr, prevFrameIndex, tag) && !IsSlotEmpty(slot) && slot != prevSlot) { + slot = (slot + 1) % STACKS_HASH_MAP_SIZE; + SearchSkipCount++; + } + + if (EqualFrame(Frames[slot], addr, prevFrameIndex, tag)) { + if (last) { + ++Samples; + } + } else if (InsertsAllowed() && IsSlotEmpty(slot)) { + // add new sample + Frames[slot].Clear(); + Frames[slot].Addr = addr; + Frames[slot].PrevInd = prevFrameIndex; + Frames[slot].Tag = tag; + ++UsedSlots; + if (last) { + ++UniqueSamples; + ++Samples; + } + } else { + // don't insert new sample if the search is becoming too slow + ++DroppedSamples; + return -1; + } + + return slot; + } +}; + + +//////////////////////////////////////////////////////////////////////////////// + +class TAllocationStackCollector::TImpl: public TStackCollector<TStats> { + using TBase = TStackCollector<TStats>; + +private: + TStats Total; + +public: + int Alloc(void** stack, size_t frameCount, int tag, size_t size) + { + int stackId = TBase::AddStack(stack, frameCount, tag); + if (stackId >= 0) { + TBase::GetStats(stackId).Alloc(size); + Total.Alloc(size); + } + return stackId; + } + + void Free(int stackId, size_t size) + { + TBase::GetStats(stackId).Free(size); + Total.Free(size); + } + + void Clear() + { + TBase::Clear(); + Total.Clear(); + } + + void Dump(int count, IAllocationStatsDumper& out) const + { + const TFrameInfo* frames = TBase::GetFrames(); + size_t framesCount = TBase::GetFramesCount(); + + TVector<const TFrameInfo*> stacks; + for (size_t i = 0; i < framesCount; ++i) { + if (frames[i].Stats.Allocs) { + stacks.push_back(&frames[i]); + } + } + + Sort(stacks, [] (const TFrameInfo* l, const TFrameInfo* r) { + const auto& ls = l->Stats; + const auto& rs = r->Stats; + return ls.CurrentSize != rs.CurrentSize + ? ls.CurrentSize > rs.CurrentSize + : ls.Allocs != rs.Allocs + ? ls.Allocs > rs.Allocs + : ls.Frees > rs.Frees; + }); + + out.DumpTotal(Total); + + TAllocationInfo allocInfo; + int printedCount = 0; + for (const TFrameInfo* stack: stacks) { + allocInfo.Clear(); + allocInfo.Tag = stack->Tag; + allocInfo.Stats = stack->Stats; + TBase::BackTrace(stack, allocInfo.Stack); + + out.DumpEntry(allocInfo); + + if (++printedCount >= count) { + break; + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +TAllocationStackCollector::TAllocationStackCollector() + : Impl(new TImpl()) +{} + +TAllocationStackCollector::~TAllocationStackCollector() +{} + +int TAllocationStackCollector::Alloc(void** stack, size_t frameCount, int tag, size_t size) +{ + return Impl->Alloc(stack, frameCount, tag, size); +} + +void TAllocationStackCollector::Free(int stackId, size_t size) +{ + Impl->Free(stackId, size); +} + +void TAllocationStackCollector::Clear() +{ + Impl->Clear(); +} + +void TAllocationStackCollector::Dump(int count, IAllocationStatsDumper &out) const +{ + Impl->Dump(count, out); +} + + +TString IAllocationStatsDumper::FormatTag(int tag) { + return ToString(tag); +} + +TString IAllocationStatsDumper::FormatSize(intptr_t sz) { + return ToString(sz); +} + + +TAllocationStatsDumper::TAllocationStatsDumper(IOutputStream& out) + : PrintedCount(0) + , Out(out) + , SymbolCache(2048) +{} + +void TAllocationStatsDumper::DumpTotal(const TStats& total) { + Out << "TOTAL" + << "\tAllocs: " << total.Allocs + << "\tFrees: " << total.Frees + << "\tCurrentSize: " << FormatSize(total.CurrentSize) + << Endl; +} + +void TAllocationStatsDumper::DumpEntry(const TAllocationInfo& allocInfo) { + Out << Endl + << "STACK #" << PrintedCount+1 << ": " << FormatTag(allocInfo.Tag) + << "\tAllocs: " << allocInfo.Stats.Allocs + << "\tFrees: " << allocInfo.Stats.Frees + << "\tCurrentSize: " << FormatSize(allocInfo.Stats.CurrentSize) + << Endl; + FormatBackTrace(allocInfo.Stack.data(), allocInfo.Stack.size()); + PrintedCount++; +} + +void TAllocationStatsDumper::FormatBackTrace(void* const* stack, size_t sz) { + char name[1024]; + for (size_t i = 0; i < sz; ++i) { + TSymbol symbol; + auto it = SymbolCache.Find(stack[i]); + if (it != SymbolCache.End()) { + symbol = it.Value(); + } else { + TResolvedSymbol rs = ResolveSymbol(stack[i], name, sizeof(name)); + symbol = {rs.NearestSymbol, rs.Name}; + SymbolCache.Insert(stack[i], symbol); + } + + Out << Hex((intptr_t)stack[i], HF_FULL) << "\t" << symbol.Name; + intptr_t offset = (intptr_t)stack[i] - (intptr_t)symbol.Address; + if (offset) + Out << " +" << offset; + Out << Endl; + } +} + +} // namespace NAllocProfiler |