aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/lfalloc/alloc_profiler/stackcollect.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/lfalloc/alloc_profiler/stackcollect.cpp')
-rw-r--r--library/cpp/lfalloc/alloc_profiler/stackcollect.cpp332
1 files changed, 332 insertions, 0 deletions
diff --git a/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp b/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp
new file mode 100644
index 0000000000..fded4e2fd1
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp
@@ -0,0 +1,332 @@
+#include "stackcollect.h"
+
+#include "profiler.h"
+
+#include <util/generic/algorithm.h>
+#include <util/generic/vector.h>
+#include <util/stream/format.h>
+#include <util/stream/str.h>
+#include <util/string/cast.h>
+#include <util/string/printf.h>
+#include <util/system/backtrace.h>
+#include <util/system/spinlock.h>
+#include <util/system/yassert.h>
+
+
+namespace NAllocProfiler {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+class TStackCollector: private TNonCopyable {
+public:
+ struct TFrameInfo {
+ int PrevInd;
+ void* Addr;
+ int Tag;
+ T Stats;
+
+ void Clear()
+ {
+ PrevInd = 0;
+ Addr = nullptr;
+ Tag = 0;
+ Stats.Clear();
+ }
+ };
+
+private:
+ static const size_t STACKS_HASH_MAP_SIZE = 256 * 1024;
+ TFrameInfo Frames[STACKS_HASH_MAP_SIZE];
+
+ ui64 Samples; // Saved samples count
+ ui64 UniqueSamples; // Number of unique addresses
+ ui64 UsedSlots; // Number of occupied slots in the hashtable
+ ui64 DroppedSamples; // Number of unsaved addresses
+ ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions
+
+ TAdaptiveLock Lock;
+
+public:
+ TStackCollector()
+ {
+ Clear();
+ }
+
+ int AddStack(void** stack, size_t frameCount, int tag)
+ {
+ Y_ASSERT(frameCount > 0);
+
+ int prevInd = -1;
+ with_lock (Lock) {
+ for (int i = frameCount - 1; i >= 0; --i) {
+ prevInd = AddFrame(stack[i], prevInd, ((i == 0) ? tag : 0), (i == 0));
+ if (prevInd == -1) {
+ break;
+ }
+ }
+ }
+ return prevInd;
+ }
+
+ T& GetStats(int stackId)
+ {
+ Y_ASSERT(stackId >= 0 && (size_t)stackId < Y_ARRAY_SIZE(Frames));
+ Y_ASSERT(!IsSlotEmpty(stackId));
+
+ return Frames[stackId].Stats;
+ }
+
+ const TFrameInfo* GetFrames() const
+ {
+ return Frames;
+ }
+
+ size_t GetFramesCount() const
+ {
+ return Y_ARRAY_SIZE(Frames);
+ }
+
+ void BackTrace(const TFrameInfo* stack, TStackVec<void*, 64>& frames) const
+ {
+ frames.clear();
+ for (size_t i = 0; i < 100; ++i) {
+ frames.push_back(stack->Addr);
+ int prevInd = stack->PrevInd;
+ if (prevInd == -1) {
+ break;
+ }
+ stack = &Frames[prevInd];
+ }
+ }
+
+ void Clear()
+ {
+ for (auto& frame: Frames) {
+ frame.Clear();
+ }
+
+ Samples = 0;
+ DroppedSamples = 0;
+ UniqueSamples = 0;
+ UsedSlots = 0;
+ SearchSkipCount = 0;
+ }
+
+private:
+ // Hash function applied to the addresses
+ static ui32 Hash(void* addr, int prevInd, int tag)
+ {
+ return (((size_t)addr + ((size_t)addr / STACKS_HASH_MAP_SIZE)) + prevInd + tag) % STACKS_HASH_MAP_SIZE;
+ }
+
+ static bool EqualFrame(const TFrameInfo& frame, void* addr, int prevInd, int tag)
+ {
+ return (frame.Addr == addr && frame.PrevInd == prevInd && frame.Tag == tag);
+ }
+
+ bool IsSlotEmpty(ui32 slot) const
+ {
+ return Frames[slot].Addr == 0;
+ }
+
+ bool InsertsAllowed() const
+ {
+ return UsedSlots < STACKS_HASH_MAP_SIZE / 2;
+ }
+
+ // returns the index in the hashmap
+ int AddFrame(void* addr, int prevFrameIndex, int tag, bool last)
+ {
+ ui32 slot = Hash(addr, prevFrameIndex, tag);
+ ui32 prevSlot = (slot - 1) % STACKS_HASH_MAP_SIZE;
+
+ while (!EqualFrame(Frames[slot], addr, prevFrameIndex, tag) && !IsSlotEmpty(slot) && slot != prevSlot) {
+ slot = (slot + 1) % STACKS_HASH_MAP_SIZE;
+ SearchSkipCount++;
+ }
+
+ if (EqualFrame(Frames[slot], addr, prevFrameIndex, tag)) {
+ if (last) {
+ ++Samples;
+ }
+ } else if (InsertsAllowed() && IsSlotEmpty(slot)) {
+ // add new sample
+ Frames[slot].Clear();
+ Frames[slot].Addr = addr;
+ Frames[slot].PrevInd = prevFrameIndex;
+ Frames[slot].Tag = tag;
+ ++UsedSlots;
+ if (last) {
+ ++UniqueSamples;
+ ++Samples;
+ }
+ } else {
+ // don't insert new sample if the search is becoming too slow
+ ++DroppedSamples;
+ return -1;
+ }
+
+ return slot;
+ }
+};
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAllocationStackCollector::TImpl: public TStackCollector<TStats> {
+ using TBase = TStackCollector<TStats>;
+
+private:
+ TStats Total;
+
+public:
+ int Alloc(void** stack, size_t frameCount, int tag, size_t size)
+ {
+ int stackId = TBase::AddStack(stack, frameCount, tag);
+ if (stackId >= 0) {
+ TBase::GetStats(stackId).Alloc(size);
+ Total.Alloc(size);
+ }
+ return stackId;
+ }
+
+ void Free(int stackId, size_t size)
+ {
+ TBase::GetStats(stackId).Free(size);
+ Total.Free(size);
+ }
+
+ void Clear()
+ {
+ TBase::Clear();
+ Total.Clear();
+ }
+
+ void Dump(int count, IAllocationStatsDumper& out) const
+ {
+ const TFrameInfo* frames = TBase::GetFrames();
+ size_t framesCount = TBase::GetFramesCount();
+
+ TVector<const TFrameInfo*> stacks;
+ for (size_t i = 0; i < framesCount; ++i) {
+ if (frames[i].Stats.Allocs) {
+ stacks.push_back(&frames[i]);
+ }
+ }
+
+ Sort(stacks, [] (const TFrameInfo* l, const TFrameInfo* r) {
+ const auto& ls = l->Stats;
+ const auto& rs = r->Stats;
+ return ls.CurrentSize != rs.CurrentSize
+ ? ls.CurrentSize > rs.CurrentSize
+ : ls.Allocs != rs.Allocs
+ ? ls.Allocs > rs.Allocs
+ : ls.Frees > rs.Frees;
+ });
+
+ out.DumpTotal(Total);
+
+ TAllocationInfo allocInfo;
+ int printedCount = 0;
+ for (const TFrameInfo* stack: stacks) {
+ allocInfo.Clear();
+ allocInfo.Tag = stack->Tag;
+ allocInfo.Stats = stack->Stats;
+ TBase::BackTrace(stack, allocInfo.Stack);
+
+ out.DumpEntry(allocInfo);
+
+ if (++printedCount >= count) {
+ break;
+ }
+ }
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+TAllocationStackCollector::TAllocationStackCollector()
+ : Impl(new TImpl())
+{}
+
+TAllocationStackCollector::~TAllocationStackCollector()
+{}
+
+int TAllocationStackCollector::Alloc(void** stack, size_t frameCount, int tag, size_t size)
+{
+ return Impl->Alloc(stack, frameCount, tag, size);
+}
+
+void TAllocationStackCollector::Free(int stackId, size_t size)
+{
+ Impl->Free(stackId, size);
+}
+
+void TAllocationStackCollector::Clear()
+{
+ Impl->Clear();
+}
+
+void TAllocationStackCollector::Dump(int count, IAllocationStatsDumper &out) const
+{
+ Impl->Dump(count, out);
+}
+
+
+TString IAllocationStatsDumper::FormatTag(int tag) {
+ return ToString(tag);
+}
+
+TString IAllocationStatsDumper::FormatSize(intptr_t sz) {
+ return ToString(sz);
+}
+
+
+TAllocationStatsDumper::TAllocationStatsDumper(IOutputStream& out)
+ : PrintedCount(0)
+ , Out(out)
+ , SymbolCache(2048)
+{}
+
+void TAllocationStatsDumper::DumpTotal(const TStats& total) {
+ Out << "TOTAL"
+ << "\tAllocs: " << total.Allocs
+ << "\tFrees: " << total.Frees
+ << "\tCurrentSize: " << FormatSize(total.CurrentSize)
+ << Endl;
+}
+
+void TAllocationStatsDumper::DumpEntry(const TAllocationInfo& allocInfo) {
+ Out << Endl
+ << "STACK #" << PrintedCount+1 << ": " << FormatTag(allocInfo.Tag)
+ << "\tAllocs: " << allocInfo.Stats.Allocs
+ << "\tFrees: " << allocInfo.Stats.Frees
+ << "\tCurrentSize: " << FormatSize(allocInfo.Stats.CurrentSize)
+ << Endl;
+ FormatBackTrace(allocInfo.Stack.data(), allocInfo.Stack.size());
+ PrintedCount++;
+}
+
+void TAllocationStatsDumper::FormatBackTrace(void* const* stack, size_t sz) {
+ char name[1024];
+ for (size_t i = 0; i < sz; ++i) {
+ TSymbol symbol;
+ auto it = SymbolCache.Find(stack[i]);
+ if (it != SymbolCache.End()) {
+ symbol = it.Value();
+ } else {
+ TResolvedSymbol rs = ResolveSymbol(stack[i], name, sizeof(name));
+ symbol = {rs.NearestSymbol, rs.Name};
+ SymbolCache.Insert(stack[i], symbol);
+ }
+
+ Out << Hex((intptr_t)stack[i], HF_FULL) << "\t" << symbol.Name;
+ intptr_t offset = (intptr_t)stack[i] - (intptr_t)symbol.Address;
+ if (offset)
+ Out << " +" << offset;
+ Out << Endl;
+ }
+}
+
+} // namespace NAllocProfiler