aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/lfalloc/alloc_profiler
diff options
context:
space:
mode:
author Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/lfalloc/alloc_profiler
download ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/lfalloc/alloc_profiler')
-rw-r--r-- library/cpp/lfalloc/alloc_profiler/align_ut.cpp 23
-rw-r--r-- library/cpp/lfalloc/alloc_profiler/profiler.cpp 81
-rw-r--r-- library/cpp/lfalloc/alloc_profiler/profiler.h 45
-rw-r--r-- library/cpp/lfalloc/alloc_profiler/profiler_ut.cpp 76
-rw-r--r-- library/cpp/lfalloc/alloc_profiler/stackcollect.cpp 332
-rw-r--r-- library/cpp/lfalloc/alloc_profiler/stackcollect.h 107
-rw-r--r-- library/cpp/lfalloc/alloc_profiler/ut/ya.make 22
-rw-r--r-- library/cpp/lfalloc/alloc_profiler/ya.make 17
8 files changed, 703 insertions, 0 deletions
diff --git a/library/cpp/lfalloc/alloc_profiler/align_ut.cpp b/library/cpp/lfalloc/alloc_profiler/align_ut.cpp
new file mode 100644
index 00000000000..db9b17b95ba
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/align_ut.cpp
@@ -0,0 +1,23 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/scope.h>
+
+Y_UNIT_TEST_SUITE(MemAlign) {
+    // posix_memalign must honour every power-of-two alignment from 8 to 4096 bytes
+    // for block sizes 8, 64, 512, 4096 and 32768 bytes.
+    Y_UNIT_TEST(ShouldAlign)
+    {
+        const ui64 maxSize = 32 * 1024;
+        const ui64 maxAlign = 4096;
+
+        for (ui64 size = 8; size <= maxSize; size *= 8) {
+            for (ui64 align = 8; align <= maxAlign; align *= 2) {
+                void* ptr = nullptr;
+                int res = posix_memalign(&ptr, align, size);
+
+                // The asserted expressions are kept verbatim: the macros print them on failure.
+                UNIT_ASSERT_C(res == 0 && ptr != nullptr, "memalign failed");
+
+                // Release the block when this iteration ends, whether or not the check below passes.
+                Y_DEFER {
+                    free(ptr);
+                };
+
+                UNIT_ASSERT_C((uintptr_t)ptr % align == 0, "non aligned memory");
+            }
+        }
+    }
+}
diff --git a/library/cpp/lfalloc/alloc_profiler/profiler.cpp b/library/cpp/lfalloc/alloc_profiler/profiler.cpp
new file mode 100644
index 00000000000..0e30927a5a2
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/profiler.cpp
@@ -0,0 +1,81 @@
+#include "profiler.h"
+
+#include "stackcollect.h"
+
+#include <util/generic/algorithm.h>
+#include <util/generic/singleton.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/stream/str.h>
+
+namespace NAllocProfiler {
+
+namespace {
+
+// Returns the shared collector instance handed out by Singleton<>().
+static TAllocationStackCollector& AllocationStackCollector()
+{
+    TAllocationStackCollector* const instance = Singleton<TAllocationStackCollector>();
+    return *instance;
+}
+
+// Allocation hook installed by StartAllocationSampling().
+// Captures the current call stack and hands it to the collector.
+// Returns the stack id produced by the collector, or -1 when no usable
+// frames were captured; sizeIdx is not used.
+int AllocationCallback(int tag, size_t size, int sizeIdx)
+{
+    Y_UNUSED(sizeIdx);
+
+    constexpr size_t STACK_FRAMES_COUNT = 32; // capacity of the capture buffer
+    constexpr size_t STACK_FRAMES_SKIP = 1;   // leading frames left out of the recorded stack
+
+    void* frames[STACK_FRAMES_COUNT];
+    const size_t captured = BackTrace(frames, Y_ARRAY_SIZE(frames));
+
+    if (captured > STACK_FRAMES_SKIP) {
+        return AllocationStackCollector().Alloc(
+            frames + STACK_FRAMES_SKIP,
+            captured - STACK_FRAMES_SKIP,
+            tag,
+            size);
+    }
+
+    // Nothing is left once the skipped frames are removed.
+    return -1;
+}
+
+// Deallocation hook installed by StartAllocationSampling().
+// Forwards the stack id and the size to the collector; tag and sizeIdx are not used.
+void DeallocationCallback(int stackId, int tag, size_t size, int sizeIdx)
+{
+    Y_UNUSED(tag);
+    Y_UNUSED(sizeIdx);
+
+    AllocationStackCollector().Free(stackId, size);
+}
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Starts a sampling session: clears the collector, installs the allocation and
+// deallocation callbacks and enables sampling. Always returns true.
+bool StartAllocationSampling(bool profileAllThreads)
+{
+    // Drop whatever an earlier session has collected before the callbacks are installed.
+    AllocationStackCollector().Clear();
+
+    NAllocDbg::SetProfileAllThreads(profileAllThreads);
+    NAllocDbg::SetAllocationCallback(AllocationCallback);
+    NAllocDbg::SetDeallocationCallback(DeallocationCallback);
+    NAllocDbg::SetAllocationSamplingEnabled(true);
+
+    return true;
+}
+
+// Ends the sampling session: removes both callbacks, disables sampling and then
+// dumps the collected stacks through `out` (`count` is passed on to Dump()).
+// Always returns true.
+bool StopAllocationSampling(IAllocationStatsDumper &out, int count)
+{
+    NAllocDbg::SetAllocationCallback(nullptr);
+    NAllocDbg::SetDeallocationCallback(nullptr);
+    NAllocDbg::SetAllocationSamplingEnabled(false);
+
+    AllocationStackCollector().Dump(count, out);
+
+    return true;
+}
+
+// Convenience overload: dumps through the default TAllocationStatsDumper writing to `out`.
+bool StopAllocationSampling(IOutputStream& out, int count) {
+    TAllocationStatsDumper textDumper(out);
+    const bool stopped = StopAllocationSampling(textDumper, count);
+    return stopped;
+}
+
+} // namespace NAllocProfiler
diff --git a/library/cpp/lfalloc/alloc_profiler/profiler.h b/library/cpp/lfalloc/alloc_profiler/profiler.h
new file mode 100644
index 00000000000..4ea49b9dcc8
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/profiler.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "stackcollect.h"
+
+#include <library/cpp/lfalloc/dbg_info/dbg_info.h>
+
+#include <util/generic/noncopyable.h>
+#include <util/stream/output.h>
+
+namespace NAllocProfiler {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Passes `value` to NAllocDbg::SetThreadAllocTag and returns its result.
+// TProfilingScope stores that result and hands it back to this function on destruction.
+inline int SetCurrentScopeTag(int value)
+{
+    const int result = NAllocDbg::SetThreadAllocTag(value);
+    return result;
+}
+
+// Passes `value` to NAllocDbg::SetProfileCurrentThread and returns its result.
+inline bool SetProfileCurrentThread(bool value)
+{
+    const bool result = NAllocDbg::SetProfileCurrentThread(value);
+    return result;
+}
+
+bool StartAllocationSampling(bool profileAllThreads = false); // clears collected stacks, installs the callbacks, enables sampling; always returns true
+bool StopAllocationSampling(IAllocationStatsDumper& out, int count = 100); // removes the callbacks, disables sampling, dumps through `out`; `count` limits the dumped stacks
+bool StopAllocationSampling(IOutputStream& out, int count = 100); // same, dumping as text through the default TAllocationStatsDumper
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Scope guard for the allocation tag: the constructor installs `value` through
+// SetCurrentScopeTag() and remembers what that call returned; the destructor
+// passes the remembered value back to SetCurrentScopeTag().
+class TProfilingScope: private TNonCopyable {
+public:
+    explicit TProfilingScope(int value)
+        : Prev(SetCurrentScopeTag(value))
+    {
+    }
+
+    ~TProfilingScope()
+    {
+        SetCurrentScopeTag(Prev);
+    }
+
+private:
+    // Result of the SetCurrentScopeTag() call made by the constructor.
+    const int Prev;
+};
+
+} // namespace NAllocProfiler
diff --git a/library/cpp/lfalloc/alloc_profiler/profiler_ut.cpp b/library/cpp/lfalloc/alloc_profiler/profiler_ut.cpp
new file mode 100644
index 00000000000..4341dda6ed9
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/profiler_ut.cpp
@@ -0,0 +1,76 @@
+#include "profiler.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace NAllocProfiler {
+
+////////////////////////////////////////////////////////////////////////////////
+
+Y_UNIT_TEST_SUITE(Profiler) {
+ Y_UNIT_TEST(StackCollection) // the dump produced after sampling must contain this test's own call chain
+ {
+ TStringStream str;
+
+ NAllocProfiler::StartAllocationSampling(true); // true = profileAllThreads
+ TVector<TAutoPtr<int>> test;
+ // Do many allocations and no deallocations: `test` holds every pointer until the end of the test
+ for (int i = 0; i < 10000; ++i) {
+ test.push_back(new int);
+ }
+ NAllocProfiler::StopAllocationSampling(str); // IOutputStream overload: default dumper, count defaults to 100
+ // Uncomment to inspect the dump: Cout << str.Str() << Endl;
+
+#if !defined(ARCH_AARCH64) // ut/ya.make uses jemalloc instead of ALLOCATOR(LF_DBG) on aarch64, so the output is not checked there
+ /* Check that output resembles this (symbol names matter, addresses and offsets vary):
+
+ STACK #2: 0 Allocs: 10 Frees: 0 CurrentSize: 40
+ 0000000000492353 ??
+ 000000000048781F operator new(unsigned long) +1807
+ 00000000003733FA NAllocProfiler::NTestSuiteProfiler::TTestCaseStackCollection::Execute_(NUnitTest::TTestContext&) +218
+ 00000000004A1938 NUnitTest::TTestBase::Run(std::__y1::function<void ()>, TString, char const*, bool) +120
+ 0000000000375656 NAllocProfiler::NTestSuiteProfiler::TCurrentTest::Execute() +342
+ 00000000004A20CF NUnitTest::TTestFactory::Execute() +847
+ 000000000049922D NUnitTest::RunMain(int, char**) +1965
+ 00007FF665778F45 __libc_start_main +245
+ */
+
+ UNIT_ASSERT_STRING_CONTAINS(str.Str(), "StackCollection");
+ UNIT_ASSERT_STRING_CONTAINS(str.Str(), "NUnitTest::TTestBase::Run");
+ UNIT_ASSERT_STRING_CONTAINS(str.Str(), "NAllocProfiler::NTestSuiteProfiler::TCurrentTest::Execute");
+ UNIT_ASSERT_STRING_CONTAINS(str.Str(), "NUnitTest::TTestFactory::Execute");
+ UNIT_ASSERT_STRING_CONTAINS(str.Str(), "NUnitTest::RunMain");
+#endif
+ }
+
+ class TAllocDumper : public NAllocProfiler::TAllocationStatsDumper { // default dumper with a custom tag printer
+ public:
+ explicit TAllocDumper(IOutputStream& out) : NAllocProfiler::TAllocationStatsDumper(out) {}
+
+ TString FormatTag(int tag) override { // called by DumpEntry for every dumped stack
+ UNIT_ASSERT_VALUES_EQUAL(tag, 42); // every dumped stack is expected to carry the tag set in TagNames
+ return "TAG_NAME_42";
+ }
+ };
+
+ Y_UNIT_TEST(TagNames) // the tag set by TProfilingScope must reach the dumper's FormatTag
+ {
+ TStringStream str;
+
+ NAllocProfiler::StartAllocationSampling(true); // true = profileAllThreads
+ TVector<TAutoPtr<int>> test;
+ NAllocProfiler::TProfilingScope scope(42); // sets the scope tag to 42 until the end of the test body
+ // Do many allocations and no deallocations: `test` holds every pointer until the end of the test
+ for (int i = 0; i < 10000; ++i) {
+ test.push_back(new int);
+ }
+
+ TAllocDumper dumper(str);
+ NAllocProfiler::StopAllocationSampling(dumper); // IAllocationStatsDumper overload, count defaults to 100
+
+#if !defined(ARCH_AARCH64) // ut/ya.make uses jemalloc instead of ALLOCATOR(LF_DBG) on aarch64, so the output is not checked there
+ UNIT_ASSERT_STRING_CONTAINS(str.Str(), "TAG_NAME_42");
+#endif
+ }
+}
+
+}
diff --git a/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp b/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp
new file mode 100644
index 00000000000..fded4e2fd1a
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp
@@ -0,0 +1,332 @@
+#include "stackcollect.h"
+
+#include "profiler.h"
+
+#include <util/generic/algorithm.h>
+#include <util/generic/vector.h>
+#include <util/stream/format.h>
+#include <util/stream/str.h>
+#include <util/string/cast.h>
+#include <util/string/printf.h>
+#include <util/system/backtrace.h>
+#include <util/system/spinlock.h>
+#include <util/system/yassert.h>
+
+
+namespace NAllocProfiler {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename T> // T: per-slot statistics type; Clear() is called on it whenever a slot is reset
+class TStackCollector: private TNonCopyable { // fixed-size open-addressing table of stack frames; a stack is a chain of slots linked through PrevInd
+public:
+ struct TFrameInfo { // one table slot = one frame of a recorded stack
+ int PrevInd; // slot of the frame added just before this one (stack[i + 1] in AddStack); -1 for the first frame added
+ void* Addr; // frame address; nullptr marks the slot as empty (see IsSlotEmpty)
+ int Tag; // tag passed to AddStack for the stack[0] frame, 0 for every other frame
+ T Stats; // statistics attached to this slot (see GetStats)
+
+ void Clear()
+ {
+ PrevInd = 0; // note: 0, not the -1 chain terminator; an empty slot is recognised by Addr only
+ Addr = nullptr;
+ Tag = 0;
+ Stats.Clear();
+ }
+ };
+
+private:
+ static const size_t STACKS_HASH_MAP_SIZE = 256 * 1024; // number of slots in Frames
+ TFrameInfo Frames[STACKS_HASH_MAP_SIZE]; // the table itself, stored inline in the object
+
+ ui64 Samples; // Number of stacks recorded: bumped for the stack[0] frame, whether its slot is new or already known
+ ui64 UniqueSamples; // Number of distinct stacks: bumped when the stack[0] frame takes a new slot
+ ui64 UsedSlots; // Number of occupied slots in the hashtable
+ ui64 DroppedSamples; // Number of frames rejected by AddFrame
+ ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions
+
+ TAdaptiveLock Lock; // held by AddStack while frames are inserted; GetStats() and Clear() do not take it
+
+public:
+ TStackCollector() // starts with an empty table and zeroed counters
+ {
+ Clear();
+ }
+
+ int AddStack(void** stack, size_t frameCount, int tag) // stores stack[frameCount - 1] ... stack[0] as one chain; returns the slot of stack[0], or -1 if a frame was dropped
+ {
+ Y_ASSERT(frameCount > 0);
+
+ int prevInd = -1; // -1 = no previous frame yet
+ with_lock (Lock) {
+ for (int i = frameCount - 1; i >= 0; --i) { // walk from the highest index down to stack[0]
+ prevInd = AddFrame(stack[i], prevInd, ((i == 0) ? tag : 0), (i == 0)); // the tag and the "last" flag apply to stack[0] only
+ if (prevInd == -1) {
+ break; // frame was dropped: give up on this stack (frames inserted so far stay in the table)
+ }
+ }
+ }
+ return prevInd;
+ }
+
+ T& GetStats(int stackId) // stats of the slot returned by AddStack; the id is checked with Y_ASSERT only
+ {
+ Y_ASSERT(stackId >= 0 && (size_t)stackId < Y_ARRAY_SIZE(Frames));
+ Y_ASSERT(!IsSlotEmpty(stackId));
+
+ return Frames[stackId].Stats;
+ }
+
+ const TFrameInfo* GetFrames() const // read-only access to the slot array; its length is GetFramesCount()
+ {
+ return Frames;
+ }
+
+ size_t GetFramesCount() const // total number of slots, empty ones included
+ {
+ return Y_ARRAY_SIZE(Frames);
+ }
+
+ void BackTrace(const TFrameInfo* stack, TStackVec<void*, 64>& frames) const // fills `frames` with the addresses along the PrevInd chain starting at `stack`
+ {
+ frames.clear();
+ for (size_t i = 0; i < 100; ++i) { // hard cap of 100 frames per chain
+ frames.push_back(stack->Addr);
+ int prevInd = stack->PrevInd;
+ if (prevInd == -1) { // reached the first frame AddStack inserted for this chain
+ break;
+ }
+ stack = &Frames[prevInd];
+ }
+ }
+
+ void Clear() // resets every slot and all counters; does not take Lock
+ {
+ for (auto& frame: Frames) {
+ frame.Clear();
+ }
+
+ Samples = 0;
+ DroppedSamples = 0;
+ UniqueSamples = 0;
+ UsedSlots = 0;
+ SearchSkipCount = 0;
+ }
+
+private:
+ // Start slot for a frame: mixes the address with the previous slot index and the tag, reduced modulo the table size
+ static ui32 Hash(void* addr, int prevInd, int tag)
+ {
+ return (((size_t)addr + ((size_t)addr / STACKS_HASH_MAP_SIZE)) + prevInd + tag) % STACKS_HASH_MAP_SIZE;
+ }
+
+ static bool EqualFrame(const TFrameInfo& frame, void* addr, int prevInd, int tag) // a slot matches only if address, previous slot and tag are all equal
+ {
+ return (frame.Addr == addr && frame.PrevInd == prevInd && frame.Tag == tag);
+ }
+
+ bool IsSlotEmpty(ui32 slot) const // empty slots are recognised by a null Addr
+ {
+ return Frames[slot].Addr == 0;
+ }
+
+ bool InsertsAllowed() const // new frames are accepted only while fewer than half of the slots are used
+ {
+ return UsedSlots < STACKS_HASH_MAP_SIZE / 2;
+ }
+
+ // Finds or inserts the frame (addr, prevFrameIndex, tag) and returns its index in the hashmap, or -1 if the frame was dropped; `last` marks the stack[0] frame and drives the sample counters
+ int AddFrame(void* addr, int prevFrameIndex, int tag, bool last)
+ {
+ ui32 slot = Hash(addr, prevFrameIndex, tag);
+ ui32 prevSlot = (slot - 1) % STACKS_HASH_MAP_SIZE; // slot just before the start: reaching it ends the probe after one pass over the table
+
+ while (!EqualFrame(Frames[slot], addr, prevFrameIndex, tag) && !IsSlotEmpty(slot) && slot != prevSlot) { // linear probing: stop at a matching slot, an empty slot, or prevSlot
+ slot = (slot + 1) % STACKS_HASH_MAP_SIZE;
+ SearchSkipCount++;
+ }
+
+ if (EqualFrame(Frames[slot], addr, prevFrameIndex, tag)) { // frame already known: only the sample counter changes
+ if (last) {
+ ++Samples;
+ }
+ } else if (InsertsAllowed() && IsSlotEmpty(slot)) {
+ // add new sample
+ Frames[slot].Clear();
+ Frames[slot].Addr = addr;
+ Frames[slot].PrevInd = prevFrameIndex;
+ Frames[slot].Tag = tag;
+ ++UsedSlots;
+ if (last) {
+ ++UniqueSamples;
+ ++Samples;
+ }
+ } else {
+ // don't insert new sample if the search is becoming too slow (table at least half used) or the probe ended on an occupied, non-matching slot
+ ++DroppedSamples;
+ return -1;
+ }
+
+ return slot;
+ }
+};
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Allocation-specific collector: keeps a TStats in every TStackCollector slot
+// and a running total over all recorded allocations.
+class TAllocationStackCollector::TImpl: public TStackCollector<TStats> {
+    using TBase = TStackCollector<TStats>;
+
+private:
+    // Sum over every allocation/free that was attributed to a stored stack.
+    TStats Total;
+
+public:
+    // Records an allocation of `size` bytes made from `stack`.
+    // Returns the stack id to pass to Free() later, or -1 when the stack could
+    // not be stored; in that case nothing is accounted.
+    int Alloc(void** stack, size_t frameCount, int tag, size_t size)
+    {
+        const int stackId = TBase::AddStack(stack, frameCount, tag);
+        if (stackId >= 0) {
+            TBase::GetStats(stackId).Alloc(size);
+            Total.Alloc(size);
+        }
+        return stackId;
+    }
+
+    // Records the release of `size` bytes previously attributed to `stackId`.
+    // Ids that do not address a table slot are ignored.
+    void Free(int stackId, size_t size)
+    {
+        // Alloc() reports unsaved stacks as -1 and never accounts them, and
+        // GetStats() guards its index with Y_ASSERT only. Reject such ids here
+        // instead of indexing the table with them; Total stays untouched as well,
+        // mirroring Alloc().
+        if (stackId < 0 || (size_t)stackId >= TBase::GetFramesCount()) {
+            return;
+        }
+
+        TBase::GetStats(stackId).Free(size);
+        Total.Free(size);
+    }
+
+    // Resets the table and the total.
+    void Clear()
+    {
+        TBase::Clear();
+        Total.Clear();
+    }
+
+    // Writes the total and then at most `count` stacks to `out`, ordered by
+    // CurrentSize, then Allocs, then Frees (all descending).
+    // A non-positive `count` dumps the total only.
+    void Dump(int count, IAllocationStatsDumper& out) const
+    {
+        const TFrameInfo* frames = TBase::GetFrames();
+        const size_t framesCount = TBase::GetFramesCount();
+
+        // Only slots that have seen at least one allocation describe a stack.
+        TVector<const TFrameInfo*> stacks;
+        for (size_t i = 0; i < framesCount; ++i) {
+            if (frames[i].Stats.Allocs) {
+                stacks.push_back(&frames[i]);
+            }
+        }
+
+        Sort(stacks, [] (const TFrameInfo* l, const TFrameInfo* r) {
+            const auto& ls = l->Stats;
+            const auto& rs = r->Stats;
+            if (ls.CurrentSize != rs.CurrentSize) {
+                return ls.CurrentSize > rs.CurrentSize;
+            }
+            if (ls.Allocs != rs.Allocs) {
+                return ls.Allocs > rs.Allocs;
+            }
+            return ls.Frees > rs.Frees;
+        });
+
+        out.DumpTotal(Total);
+
+        TAllocationInfo allocInfo;
+        int printedCount = 0;
+        for (const TFrameInfo* stack: stacks) {
+            // Checked before printing so that count <= 0 prints no entry at all.
+            if (printedCount >= count) {
+                break;
+            }
+
+            allocInfo.Clear();
+            allocInfo.Tag = stack->Tag;
+            allocInfo.Stats = stack->Stats;
+            TBase::BackTrace(stack, allocInfo.Stack);
+
+            out.DumpEntry(allocInfo);
+            ++printedCount;
+        }
+    }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// TAllocationStackCollector is a thin shell: every method below forwards to TImpl.
+TAllocationStackCollector::TAllocationStackCollector()
+    : Impl(new TImpl())
+{
+}
+
+// Defined in this file, after the definition of TImpl.
+TAllocationStackCollector::~TAllocationStackCollector() = default;
+
+int TAllocationStackCollector::Alloc(void** stack, size_t frameCount, int tag, size_t size)
+{
+    TImpl& impl = *Impl;
+    return impl.Alloc(stack, frameCount, tag, size);
+}
+
+void TAllocationStackCollector::Free(int stackId, size_t size)
+{
+    TImpl& impl = *Impl;
+    impl.Free(stackId, size);
+}
+
+void TAllocationStackCollector::Clear()
+{
+    TImpl& impl = *Impl;
+    impl.Clear();
+}
+
+void TAllocationStackCollector::Dump(int count, IAllocationStatsDumper &out) const
+{
+    const TImpl& impl = *Impl;
+    impl.Dump(count, out);
+}
+
+
+// Default tag printer: the tag rendered with ToString().
+TString IAllocationStatsDumper::FormatTag(int tag) {
+    TString text = ToString(tag);
+    return text;
+}
+
+// Default size printer: the value rendered with ToString().
+TString IAllocationStatsDumper::FormatSize(intptr_t sz) {
+    TString text = ToString(sz);
+    return text;
+}
+
+
+TAllocationStatsDumper::TAllocationStatsDumper(IOutputStream& out) // `out` is kept by reference, so it has to outlive the dumper
+ : PrintedCount(0) // number of entries written so far; DumpEntry numbers stacks from it
+ , Out(out)
+ , SymbolCache(2048) // presumably the capacity of the resolved-symbol cache (TLFUCache constructor argument) - TODO confirm
+{}
+
+// Writes the single "TOTAL" line: allocation count, free count and current size
+// (the size goes through FormatSize()).
+void TAllocationStatsDumper::DumpTotal(const TStats& total) {
+    Out << "TOTAL";
+    Out << "\tAllocs: " << total.Allocs;
+    Out << "\tFrees: " << total.Frees;
+    Out << "\tCurrentSize: " << FormatSize(total.CurrentSize);
+    Out << Endl;
+}
+
+// Writes one stack: an empty line, a "STACK #N" header (N counts from 1 over the
+// entries printed so far) with the formatted tag and counters, then the
+// symbolised frames.
+void TAllocationStatsDumper::DumpEntry(const TAllocationInfo& allocInfo) {
+    const TStats& stats = allocInfo.Stats;
+
+    Out << Endl;
+    Out << "STACK #" << PrintedCount+1 << ": " << FormatTag(allocInfo.Tag);
+    Out << "\tAllocs: " << stats.Allocs;
+    Out << "\tFrees: " << stats.Frees;
+    Out << "\tCurrentSize: " << FormatSize(stats.CurrentSize);
+    Out << Endl;
+
+    FormatBackTrace(allocInfo.Stack.data(), allocInfo.Stack.size());
+
+    // Counted only after the entry has been written completely.
+    ++PrintedCount;
+}
+
+// Writes one line per frame: the address in full hex, a tab, the symbol name and,
+// when the address is not the symbol's own address, " +<offset>".
+// Resolved symbols are remembered in SymbolCache, keyed by frame address.
+void TAllocationStatsDumper::FormatBackTrace(void* const* stack, size_t sz) {
+    char nameBuf[1024]; // scratch buffer handed to ResolveSymbol()
+
+    for (void* const* cur = stack; cur != stack + sz; ++cur) {
+        void* const addr = *cur;
+
+        TSymbol symbol;
+        auto cached = SymbolCache.Find(addr);
+        if (cached == SymbolCache.End()) {
+            // First time this address is seen: resolve it and remember the result.
+            TResolvedSymbol resolved = ResolveSymbol(addr, nameBuf, sizeof(nameBuf));
+            symbol = {resolved.NearestSymbol, resolved.Name};
+            SymbolCache.Insert(addr, symbol);
+        } else {
+            symbol = cached.Value();
+        }
+
+        Out << Hex((intptr_t)addr, HF_FULL) << "\t" << symbol.Name;
+
+        const intptr_t offset = (intptr_t)addr - (intptr_t)symbol.Address;
+        if (offset != 0) {
+            Out << " +" << offset;
+        }
+        Out << Endl;
+    }
+}
+
+} // namespace NAllocProfiler
diff --git a/library/cpp/lfalloc/alloc_profiler/stackcollect.h b/library/cpp/lfalloc/alloc_profiler/stackcollect.h
new file mode 100644
index 00000000000..80715ed7cb5
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/stackcollect.h
@@ -0,0 +1,107 @@
+#pragma once
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <library/cpp/cache/cache.h>
+
+#include <util/generic/noncopyable.h>
+#include <util/generic/ptr.h>
+#include <util/stream/output.h>
+
+namespace NAllocProfiler {
+
+// Allocation counters; one instance per recorded stack plus one for the total.
+struct TStats {
+    intptr_t Allocs = 0;      // incremented by every Alloc() call
+    intptr_t Frees = 0;       // incremented by every Free() call
+    intptr_t CurrentSize = 0; // bytes added by Alloc() minus bytes subtracted by Free()
+
+    // Resets all counters with plain assignments.
+    void Clear()
+    {
+        Allocs = Frees = CurrentSize = 0;
+    }
+
+    // Accounts one allocation of `size` bytes using the Atomic* helpers.
+    void Alloc(size_t size)
+    {
+        AtomicIncrement(Allocs);
+        AtomicAdd(CurrentSize, size);
+    }
+
+    // Accounts one release of `size` bytes using the Atomic* helpers.
+    void Free(size_t size)
+    {
+        AtomicIncrement(Frees);
+        AtomicSub(CurrentSize, size);
+    }
+};
+
+// One dumped stack: its tag, a copy of its counters and its frame addresses.
+struct TAllocationInfo {
+    // Initialised in place, like the TStats members, so that a default-constructed
+    // object never exposes an indeterminate tag before Clear() is called.
+    int Tag = 0;
+    TStats Stats;
+    TStackVec<void*, 64> Stack;
+
+    // Returns the object to its freshly constructed state so it can be reused.
+    void Clear() {
+        Tag = 0;
+        Stats.Clear();
+        Stack.clear();
+    }
+};
+
+
+class IAllocationStatsDumper { // sink that receives the collected statistics from TAllocationStackCollector::Dump
+public:
+ virtual ~IAllocationStatsDumper() = default;
+
+ // Total stats over all recorded allocations
+ virtual void DumpTotal(const TStats& total) = 0;
+
+ // Stats for an individual stack (tag, counters, frame addresses)
+ virtual void DumpEntry(const TAllocationInfo& allocInfo) = 0;
+
+ // App-specific tag printer; the default implementation returns ToString(tag)
+ virtual TString FormatTag(int tag);
+
+ // Size printer (e.g. "10KB", "100MB", "over 9000"); the default implementation returns ToString(sz)
+ virtual TString FormatSize(intptr_t sz);
+};
+
+// Default implementation
+class TAllocationStatsDumper: public IAllocationStatsDumper { // text dumper writing to an IOutputStream
+public:
+ explicit TAllocationStatsDumper(IOutputStream& out); // `out` is stored by reference
+ void DumpTotal(const TStats& total) override; // writes the "TOTAL" line
+ void DumpEntry(const TAllocationInfo& allocInfo) override; // writes a "STACK #N" header followed by the frames
+
+private:
+ void FormatBackTrace(void* const* stack, size_t sz); // writes one line per frame: address, symbol name, optional offset
+
+private:
+ struct TSymbol { // resolved symbol as stored in SymbolCache
+ const void* Address; // address of the nearest symbol; used to compute the printed offset
+ TString Name;
+ };
+
+ size_t PrintedCount; // number of entries written by DumpEntry so far
+ IOutputStream& Out;
+ TLFUCache<void*, TSymbol> SymbolCache; // frame address -> resolved symbol
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TAllocationStackCollector: private TNonCopyable { // public face of the collector; the implementation lives in TImpl (stackcollect.cpp)
+private:
+ class TImpl;
+ THolder<TImpl> Impl;
+
+public:
+ TAllocationStackCollector();
+ ~TAllocationStackCollector();
+
+ int Alloc(void** stack, size_t frameCount, int tag, size_t size); // records an allocation; returns the stack id, or -1 if the stack was not stored
+ void Free(int stackId, size_t size); // records a release for a stack id returned by Alloc()
+
+ void Clear(); // drops all collected stacks and statistics
+
+ void Dump(int count, IAllocationStatsDumper& out) const; // writes the total and then the top stacks (limited by `count`) to `out`
+};
+
+} // namespace NAllocProfiler
diff --git a/library/cpp/lfalloc/alloc_profiler/ut/ya.make b/library/cpp/lfalloc/alloc_profiler/ut/ya.make
new file mode 100644
index 00000000000..8a7daa74af6
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/ut/ya.make
@@ -0,0 +1,22 @@
+UNITTEST_FOR(library/cpp/lfalloc/alloc_profiler)
+
+OWNER(g:rtmr g:kikimr)
+
+PEERDIR(
+ library/cpp/testing/unittest
+)
+
+IF (ARCH_AARCH64)
+ PEERDIR(
+ contrib/libs/jemalloc
+ )
+ELSE()
+ ALLOCATOR(LF_DBG)
+ENDIF()
+
+SRCS(
+ profiler_ut.cpp
+ align_ut.cpp
+)
+
+END()
diff --git a/library/cpp/lfalloc/alloc_profiler/ya.make b/library/cpp/lfalloc/alloc_profiler/ya.make
new file mode 100644
index 00000000000..0f58d917678
--- /dev/null
+++ b/library/cpp/lfalloc/alloc_profiler/ya.make
@@ -0,0 +1,17 @@
+LIBRARY()
+
+OWNER(g:rtmr g:kikimr)
+
+SRCS(
+ profiler.cpp
+ stackcollect.cpp
+)
+
+PEERDIR(
+ library/cpp/lfalloc/dbg_info
+ library/cpp/cache
+)
+
+END()
+
+RECURSE(ut)