diff options
author | vskipin <vskipin@yandex-team.ru> | 2022-02-10 16:46:00 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:00 +0300 |
commit | 4e4b78bd7b67e2533da4dbb9696374a6d6068e32 (patch) | |
tree | a7a5543d815c451256ece74081d960b4e1d70ec2 /library/cpp/lfalloc | |
parent | 5b00ed04a5137a452fa6d3423cb0c9b54ac27408 (diff) | |
download | ydb-4e4b78bd7b67e2533da4dbb9696374a6d6068e32.tar.gz |
Restoring authorship annotation for <vskipin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/lfalloc')
-rw-r--r-- | library/cpp/lfalloc/alloc_profiler/profiler.cpp | 142 | ||||
-rw-r--r-- | library/cpp/lfalloc/alloc_profiler/profiler.h | 74 | ||||
-rw-r--r-- | library/cpp/lfalloc/alloc_profiler/stackcollect.cpp | 504 | ||||
-rw-r--r-- | library/cpp/lfalloc/alloc_profiler/stackcollect.h | 48 | ||||
-rw-r--r-- | library/cpp/lfalloc/dbg/ya.make | 50 | ||||
-rw-r--r-- | library/cpp/lfalloc/dbg_info/dbg_info.cpp | 70 | ||||
-rw-r--r-- | library/cpp/lfalloc/dbg_info/dbg_info.h | 44 | ||||
-rw-r--r-- | library/cpp/lfalloc/dbg_info/ya.make | 26 | ||||
-rw-r--r-- | library/cpp/lfalloc/lf_allocX64.h | 664 |
9 files changed, 811 insertions, 811 deletions
diff --git a/library/cpp/lfalloc/alloc_profiler/profiler.cpp b/library/cpp/lfalloc/alloc_profiler/profiler.cpp index 0e30927a5a..4c081bf64f 100644 --- a/library/cpp/lfalloc/alloc_profiler/profiler.cpp +++ b/library/cpp/lfalloc/alloc_profiler/profiler.cpp @@ -1,81 +1,81 @@ -#include "profiler.h" +#include "profiler.h" -#include "stackcollect.h" - -#include <util/generic/algorithm.h> -#include <util/generic/singleton.h> +#include "stackcollect.h" + +#include <util/generic/algorithm.h> +#include <util/generic/singleton.h> #include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/stream/str.h> - +#include <util/generic/vector.h> +#include <util/stream/str.h> + namespace NAllocProfiler { - -namespace { - -static TAllocationStackCollector& AllocationStackCollector() -{ - return *Singleton<TAllocationStackCollector>(); -} - -int AllocationCallback(int tag, size_t size, int sizeIdx) -{ - Y_UNUSED(sizeIdx); - - static const size_t STACK_FRAMES_COUNT = 32; - static const size_t STACK_FRAMES_SKIP = 1; - - void* frames[STACK_FRAMES_COUNT]; - size_t frameCount = BackTrace(frames, Y_ARRAY_SIZE(frames)); - if (frameCount <= STACK_FRAMES_SKIP) { - return -1; - } - - void** stack = &frames[STACK_FRAMES_SKIP]; - frameCount -= STACK_FRAMES_SKIP; - - auto& collector = AllocationStackCollector(); - return collector.Alloc(stack, frameCount, tag, size); -} - -void DeallocationCallback(int stackId, int tag, size_t size, int sizeIdx) -{ - Y_UNUSED(tag); - Y_UNUSED(sizeIdx); - - auto& collector = AllocationStackCollector(); - collector.Free(stackId, size); -} - -} // namespace - -//////////////////////////////////////////////////////////////////////////////// - + +namespace { + +static TAllocationStackCollector& AllocationStackCollector() +{ + return *Singleton<TAllocationStackCollector>(); +} + +int AllocationCallback(int tag, size_t size, int sizeIdx) +{ + Y_UNUSED(sizeIdx); + + static const size_t STACK_FRAMES_COUNT = 32; + static const size_t STACK_FRAMES_SKIP = 1; + + void* frames[STACK_FRAMES_COUNT]; + size_t frameCount = BackTrace(frames, Y_ARRAY_SIZE(frames)); + if (frameCount <= STACK_FRAMES_SKIP) { + return -1; + } + + void** stack = &frames[STACK_FRAMES_SKIP]; + frameCount -= STACK_FRAMES_SKIP; + + auto& collector = AllocationStackCollector(); + return collector.Alloc(stack, frameCount, tag, size); +} + +void DeallocationCallback(int stackId, int tag, size_t size, int sizeIdx) +{ + Y_UNUSED(tag); + Y_UNUSED(sizeIdx); + + auto& collector = AllocationStackCollector(); + collector.Free(stackId, size); +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + bool StartAllocationSampling(bool profileAllThreads) -{ - auto& collector = AllocationStackCollector(); - collector.Clear(); - +{ + auto& collector = AllocationStackCollector(); + collector.Clear(); + NAllocDbg::SetProfileAllThreads(profileAllThreads); - NAllocDbg::SetAllocationCallback(AllocationCallback); - NAllocDbg::SetDeallocationCallback(DeallocationCallback); - NAllocDbg::SetAllocationSamplingEnabled(true); - return true; -} - + NAllocDbg::SetAllocationCallback(AllocationCallback); + NAllocDbg::SetDeallocationCallback(DeallocationCallback); + NAllocDbg::SetAllocationSamplingEnabled(true); + return true; +} + bool StopAllocationSampling(IAllocationStatsDumper &out, int count) -{ - NAllocDbg::SetAllocationCallback(nullptr); - NAllocDbg::SetDeallocationCallback(nullptr); - NAllocDbg::SetAllocationSamplingEnabled(false); - - auto& collector = AllocationStackCollector(); +{ + NAllocDbg::SetAllocationCallback(nullptr); + NAllocDbg::SetDeallocationCallback(nullptr); + NAllocDbg::SetAllocationSamplingEnabled(false); + + auto& collector = AllocationStackCollector(); collector.Dump(count, out); - return true; -} - + return true; +} + bool StopAllocationSampling(IOutputStream& out, int count) { TAllocationStatsDumper dumper(out); return StopAllocationSampling(dumper, count); -} - -} // namespace NProfiler +} + +} // namespace NProfiler diff --git a/library/cpp/lfalloc/alloc_profiler/profiler.h b/library/cpp/lfalloc/alloc_profiler/profiler.h index 4ea49b9dcc..0cd816fa03 100644 --- a/library/cpp/lfalloc/alloc_profiler/profiler.h +++ b/library/cpp/lfalloc/alloc_profiler/profiler.h @@ -1,45 +1,45 @@ -#pragma once - +#pragma once + #include "stackcollect.h" - + #include <library/cpp/lfalloc/dbg_info/dbg_info.h> - -#include <util/generic/noncopyable.h> + +#include <util/generic/noncopyable.h> #include <util/stream/output.h> - + namespace NAllocProfiler { - -//////////////////////////////////////////////////////////////////////////////// - -inline int SetCurrentScopeTag(int value) -{ - return NAllocDbg::SetThreadAllocTag(value); -} - -inline bool SetProfileCurrentThread(bool value) -{ - return NAllocDbg::SetProfileCurrentThread(value); -} - + +//////////////////////////////////////////////////////////////////////////////// + +inline int SetCurrentScopeTag(int value) +{ + return NAllocDbg::SetThreadAllocTag(value); +} + +inline bool SetProfileCurrentThread(bool value) +{ + return NAllocDbg::SetProfileCurrentThread(value); +} + bool StartAllocationSampling(bool profileAllThreads = false); bool StopAllocationSampling(IAllocationStatsDumper& out, int count = 100); bool StopAllocationSampling(IOutputStream& out, int count = 100); - -//////////////////////////////////////////////////////////////////////////////// - -class TProfilingScope: private TNonCopyable { -private: - const int Prev; - -public: - explicit TProfilingScope(int value) - : Prev(SetCurrentScopeTag(value)) - {} - - ~TProfilingScope() - { - SetCurrentScopeTag(Prev); - } -}; - + +//////////////////////////////////////////////////////////////////////////////// + +class TProfilingScope: private TNonCopyable { +private: + const int Prev; + +public: + explicit TProfilingScope(int value) + : Prev(SetCurrentScopeTag(value)) + {} + + ~TProfilingScope() + { + SetCurrentScopeTag(Prev); + } +}; + } // namespace NAllocProfiler diff --git a/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp b/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp index fded4e2fd1..5a0c920451 100644 --- a/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp +++ b/library/cpp/lfalloc/alloc_profiler/stackcollect.cpp @@ -1,278 +1,278 @@ -#include "stackcollect.h" +#include "stackcollect.h" -#include "profiler.h" - -#include <util/generic/algorithm.h> -#include <util/generic/vector.h> -#include <util/stream/format.h> +#include "profiler.h" + +#include <util/generic/algorithm.h> +#include <util/generic/vector.h> +#include <util/stream/format.h> #include <util/stream/str.h> #include <util/string/cast.h> #include <util/string/printf.h> -#include <util/system/backtrace.h> -#include <util/system/spinlock.h> -#include <util/system/yassert.h> - - +#include <util/system/backtrace.h> +#include <util/system/spinlock.h> +#include <util/system/yassert.h> + + namespace NAllocProfiler { -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -class TStackCollector: private TNonCopyable { -public: - struct TFrameInfo { - int PrevInd; - void* Addr; - int Tag; - T Stats; - - void Clear() - { - PrevInd = 0; - Addr = nullptr; - Tag = 0; - Stats.Clear(); - } - }; - -private: - static const size_t STACKS_HASH_MAP_SIZE = 256 * 1024; - TFrameInfo Frames[STACKS_HASH_MAP_SIZE]; - - ui64 Samples; // Saved samples count - ui64 UniqueSamples; // Number of unique addresses - ui64 UsedSlots; // Number of occupied slots in the hashtable - ui64 DroppedSamples; // Number of unsaved addresses - ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions - - TAdaptiveLock Lock; - -public: - TStackCollector() - { - Clear(); - } - - int AddStack(void** stack, size_t frameCount, int tag) - { - Y_ASSERT(frameCount > 0); - - int prevInd = -1; - with_lock (Lock) { - for (int i = frameCount - 1; i >= 0; --i) { - prevInd = AddFrame(stack[i], prevInd, ((i == 0) ? tag : 0), (i == 0)); - if (prevInd == -1) { - break; - } - } - } - return prevInd; - } - - T& GetStats(int stackId) - { - Y_ASSERT(stackId >= 0 && (size_t)stackId < Y_ARRAY_SIZE(Frames)); - Y_ASSERT(!IsSlotEmpty(stackId)); - - return Frames[stackId].Stats; - } - - const TFrameInfo* GetFrames() const - { - return Frames; - } - - size_t GetFramesCount() const - { - return Y_ARRAY_SIZE(Frames); - } - +//////////////////////////////////////////////////////////////////////////////// + +template <typename T> +class TStackCollector: private TNonCopyable { +public: + struct TFrameInfo { + int PrevInd; + void* Addr; + int Tag; + T Stats; + + void Clear() + { + PrevInd = 0; + Addr = nullptr; + Tag = 0; + Stats.Clear(); + } + }; + +private: + static const size_t STACKS_HASH_MAP_SIZE = 256 * 1024; + TFrameInfo Frames[STACKS_HASH_MAP_SIZE]; + + ui64 Samples; // Saved samples count + ui64 UniqueSamples; // Number of unique addresses + ui64 UsedSlots; // Number of occupied slots in the hashtable + ui64 DroppedSamples; // Number of unsaved addresses + ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions + + TAdaptiveLock Lock; + +public: + TStackCollector() + { + Clear(); + } + + int AddStack(void** stack, size_t frameCount, int tag) + { + Y_ASSERT(frameCount > 0); + + int prevInd = -1; + with_lock (Lock) { + for (int i = frameCount - 1; i >= 0; --i) { + prevInd = AddFrame(stack[i], prevInd, ((i == 0) ? tag : 0), (i == 0)); + if (prevInd == -1) { + break; + } + } + } + return prevInd; + } + + T& GetStats(int stackId) + { + Y_ASSERT(stackId >= 0 && (size_t)stackId < Y_ARRAY_SIZE(Frames)); + Y_ASSERT(!IsSlotEmpty(stackId)); + + return Frames[stackId].Stats; + } + + const TFrameInfo* GetFrames() const + { + return Frames; + } + + size_t GetFramesCount() const + { + return Y_ARRAY_SIZE(Frames); + } + void BackTrace(const TFrameInfo* stack, TStackVec<void*, 64>& frames) const - { + { frames.clear(); - for (size_t i = 0; i < 100; ++i) { + for (size_t i = 0; i < 100; ++i) { frames.push_back(stack->Addr); - int prevInd = stack->PrevInd; - if (prevInd == -1) { - break; - } - stack = &Frames[prevInd]; - } - } - - void Clear() - { - for (auto& frame: Frames) { - frame.Clear(); - } - - Samples = 0; - DroppedSamples = 0; - UniqueSamples = 0; - UsedSlots = 0; - SearchSkipCount = 0; - } - -private: - // Hash function applied to the addresses - static ui32 Hash(void* addr, int prevInd, int tag) - { - return (((size_t)addr + ((size_t)addr / STACKS_HASH_MAP_SIZE)) + prevInd + tag) % STACKS_HASH_MAP_SIZE; - } - - static bool EqualFrame(const TFrameInfo& frame, void* addr, int prevInd, int tag) - { - return (frame.Addr == addr && frame.PrevInd == prevInd && frame.Tag == tag); - } - - bool IsSlotEmpty(ui32 slot) const - { - return Frames[slot].Addr == 0; - } - - bool InsertsAllowed() const - { - return UsedSlots < STACKS_HASH_MAP_SIZE / 2; - } - - // returns the index in the hashmap - int AddFrame(void* addr, int prevFrameIndex, int tag, bool last) - { - ui32 slot = Hash(addr, prevFrameIndex, tag); - ui32 prevSlot = (slot - 1) % STACKS_HASH_MAP_SIZE; - - while (!EqualFrame(Frames[slot], addr, prevFrameIndex, tag) && !IsSlotEmpty(slot) && slot != prevSlot) { - slot = (slot + 1) % STACKS_HASH_MAP_SIZE; - SearchSkipCount++; - } - - if (EqualFrame(Frames[slot], addr, prevFrameIndex, tag)) { - if (last) { - ++Samples; - } - } else if (InsertsAllowed() && IsSlotEmpty(slot)) { - // add new sample - Frames[slot].Clear(); - Frames[slot].Addr = addr; - Frames[slot].PrevInd = prevFrameIndex; - Frames[slot].Tag = tag; - ++UsedSlots; - if (last) { - ++UniqueSamples; - ++Samples; - } - } else { - // don't insert new sample if the search is becoming too slow - ++DroppedSamples; - return -1; - } - - return slot; - } -}; - - -//////////////////////////////////////////////////////////////////////////////// - + int prevInd = stack->PrevInd; + if (prevInd == -1) { + break; + } + stack = &Frames[prevInd]; + } + } + + void Clear() + { + for (auto& frame: Frames) { + frame.Clear(); + } + + Samples = 0; + DroppedSamples = 0; + UniqueSamples = 0; + UsedSlots = 0; + SearchSkipCount = 0; + } + +private: + // Hash function applied to the addresses + static ui32 Hash(void* addr, int prevInd, int tag) + { + return (((size_t)addr + ((size_t)addr / STACKS_HASH_MAP_SIZE)) + prevInd + tag) % STACKS_HASH_MAP_SIZE; + } + + static bool EqualFrame(const TFrameInfo& frame, void* addr, int prevInd, int tag) + { + return (frame.Addr == addr && frame.PrevInd == prevInd && frame.Tag == tag); + } + + bool IsSlotEmpty(ui32 slot) const + { + return Frames[slot].Addr == 0; + } + + bool InsertsAllowed() const + { + return UsedSlots < STACKS_HASH_MAP_SIZE / 2; + } + + // returns the index in the hashmap + int AddFrame(void* addr, int prevFrameIndex, int tag, bool last) + { + ui32 slot = Hash(addr, prevFrameIndex, tag); + ui32 prevSlot = (slot - 1) % STACKS_HASH_MAP_SIZE; + + while (!EqualFrame(Frames[slot], addr, prevFrameIndex, tag) && !IsSlotEmpty(slot) && slot != prevSlot) { + slot = (slot + 1) % STACKS_HASH_MAP_SIZE; + SearchSkipCount++; + } + + if (EqualFrame(Frames[slot], addr, prevFrameIndex, tag)) { + if (last) { + ++Samples; + } + } else if (InsertsAllowed() && IsSlotEmpty(slot)) { + // add new sample + Frames[slot].Clear(); + Frames[slot].Addr = addr; + Frames[slot].PrevInd = prevFrameIndex; + Frames[slot].Tag = tag; + ++UsedSlots; + if (last) { + ++UniqueSamples; + ++Samples; + } + } else { + // don't insert new sample if the search is becoming too slow + ++DroppedSamples; + return -1; + } + + return slot; + } +}; + + +//////////////////////////////////////////////////////////////////////////////// + class TAllocationStackCollector::TImpl: public TStackCollector<TStats> { using TBase = TStackCollector<TStats>; - -private: + +private: TStats Total; - -public: - int Alloc(void** stack, size_t frameCount, int tag, size_t size) - { - int stackId = TBase::AddStack(stack, frameCount, tag); - if (stackId >= 0) { - TBase::GetStats(stackId).Alloc(size); - Total.Alloc(size); - } - return stackId; - } - - void Free(int stackId, size_t size) - { - TBase::GetStats(stackId).Free(size); - Total.Free(size); - } - - void Clear() - { - TBase::Clear(); - Total.Clear(); - } - + +public: + int Alloc(void** stack, size_t frameCount, int tag, size_t size) + { + int stackId = TBase::AddStack(stack, frameCount, tag); + if (stackId >= 0) { + TBase::GetStats(stackId).Alloc(size); + Total.Alloc(size); + } + return stackId; + } + + void Free(int stackId, size_t size) + { + TBase::GetStats(stackId).Free(size); + Total.Free(size); + } + + void Clear() + { + TBase::Clear(); + Total.Clear(); + } + void Dump(int count, IAllocationStatsDumper& out) const - { - const TFrameInfo* frames = TBase::GetFrames(); - size_t framesCount = TBase::GetFramesCount(); - + { + const TFrameInfo* frames = TBase::GetFrames(); + size_t framesCount = TBase::GetFramesCount(); + TVector<const TFrameInfo*> stacks; - for (size_t i = 0; i < framesCount; ++i) { - if (frames[i].Stats.Allocs) { - stacks.push_back(&frames[i]); - } - } - - Sort(stacks, [] (const TFrameInfo* l, const TFrameInfo* r) { - const auto& ls = l->Stats; - const auto& rs = r->Stats; - return ls.CurrentSize != rs.CurrentSize - ? ls.CurrentSize > rs.CurrentSize - : ls.Allocs != rs.Allocs - ? ls.Allocs > rs.Allocs - : ls.Frees > rs.Frees; - }); - + for (size_t i = 0; i < framesCount; ++i) { + if (frames[i].Stats.Allocs) { + stacks.push_back(&frames[i]); + } + } + + Sort(stacks, [] (const TFrameInfo* l, const TFrameInfo* r) { + const auto& ls = l->Stats; + const auto& rs = r->Stats; + return ls.CurrentSize != rs.CurrentSize + ? ls.CurrentSize > rs.CurrentSize + : ls.Allocs != rs.Allocs + ? ls.Allocs > rs.Allocs + : ls.Frees > rs.Frees; + }); + out.DumpTotal(Total); - + TAllocationInfo allocInfo; - int printedCount = 0; - for (const TFrameInfo* stack: stacks) { + int printedCount = 0; + for (const TFrameInfo* stack: stacks) { allocInfo.Clear(); allocInfo.Tag = stack->Tag; allocInfo.Stats = stack->Stats; TBase::BackTrace(stack, allocInfo.Stack); - + out.DumpEntry(allocInfo); - if (++printedCount >= count) { - break; - } - } - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -TAllocationStackCollector::TAllocationStackCollector() - : Impl(new TImpl()) -{} - -TAllocationStackCollector::~TAllocationStackCollector() -{} - -int TAllocationStackCollector::Alloc(void** stack, size_t frameCount, int tag, size_t size) -{ - return Impl->Alloc(stack, frameCount, tag, size); -} - -void TAllocationStackCollector::Free(int stackId, size_t size) -{ - Impl->Free(stackId, size); -} - -void TAllocationStackCollector::Clear() -{ - Impl->Clear(); -} - + if (++printedCount >= count) { + break; + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +TAllocationStackCollector::TAllocationStackCollector() + : Impl(new TImpl()) +{} + +TAllocationStackCollector::~TAllocationStackCollector() +{} + +int TAllocationStackCollector::Alloc(void** stack, size_t frameCount, int tag, size_t size) +{ + return Impl->Alloc(stack, frameCount, tag, size); +} + +void TAllocationStackCollector::Free(int stackId, size_t size) +{ + Impl->Free(stackId, size); +} + +void TAllocationStackCollector::Clear() +{ + Impl->Clear(); +} + void TAllocationStackCollector::Dump(int count, IAllocationStatsDumper &out) const -{ - Impl->Dump(count, out); -} - +{ + Impl->Dump(count, out); +} + TString IAllocationStatsDumper::FormatTag(int tag) { return ToString(tag); diff --git a/library/cpp/lfalloc/alloc_profiler/stackcollect.h b/library/cpp/lfalloc/alloc_profiler/stackcollect.h index 80715ed7cb..8c0d65b296 100644 --- a/library/cpp/lfalloc/alloc_profiler/stackcollect.h +++ b/library/cpp/lfalloc/alloc_profiler/stackcollect.h @@ -1,14 +1,14 @@ -#pragma once - +#pragma once + #include <library/cpp/containers/stack_vector/stack_vec.h> #include <library/cpp/cache/cache.h> -#include <util/generic/noncopyable.h> -#include <util/generic/ptr.h> +#include <util/generic/noncopyable.h> +#include <util/generic/ptr.h> #include <util/stream/output.h> - + namespace NAllocProfiler { - + struct TStats { intptr_t Allocs = 0; intptr_t Frees = 0; @@ -85,23 +85,23 @@ private: TLFUCache<void*, TSymbol> SymbolCache; }; -//////////////////////////////////////////////////////////////////////////////// - -class TAllocationStackCollector: private TNonCopyable { -private: - class TImpl; - THolder<TImpl> Impl; - -public: - TAllocationStackCollector(); - ~TAllocationStackCollector(); - - int Alloc(void** stack, size_t frameCount, int tag, size_t size); - void Free(int stackId, size_t size); - - void Clear(); - +//////////////////////////////////////////////////////////////////////////////// + +class TAllocationStackCollector: private TNonCopyable { +private: + class TImpl; + THolder<TImpl> Impl; + +public: + TAllocationStackCollector(); + ~TAllocationStackCollector(); + + int Alloc(void** stack, size_t frameCount, int tag, size_t size); + void Free(int stackId, size_t size); + + void Clear(); + void Dump(int count, IAllocationStatsDumper& out) const; -}; - +}; + } // namespace NAllocProfiler diff --git a/library/cpp/lfalloc/dbg/ya.make b/library/cpp/lfalloc/dbg/ya.make index 3dce653a8c..387050fc67 100644 --- a/library/cpp/lfalloc/dbg/ya.make +++ b/library/cpp/lfalloc/dbg/ya.make @@ -1,32 +1,32 @@ -LIBRARY() +LIBRARY() -OWNER(vskipin) +OWNER(vskipin) + +NO_UTIL() -NO_UTIL() - -NO_COMPILER_WARNINGS() - -IF (ARCH_AARCH64) - PEERDIR( - contrib/libs/jemalloc - ) -ELSE() - IF ("${YMAKE}" MATCHES "devtools") - CFLAGS(-DYMAKE=1) - ENDIF() +NO_COMPILER_WARNINGS() + +IF (ARCH_AARCH64) + PEERDIR( + contrib/libs/jemalloc + ) +ELSE() + IF ("${YMAKE}" MATCHES "devtools") + CFLAGS(-DYMAKE=1) + ENDIF() CXXFLAGS( -DLFALLOC_DBG -DLFALLOC_YT ) - SRCS( - ../lf_allocX64.cpp - ) -ENDIF() - -PEERDIR( + SRCS( + ../lf_allocX64.cpp + ) +ENDIF() + +PEERDIR( library/cpp/malloc/api -) - -SET(IDE_FOLDER "util") - -END() +) + +SET(IDE_FOLDER "util") + +END() diff --git a/library/cpp/lfalloc/dbg_info/dbg_info.cpp b/library/cpp/lfalloc/dbg_info/dbg_info.cpp index 1fb9f7ad93..c900cb4f96 100644 --- a/library/cpp/lfalloc/dbg_info/dbg_info.cpp +++ b/library/cpp/lfalloc/dbg_info/dbg_info.cpp @@ -1,83 +1,83 @@ -#include "dbg_info.h" - +#include "dbg_info.h" + #include <library/cpp/malloc/api/malloc.h> - -namespace NAllocDbg { + +namespace NAllocDbg { //////////////////////////////////////////////////////////////////////////////// - + using TGetAllocationCounter = i64(int counter); - + using TSetThreadAllocTag = int(int tag); using TGetPerTagAllocInfo = void( bool flushPerThreadCounters, TPerTagAllocInfo* info, int& maxTag, int& numSizes); - + using TSetProfileCurrentThread = bool(bool newVal); using TSetProfileAllThreads = bool(bool newVal); using TSetAllocationSamplingEnabled = bool(bool newVal); - + using TSetAllocationSampleRate = size_t(size_t newVal); using TSetAllocationSampleMaxSize = size_t(size_t newVal); - + using TSetAllocationCallback = TAllocationCallback*(TAllocationCallback* newVal); using TSetDeallocationCallback = TDeallocationCallback*(TDeallocationCallback* newVal); - + struct TAllocFn { TGetAllocationCounter* GetAllocationCounterFast = nullptr; TGetAllocationCounter* GetAllocationCounterFull = nullptr; - + TSetThreadAllocTag* SetThreadAllocTag = nullptr; TGetPerTagAllocInfo* GetPerTagAllocInfo = nullptr; - + TSetProfileCurrentThread* SetProfileCurrentThread = nullptr; TSetProfileAllThreads* SetProfileAllThreads = nullptr; TSetAllocationSamplingEnabled* SetAllocationSamplingEnabled = nullptr; - + TSetAllocationSampleRate* SetAllocationSampleRate = nullptr; TSetAllocationSampleMaxSize* SetAllocationSampleMaxSize = nullptr; - + TSetAllocationCallback* SetAllocationCallback = nullptr; TSetDeallocationCallback* SetDeallocationCallback = nullptr; - + TAllocFn() { auto mallocInfo = NMalloc::MallocInfo(); - + GetAllocationCounterFast = (TGetAllocationCounter*)mallocInfo.GetParam("GetLFAllocCounterFast"); GetAllocationCounterFull = (TGetAllocationCounter*)mallocInfo.GetParam("GetLFAllocCounterFull"); - + SetThreadAllocTag = (TSetThreadAllocTag*)mallocInfo.GetParam("SetThreadAllocTag"); GetPerTagAllocInfo = (TGetPerTagAllocInfo*)mallocInfo.GetParam("GetPerTagAllocInfo"); - + SetProfileCurrentThread = (TSetProfileCurrentThread*)mallocInfo.GetParam("SetProfileCurrentThread"); SetProfileAllThreads = (TSetProfileAllThreads*)mallocInfo.GetParam("SetProfileAllThreads"); SetAllocationSamplingEnabled = (TSetAllocationSamplingEnabled*)mallocInfo.GetParam("SetAllocationSamplingEnabled"); - + SetAllocationSampleRate = (TSetAllocationSampleRate*)mallocInfo.GetParam("SetAllocationSampleRate"); SetAllocationSampleMaxSize = (TSetAllocationSampleMaxSize*)mallocInfo.GetParam("SetAllocationSampleMaxSize"); - + SetAllocationCallback = (TSetAllocationCallback*)mallocInfo.GetParam("SetAllocationCallback"); SetDeallocationCallback = (TSetDeallocationCallback*)mallocInfo.GetParam("SetDeallocationCallback"); } }; - + //////////////////////////////////////////////////////////////////////////////// - + static TAllocFn AllocFn; - + i64 GetAllocationCounterFast(ELFAllocCounter counter) { return AllocFn.GetAllocationCounterFast ? AllocFn.GetAllocationCounterFast(counter) : 0; } - + i64 GetAllocationCounterFull(ELFAllocCounter counter) { return AllocFn.GetAllocationCounterFull ? AllocFn.GetAllocationCounterFull(counter) : 0; } - + int SetThreadAllocTag(int tag) { return AllocFn.SetThreadAllocTag ? AllocFn.SetThreadAllocTag(tag) : 0; } - + TArrayPtr<TPerTagAllocInfo> GetPerTagAllocInfo( bool flushPerThreadCounters, int& maxTag, @@ -92,11 +92,11 @@ namespace NAllocDbg { numSizes = 0; return nullptr; } - + bool SetProfileCurrentThread(bool newVal) { return AllocFn.SetProfileCurrentThread ? AllocFn.SetProfileCurrentThread(newVal) : false; - } - + } + bool SetProfileAllThreads(bool newVal) { return AllocFn.SetProfileAllThreads ? AllocFn.SetProfileAllThreads(newVal) : false; } @@ -104,21 +104,21 @@ namespace NAllocDbg { bool SetAllocationSamplingEnabled(bool newVal) { return AllocFn.SetAllocationSamplingEnabled ? AllocFn.SetAllocationSamplingEnabled(newVal) : false; } - + size_t SetAllocationSampleRate(size_t newVal) { return AllocFn.SetAllocationSampleRate ? AllocFn.SetAllocationSampleRate(newVal) : 0; } - + size_t SetAllocationSampleMaxSize(size_t newVal) { return AllocFn.SetAllocationSampleMaxSize ? AllocFn.SetAllocationSampleMaxSize(newVal) : 0; } - + TAllocationCallback* SetAllocationCallback(TAllocationCallback* newVal) { return AllocFn.SetAllocationCallback ? AllocFn.SetAllocationCallback(newVal) : nullptr; } - + TDeallocationCallback* SetDeallocationCallback(TDeallocationCallback* newVal) { return AllocFn.SetDeallocationCallback ? AllocFn.SetDeallocationCallback(newVal) : nullptr; } - -} + +} diff --git a/library/cpp/lfalloc/dbg_info/dbg_info.h b/library/cpp/lfalloc/dbg_info/dbg_info.h index 071562a81a..0c3299216d 100644 --- a/library/cpp/lfalloc/dbg_info/dbg_info.h +++ b/library/cpp/lfalloc/dbg_info/dbg_info.h @@ -1,12 +1,12 @@ -#pragma once - +#pragma once + #include <util/generic/ptr.h> -#include <util/system/types.h> - -namespace NAllocDbg { +#include <util/system/types.h> + +namespace NAllocDbg { //////////////////////////////////////////////////////////////////////////////// // Allocation statistics - + enum ELFAllocCounter { CT_USER_ALLOC, // accumulated size requested by user code CT_MMAP, // accumulated mmapped size @@ -23,55 +23,55 @@ namespace NAllocDbg { CT_DEGRAGMENT_CNT, // number of memory defragmentations CT_MAX }; - + i64 GetAllocationCounterFast(ELFAllocCounter counter); i64 GetAllocationCounterFull(ELFAllocCounter counter); - + //////////////////////////////////////////////////////////////////////////////// // Allocation statistics could be tracked on per-tag basis - + int SetThreadAllocTag(int tag); - + class TScopedTag { private: int PrevTag; - + public: explicit TScopedTag(int tag) { PrevTag = SetThreadAllocTag(tag); } - + ~TScopedTag() { SetThreadAllocTag(PrevTag); } }; - + struct TPerTagAllocInfo { ssize_t Count; ssize_t Size; }; - + TArrayPtr<TPerTagAllocInfo> GetPerTagAllocInfo( bool flushPerThreadCounters, int& maxTag, int& numSizes); - + //////////////////////////////////////////////////////////////////////////////// // Allocation sampling could be used to collect detailed information - + bool SetProfileCurrentThread(bool newVal); bool SetProfileAllThreads(bool newVal); bool SetAllocationSamplingEnabled(bool newVal); - + size_t SetAllocationSampleRate(size_t newVal); size_t SetAllocationSampleMaxSize(size_t newVal); - -#define DBG_ALLOC_INVALID_COOKIE (-1) - + +#define DBG_ALLOC_INVALID_COOKIE (-1) + using TAllocationCallback = int(int tag, size_t size, int sizeIdx); using TDeallocationCallback = void(int cookie, int tag, size_t size, int sizeIdx); - + TAllocationCallback* SetAllocationCallback(TAllocationCallback* newVal); TDeallocationCallback* SetDeallocationCallback(TDeallocationCallback* newVal); - + } diff --git a/library/cpp/lfalloc/dbg_info/ya.make b/library/cpp/lfalloc/dbg_info/ya.make index efecba5993..e7068a0b10 100644 --- a/library/cpp/lfalloc/dbg_info/ya.make +++ b/library/cpp/lfalloc/dbg_info/ya.make @@ -1,15 +1,15 @@ -LIBRARY() +LIBRARY() -OWNER(vskipin) - -PEERDIR( +OWNER(vskipin) + +PEERDIR( library/cpp/malloc/api -) - -SRCS( - dbg_info.cpp -) - -SET(IDE_FOLDER "util") - -END() +) + +SRCS( + dbg_info.cpp +) + +SET(IDE_FOLDER "util") + +END() diff --git a/library/cpp/lfalloc/lf_allocX64.h b/library/cpp/lfalloc/lf_allocX64.h index fd2a906d6f..616253d689 100644 --- a/library/cpp/lfalloc/lf_allocX64.h +++ b/library/cpp/lfalloc/lf_allocX64.h @@ -37,10 +37,10 @@ static inline long AtomicAdd(TAtomic& a, long b) { return _InterlockedExchangeAdd(&a, b) + b; } -static inline long AtomicSub(TAtomic& a, long b) { - return AtomicAdd(a, -b); -} - +static inline long AtomicSub(TAtomic& a, long b) { + return AtomicAdd(a, -b); +} + #pragma comment(lib, "synchronization.lib") #ifndef NDEBUG @@ -121,7 +121,7 @@ static inline long AtomicSub(TAtomic& a, long b) { #ifndef NDEBUG #define DBG_FILL_MEMORY -static bool FillMemoryOnAllocation = true; +static bool FillMemoryOnAllocation = true; #endif static bool TransparentHugePages = false; // force MADV_HUGEPAGE for large allocs @@ -189,9 +189,9 @@ const int N_SIZES = 25; #endif const int nSizeIdxToSize[N_SIZES] = { -1, -#if defined(_64_) - 16, 16, 32, 32, 48, 64, 96, 128, -#else +#if defined(_64_) + 16, 16, 32, 32, 48, 64, 96, 128, +#else 8, 16, 24, @@ -200,7 +200,7 @@ const int nSizeIdxToSize[N_SIZES] = { 64, 96, 128, -#endif +#endif 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, #ifdef LFALLOC_YT @@ -214,11 +214,11 @@ const size_t N_MAX_FAST_SIZE = 32768; #endif const unsigned char size2idxArr1[64 + 1] = { 1, -#if defined(_64_) +#if defined(_64_) 2, 2, 4, 4, // 16, 16, 32, 32 -#else +#else 1, 2, 3, 4, // 8, 16, 24, 32 -#endif +#endif 5, 5, 6, 6, // 48, 64 7, 7, 7, 7, 8, 8, 8, 8, // 96, 128 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, // 192, 256 @@ -312,25 +312,25 @@ inline void VerifyMmapResult(void* result) { static char* AllocWithMMapLinuxImpl(uintptr_t sz, EMMapMode mode) { char* volatile* areaPtr; char* areaStart; - uintptr_t areaFinish; - + uintptr_t areaFinish; + int mapProt = PROT_READ | PROT_WRITE; int mapFlags = MAP_PRIVATE | MAP_ANON; - - if (mode == MM_HUGE) { - areaPtr = reinterpret_cast<char* volatile*>(&linuxAllocPointerHuge); - areaStart = reinterpret_cast<char*>(LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE); - areaFinish = N_HUGE_AREA_FINISH; - } else { - areaPtr = reinterpret_cast<char* volatile*>(&linuxAllocPointer); - areaStart = reinterpret_cast<char*>(LINUX_MMAP_AREA_START); - areaFinish = N_MAX_WORKSET_SIZE; - - if (MapHugeTLB) { - mapFlags |= MAP_HUGETLB; - } - } - + + if (mode == MM_HUGE) { + areaPtr = reinterpret_cast<char* volatile*>(&linuxAllocPointerHuge); + areaStart = reinterpret_cast<char*>(LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE); + areaFinish = N_HUGE_AREA_FINISH; + } else { + areaPtr = reinterpret_cast<char* volatile*>(&linuxAllocPointer); + areaStart = reinterpret_cast<char*>(LINUX_MMAP_AREA_START); + areaFinish = N_MAX_WORKSET_SIZE; + + if (MapHugeTLB) { + mapFlags |= MAP_HUGETLB; + } + } + bool wrapped = false; for (;;) { char* prevAllocPtr = *areaPtr; @@ -340,24 +340,24 @@ static char* AllocWithMMapLinuxImpl(uintptr_t sz, EMMapMode mode) { NMalloc::AbortFromCorruptedAllocator("virtual memory is over fragmented"); } // wrap after all area is used - DoCas(areaPtr, areaStart, prevAllocPtr); + DoCas(areaPtr, areaStart, prevAllocPtr); wrapped = true; continue; } - if (DoCas(areaPtr, nextAllocPtr, prevAllocPtr) != prevAllocPtr) + if (DoCas(areaPtr, nextAllocPtr, prevAllocPtr) != prevAllocPtr) continue; char* largeBlock = (char*)mmap(prevAllocPtr, sz, mapProt, mapFlags, -1, 0); VerifyMmapResult(largeBlock); if (largeBlock == prevAllocPtr) - return largeBlock; + return largeBlock; if (largeBlock) munmap(largeBlock, sz); if (sz < 0x80000) { // skip utilized area with big steps - DoCas(areaPtr, nextAllocPtr + 0x10 * 0x10000, nextAllocPtr); + DoCas(areaPtr, nextAllocPtr + 0x10 * 0x10000, nextAllocPtr); } } } @@ -367,14 +367,14 @@ static char* AllocWithMMap(uintptr_t sz, EMMapMode mode) { (void)mode; #ifdef _MSC_VER char* largeBlock = (char*)VirtualAlloc(0, sz, MEM_RESERVE, PAGE_READWRITE); - if (Y_UNLIKELY(largeBlock == nullptr)) + if (Y_UNLIKELY(largeBlock == nullptr)) NMalloc::AbortFromCorruptedAllocator("out of memory"); if (Y_UNLIKELY(uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= N_MAX_WORKSET_SIZE)) NMalloc::AbortFromCorruptedAllocator("out of working set, something has broken"); #else #if defined(_freebsd_) || !defined(_64_) char* largeBlock = (char*)mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); - VerifyMmapResult(largeBlock); + VerifyMmapResult(largeBlock); if (Y_UNLIKELY(uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= N_MAX_WORKSET_SIZE)) NMalloc::AbortFromCorruptedAllocator("out of working set, something has broken"); #else @@ -384,7 +384,7 @@ static char* AllocWithMMap(uintptr_t sz, EMMapMode mode) { } #endif #endif - Y_ASSERT_NOBT(largeBlock); + Y_ASSERT_NOBT(largeBlock); IncrementCounter(CT_MMAP, sz); IncrementCounter(CT_MMAP_CNT, 1); return largeBlock; @@ -779,8 +779,8 @@ static bool DefragmentMem() { return false; } - IncrementCounter(CT_DEGRAGMENT_CNT, 1); - + IncrementCounter(CT_DEGRAGMENT_CNT, 1); + int* nFreeCount = (int*)SystemAlloc(N_CHUNKS * sizeof(int)); if (Y_UNLIKELY(!nFreeCount)) { //__debugbreak(); @@ -886,8 +886,8 @@ enum EDefrag { }; static void* SlowLFAlloc(int nSizeIdx, int blockSize, EDefrag defrag) { - IncrementCounter(CT_SLOW_ALLOC_CNT, 1); - + IncrementCounter(CT_SLOW_ALLOC_CNT, 1); + TLFLockHolder ls; for (;;) { bool locked = ls.TryLock(&LFGlobalLock); @@ -918,9 +918,9 @@ static void* SlowLFAlloc(int nSizeIdx, int blockSize, EDefrag defrag) { } char* largeBlock = AllocWithMMap(N_LARGE_ALLOC_SIZE, MM_NORMAL); - uintptr_t addr = ((largeBlock - ALLOC_START) + N_CHUNK_SIZE - 1) & (~(N_CHUNK_SIZE - 1)); - uintptr_t endAddr = ((largeBlock - ALLOC_START) + N_LARGE_ALLOC_SIZE) & (~(N_CHUNK_SIZE - 1)); - for (uintptr_t p = addr; p < endAddr; p += N_CHUNK_SIZE) { + uintptr_t addr = ((largeBlock - ALLOC_START) + N_CHUNK_SIZE - 1) & (~(N_CHUNK_SIZE - 1)); + uintptr_t endAddr = ((largeBlock - ALLOC_START) + N_LARGE_ALLOC_SIZE) & (~(N_CHUNK_SIZE - 1)); + for (uintptr_t p = addr; p < endAddr; p += N_CHUNK_SIZE) { uintptr_t chunk = p / N_CHUNK_SIZE; Y_ASSERT_NOBT(chunk * N_CHUNK_SIZE == p); Y_ASSERT_NOBT(chunkSizeIdx[chunk] == 0); @@ -1031,70 +1031,70 @@ struct TLocalCounter { } }; -//////////////////////////////////////////////////////////////////////////////// -// DBG stuff -//////////////////////////////////////////////////////////////////////////////// - -#if defined(LFALLOC_DBG) - -struct TPerTagAllocCounter { - TAtomic Size; - TAtomic Count; +//////////////////////////////////////////////////////////////////////////////// +// DBG stuff +//////////////////////////////////////////////////////////////////////////////// +#if defined(LFALLOC_DBG) + +struct TPerTagAllocCounter { + TAtomic Size; + TAtomic Count; + Y_FORCE_INLINE void Alloc(size_t size) { - AtomicAdd(Size, size); - AtomicAdd(Count, 1); - } - + AtomicAdd(Size, size); + AtomicAdd(Count, 1); + } + Y_FORCE_INLINE void Free(size_t size) { - AtomicSub(Size, size); - AtomicSub(Count, 1); - } -}; - -struct TLocalPerTagAllocCounter { - intptr_t Size; - int Count; - int Updates; - + AtomicSub(Size, size); + AtomicSub(Count, 1); + } +}; + +struct TLocalPerTagAllocCounter { + intptr_t Size; + int Count; + int Updates; + Y_FORCE_INLINE void Init() { - Size = 0; - Count = 0; - Updates = 0; - } - + Size = 0; + Count = 0; + Updates = 0; + } + Y_FORCE_INLINE void Alloc(TPerTagAllocCounter& parent, size_t size) { - Size += size; - ++Count; - if (++Updates > MAX_LOCAL_UPDATES) { - Flush(parent); - } - } - + Size += size; + ++Count; + if (++Updates > MAX_LOCAL_UPDATES) { + Flush(parent); + } + } + Y_FORCE_INLINE void Free(TPerTagAllocCounter& parent, size_t size) { - Size -= size; - --Count; - if (++Updates > MAX_LOCAL_UPDATES) { - Flush(parent); - } - } - + Size -= size; + --Count; + if (++Updates > MAX_LOCAL_UPDATES) { + Flush(parent); + } + } + Y_FORCE_INLINE void Flush(TPerTagAllocCounter& parent) { AtomicAdd(parent.Size, Size); Size = 0; AtomicAdd(parent.Count, Count); Count = 0; - Updates = 0; - } -}; - -static const int DBG_ALLOC_MAX_TAG = 1000; + Updates = 0; + } +}; + +static const int DBG_ALLOC_MAX_TAG = 1000; static const int DBG_ALLOC_ALIGNED_TAG = 0xF0000000; -static const int DBG_ALLOC_NUM_SIZES = 30; -static TPerTagAllocCounter GlobalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES]; - +static const int DBG_ALLOC_NUM_SIZES = 30; +static TPerTagAllocCounter GlobalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES]; + #endif // LFALLOC_DBG - + ////////////////////////////////////////////////////////////////////////// const int THREAD_BUF = 256; static int borderSizes[N_SIZES]; @@ -1107,9 +1107,9 @@ struct TThreadAllocInfo { TThreadAllocInfo* pNextInfo; TLocalCounter LocalCounters[CT_MAX]; -#if defined(LFALLOC_DBG) - TLocalPerTagAllocCounter LocalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES]; -#endif +#if defined(LFALLOC_DBG) + TLocalPerTagAllocCounter LocalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES]; +#endif #ifdef _win_ HANDLE hThread; #endif @@ -1136,14 +1136,14 @@ struct TThreadAllocInfo { for (int i = 0; i < CT_MAX; ++i) { LocalCounters[i].Init(&GlobalCounters[i]); } -#if defined(LFALLOC_DBG) - for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { - for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { - auto& local = LocalPerTagAllocCounters[tag][sizeIdx]; - local.Init(); - } - } -#endif +#if defined(LFALLOC_DBG) + for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { + for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { + auto& local = LocalPerTagAllocCounters[tag][sizeIdx]; + local.Init(); + } + } +#endif } void Done() { for (auto sizeIdx : FreePtrIndex) { @@ -1152,15 +1152,15 @@ struct TThreadAllocInfo { for (auto& localCounter : LocalCounters) { localCounter.Flush(); } -#if defined(LFALLOC_DBG) - for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { - for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { - auto& local = LocalPerTagAllocCounters[tag][sizeIdx]; - auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; - local.Flush(global); - } - } -#endif +#if defined(LFALLOC_DBG) + for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { + for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { + auto& local = LocalPerTagAllocCounters[tag][sizeIdx]; + auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; + local.Flush(global); + } + } +#endif #ifdef _win_ if (hThread) CloseHandle(hThread); @@ -1304,21 +1304,21 @@ static void AllocThreadInfo() { // DBG stuff ////////////////////////////////////////////////////////////////////////// -#if defined(LFALLOC_DBG) +#if defined(LFALLOC_DBG) -struct TAllocHeader { +struct TAllocHeader { uint64_t Size; int Tag; - int Cookie; -}; - + int Cookie; +}; + // should be power of 2 static_assert(sizeof(TAllocHeader) == 16); static inline void* GetAllocPtr(TAllocHeader* p) { - return p + 1; -} - + return p + 1; +} + static inline TAllocHeader* GetAllocHeader(void* p) { auto* header = ((TAllocHeader*)p) - 1; if (header->Tag == DBG_ALLOC_ALIGNED_TAG) { @@ -1326,24 +1326,24 @@ static inline TAllocHeader* GetAllocHeader(void* p) { } return header; -} - -PERTHREAD int AllocationTag; +} + +PERTHREAD int AllocationTag; extern "C" int SetThreadAllocTag(int tag) { - int prevTag = AllocationTag; + int prevTag = AllocationTag; if (tag < DBG_ALLOC_MAX_TAG && tag >= 0) { - AllocationTag = tag; - } - return prevTag; -} - -PERTHREAD bool ProfileCurrentThread; + AllocationTag = tag; + } + return prevTag; +} + +PERTHREAD bool ProfileCurrentThread; extern "C" bool SetProfileCurrentThread(bool newVal) { - bool prevVal = ProfileCurrentThread; - ProfileCurrentThread = newVal; - return prevVal; -} - + bool prevVal = ProfileCurrentThread; + ProfileCurrentThread = newVal; + return prevVal; +} + static volatile bool ProfileAllThreads; extern "C" bool SetProfileAllThreads(bool newVal) { bool prevVal = ProfileAllThreads; @@ -1351,176 +1351,176 @@ extern "C" bool SetProfileAllThreads(bool newVal) { return prevVal; } -static volatile bool AllocationSamplingEnabled; +static volatile bool AllocationSamplingEnabled; extern "C" bool SetAllocationSamplingEnabled(bool newVal) { - bool prevVal = AllocationSamplingEnabled; - AllocationSamplingEnabled = newVal; - return prevVal; -} - -static size_t AllocationSampleRate = 1000; + bool prevVal = AllocationSamplingEnabled; + AllocationSamplingEnabled = newVal; + return prevVal; +} + +static size_t AllocationSampleRate = 1000; extern "C" size_t SetAllocationSampleRate(size_t newVal) { - size_t prevVal = AllocationSampleRate; - AllocationSampleRate = newVal; - return prevVal; -} - -static size_t AllocationSampleMaxSize = N_MAX_FAST_SIZE; + size_t prevVal = AllocationSampleRate; + AllocationSampleRate = newVal; + return prevVal; +} + +static size_t AllocationSampleMaxSize = N_MAX_FAST_SIZE; extern "C" size_t SetAllocationSampleMaxSize(size_t newVal) { - size_t prevVal = AllocationSampleMaxSize; - AllocationSampleMaxSize = newVal; - return prevVal; -} - -using TAllocationCallback = int(int tag, size_t size, int sizeIdx); -static TAllocationCallback* AllocationCallback; + size_t prevVal = AllocationSampleMaxSize; + AllocationSampleMaxSize = newVal; + return prevVal; +} + +using TAllocationCallback = int(int tag, size_t size, int sizeIdx); +static TAllocationCallback* AllocationCallback; extern "C" TAllocationCallback* SetAllocationCallback(TAllocationCallback* newVal) { - TAllocationCallback* prevVal = AllocationCallback; - AllocationCallback = newVal; - return prevVal; -} - -using TDeallocationCallback = void(int cookie, int tag, size_t size, int sizeIdx); -static TDeallocationCallback* DeallocationCallback; + TAllocationCallback* prevVal = AllocationCallback; + AllocationCallback = newVal; + return prevVal; +} + +using TDeallocationCallback = void(int cookie, int tag, size_t size, int sizeIdx); +static TDeallocationCallback* DeallocationCallback; extern "C" TDeallocationCallback* SetDeallocationCallback(TDeallocationCallback* newVal) { - TDeallocationCallback* prevVal = DeallocationCallback; - DeallocationCallback = newVal; - return prevVal; -} - -PERTHREAD TAtomic AllocationsCount; -PERTHREAD bool InAllocationCallback; - -static const int DBG_ALLOC_INVALID_COOKIE = -1; + TDeallocationCallback* prevVal = DeallocationCallback; + DeallocationCallback = newVal; + return prevVal; +} + +PERTHREAD TAtomic AllocationsCount; +PERTHREAD bool InAllocationCallback; + +static const int DBG_ALLOC_INVALID_COOKIE = -1; static inline int SampleAllocation(TAllocHeader* p, int sizeIdx) { - int cookie = DBG_ALLOC_INVALID_COOKIE; + int cookie = DBG_ALLOC_INVALID_COOKIE; if (AllocationSamplingEnabled && (ProfileCurrentThread || ProfileAllThreads) && !InAllocationCallback) { - if (p->Size > AllocationSampleMaxSize || ++AllocationsCount % AllocationSampleRate == 0) { - if (AllocationCallback) { - InAllocationCallback = true; - cookie = AllocationCallback(p->Tag, p->Size, sizeIdx); - InAllocationCallback = false; - } - } - } - return cookie; -} - + if (p->Size > AllocationSampleMaxSize || ++AllocationsCount % AllocationSampleRate == 0) { + if (AllocationCallback) { + InAllocationCallback = true; + cookie = AllocationCallback(p->Tag, p->Size, sizeIdx); + InAllocationCallback = false; + } + } + } + return cookie; +} + static inline void SampleDeallocation(TAllocHeader* p, int sizeIdx) { - if (p->Cookie != DBG_ALLOC_INVALID_COOKIE && !InAllocationCallback) { - if (DeallocationCallback) { - InAllocationCallback = true; - DeallocationCallback(p->Cookie, p->Tag, p->Size, sizeIdx); - InAllocationCallback = false; - } - } -} - + if (p->Cookie != DBG_ALLOC_INVALID_COOKIE && !InAllocationCallback) { + if (DeallocationCallback) { + InAllocationCallback = true; + DeallocationCallback(p->Cookie, p->Tag, p->Size, sizeIdx); + InAllocationCallback = false; + } + } +} + static inline void TrackPerTagAllocation(TAllocHeader* p, int sizeIdx) { if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES); - auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; - - TThreadAllocInfo* thr = pThreadInfo; - if (thr) { - auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx]; - local.Alloc(global, p->Size); - } else { - global.Alloc(p->Size); - } - } -} - + auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; + + TThreadAllocInfo* thr = pThreadInfo; + if (thr) { + auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx]; + local.Alloc(global, p->Size); + } else { + global.Alloc(p->Size); + } + } +} + static inline void TrackPerTagDeallocation(TAllocHeader* p, int sizeIdx) { if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES); - auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; - - TThreadAllocInfo* thr = pThreadInfo; - if (thr) { - auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx]; - local.Free(global, p->Size); - } else { - global.Free(p->Size); - } - } -} - + auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; + + TThreadAllocInfo* thr = pThreadInfo; + if (thr) { + auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx]; + local.Free(global, p->Size); + } else { + global.Free(p->Size); + } + } +} + static void* TrackAllocation(void* ptr, size_t size, int sizeIdx) { - TAllocHeader* p = (TAllocHeader*)ptr; - p->Size = size; - p->Tag = AllocationTag; - p->Cookie = SampleAllocation(p, sizeIdx); - TrackPerTagAllocation(p, sizeIdx); - return GetAllocPtr(p); -} - + TAllocHeader* p = (TAllocHeader*)ptr; + p->Size = size; + p->Tag = AllocationTag; + p->Cookie = SampleAllocation(p, sizeIdx); + TrackPerTagAllocation(p, sizeIdx); + return GetAllocPtr(p); +} + static void TrackDeallocation(void* ptr, int sizeIdx) { - TAllocHeader* p = (TAllocHeader*)ptr; - SampleDeallocation(p, sizeIdx); - TrackPerTagDeallocation(p, sizeIdx); -} - -struct TPerTagAllocInfo { - ssize_t Count; - ssize_t Size; -}; - -extern "C" void GetPerTagAllocInfo( - bool flushPerThreadCounters, - TPerTagAllocInfo* info, - int& maxTag, + TAllocHeader* p = (TAllocHeader*)ptr; + SampleDeallocation(p, sizeIdx); + TrackPerTagDeallocation(p, sizeIdx); +} + +struct TPerTagAllocInfo { + ssize_t Count; + ssize_t Size; +}; + +extern "C" void GetPerTagAllocInfo( + bool flushPerThreadCounters, + TPerTagAllocInfo* info, + int& maxTag, int& numSizes) { - maxTag = DBG_ALLOC_MAX_TAG; - numSizes = DBG_ALLOC_NUM_SIZES; - - if (info) { - if (flushPerThreadCounters) { + maxTag = DBG_ALLOC_MAX_TAG; + numSizes = DBG_ALLOC_NUM_SIZES; + + if (info) { + if (flushPerThreadCounters) { TLFLockHolder ll(&LFLockThreadInfo); for (TThreadAllocInfo** p = &pThreadInfoList; *p;) { TThreadAllocInfo* pInfo = *p; - for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { - for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { - auto& local = pInfo->LocalPerTagAllocCounters[tag][sizeIdx]; - auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; - local.Flush(global); - } - } - p = &pInfo->pNextInfo; - } - } - - for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { - for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { - auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; + for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { + for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { + auto& local = pInfo->LocalPerTagAllocCounters[tag][sizeIdx]; + auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; + local.Flush(global); + } + } + p = &pInfo->pNextInfo; + } + } + + for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { + for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { + auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; auto& res = info[tag * DBG_ALLOC_NUM_SIZES + sizeIdx]; - res.Count = global.Count; - res.Size = global.Size; - } - } - } -} - + res.Count = global.Count; + res.Size = global.Size; + } + } + } +} + #endif // LFALLOC_DBG - + ////////////////////////////////////////////////////////////////////////// static Y_FORCE_INLINE void* LFAllocImpl(size_t _nSize) { -#if defined(LFALLOC_DBG) - size_t size = _nSize; - _nSize += sizeof(TAllocHeader); -#endif - +#if defined(LFALLOC_DBG) + size_t size = _nSize; + _nSize += sizeof(TAllocHeader); +#endif + IncrementCounter(CT_USER_ALLOC, _nSize); int nSizeIdx; if (_nSize > 512) { - if (_nSize > N_MAX_FAST_SIZE) { - void* ptr = LargeBlockAlloc(_nSize, CT_LARGE_ALLOC); -#if defined(LFALLOC_DBG) - ptr = TrackAllocation(ptr, size, N_SIZES); -#endif - return ptr; - } + if (_nSize > N_MAX_FAST_SIZE) { + void* ptr = LargeBlockAlloc(_nSize, CT_LARGE_ALLOC); +#if defined(LFALLOC_DBG) + ptr = TrackAllocation(ptr, size, N_SIZES); +#endif + return ptr; + } nSizeIdx = size2idxArr2[(_nSize - 1) >> 8]; } else nSizeIdx = size2idxArr1[1 + (((int)_nSize - 1) >> 3)]; @@ -1533,22 +1533,22 @@ static Y_FORCE_INLINE void* LFAllocImpl(size_t _nSize) { AllocThreadInfo(); thr = pThreadInfo; if (!thr) { - void* ptr = LFAllocNoCache(nSizeIdx, MEM_DEFRAG); -#if defined(LFALLOC_DBG) - ptr = TrackAllocation(ptr, size, nSizeIdx); -#endif - return ptr; + void* ptr = LFAllocNoCache(nSizeIdx, MEM_DEFRAG); +#if defined(LFALLOC_DBG) + ptr = TrackAllocation(ptr, size, nSizeIdx); +#endif + return ptr; } } { int& freePtrIdx = thr->FreePtrIndex[nSizeIdx]; - if (freePtrIdx < THREAD_BUF) { - void* ptr = thr->FreePtrs[nSizeIdx][freePtrIdx++]; -#if defined(LFALLOC_DBG) - ptr = TrackAllocation(ptr, size, nSizeIdx); -#endif - return ptr; - } + if (freePtrIdx < THREAD_BUF) { + void* ptr = thr->FreePtrs[nSizeIdx][freePtrIdx++]; +#if defined(LFALLOC_DBG) + ptr = TrackAllocation(ptr, size, nSizeIdx); +#endif + return ptr; + } // try to alloc from global free list char* buf[FL_GROUP_SIZE]; @@ -1563,11 +1563,11 @@ static Y_FORCE_INLINE void* LFAllocImpl(size_t _nSize) { for (int i = 0; i < count - 1; ++i) dstBuf[-i] = buf[i]; freePtrIdx -= count - 1; - void* ptr = buf[count - 1]; -#if defined(LFALLOC_DBG) - ptr = TrackAllocation(ptr, size, nSizeIdx); -#endif - return ptr; + void* ptr = buf[count - 1]; +#if defined(LFALLOC_DBG) + ptr = TrackAllocation(ptr, size, nSizeIdx); +#endif + return ptr; } } @@ -1582,33 +1582,33 @@ static Y_FORCE_INLINE void* LFAlloc(size_t _nSize) { } static Y_FORCE_INLINE void LFFree(void* p) { -#if defined(LFALLOC_DBG) - if (p == nullptr) - return; - p = GetAllocHeader(p); -#endif - +#if defined(LFALLOC_DBG) + if (p == nullptr) + return; + p = GetAllocHeader(p); +#endif + uintptr_t chkOffset = ((char*)p - ALLOC_START) - 1ll; if (chkOffset >= N_MAX_WORKSET_SIZE) { if (p == nullptr) return; -#if defined(LFALLOC_DBG) - TrackDeallocation(p, N_SIZES); -#endif +#if defined(LFALLOC_DBG) + TrackDeallocation(p, N_SIZES); +#endif LargeBlockFree(p, CT_LARGE_FREE); return; } - + uintptr_t chunk = ((char*)p - ALLOC_START) / N_CHUNK_SIZE; ptrdiff_t nSizeIdx = chunkSizeIdx[chunk]; if (nSizeIdx <= 0) { -#if defined(LFALLOC_DBG) - TrackDeallocation(p, N_SIZES); -#endif +#if defined(LFALLOC_DBG) + TrackDeallocation(p, N_SIZES); +#endif LargeBlockFree(p, CT_LARGE_FREE); return; } - + #if defined(LFALLOC_DBG) TrackDeallocation(p, nSizeIdx); #endif @@ -1645,12 +1645,12 @@ static Y_FORCE_INLINE void LFFree(void* p) { } static size_t LFGetSize(const void* p) { -#if defined(LFALLOC_DBG) - if (p == nullptr) - return 0; +#if defined(LFALLOC_DBG) + if (p == nullptr) + return 0; return GetAllocHeader(const_cast<void*>(p))->Size; -#endif - +#endif + uintptr_t chkOffset = ((const char*)p - ALLOC_START); if (chkOffset >= N_MAX_WORKSET_SIZE) { if (p == nullptr) @@ -1827,10 +1827,10 @@ static bool LFAlloc_SetParam(const char* param, const char* value) { TransparentHugePages = !strcmp(value, "true"); return true; } - if (!strcmp(param, "MapHugeTLB")) { - MapHugeTLB = !strcmp(value, "true"); - return true; - } + if (!strcmp(param, "MapHugeTLB")) { + MapHugeTLB = !strcmp(value, "true"); + return true; + } if (!strcmp(param, "EnableDefrag")) { EnableDefrag = !strcmp(value, "true"); return true; @@ -1839,15 +1839,15 @@ static bool LFAlloc_SetParam(const char* param, const char* value) { }; static const char* LFAlloc_GetParam(const char* param) { - struct TParam { - const char* Name; - const char* Value; - }; - - static const TParam Params[] = { + struct TParam { + const char* Name; + const char* Value; + }; + + static const TParam Params[] = { {"GetLFAllocCounterFast", (const char*)&GetLFAllocCounterFast}, {"GetLFAllocCounterFull", (const char*)&GetLFAllocCounterFull}, -#if defined(LFALLOC_DBG) +#if defined(LFALLOC_DBG) {"SetThreadAllocTag", (const char*)&SetThreadAllocTag}, {"SetProfileCurrentThread", (const char*)&SetProfileCurrentThread}, {"SetProfileAllThreads", (const char*)&SetProfileAllThreads}, @@ -1858,12 +1858,12 @@ static const char* LFAlloc_GetParam(const char* param) { {"SetDeallocationCallback", (const char*)&SetDeallocationCallback}, {"GetPerTagAllocInfo", (const char*)&GetPerTagAllocInfo}, #endif // LFALLOC_DBG - }; - - for (int i = 0; i < Y_ARRAY_SIZE(Params); ++i) { - if (strcmp(param, Params[i].Name) == 0) { - return Params[i].Value; - } + }; + + for (int i = 0; i < Y_ARRAY_SIZE(Params); ++i) { + if (strcmp(param, Params[i].Name) == 0) { + return Params[i].Value; + } } return nullptr; } |