author    robot-piglet <robot-piglet@yandex-team.com>  2024-11-29 18:15:29 +0300
committer robot-piglet <robot-piglet@yandex-team.com>  2024-11-29 18:40:16 +0300
commit    2aee7cda6c758679145563340ff94fc652670ea1 (patch)
tree      9ea64f4df3b6a7d2444be0fb216793e7032c9460
parent    fefd531d45e1854a567b58f4d6a661d3d365ece1 (diff)
download  ydb-2aee7cda6c758679145563340ff94fc652670ea1.tar.gz
Intermediate changes
commit_hash:60e005cdf76d5bff2a370a6b8f35ef4f6792f414
-rw-r--r--  library/cpp/yt/logging/unittests/ya.make | 4
-rw-r--r--  library/cpp/yt/memory/unittests/ya.make | 4
-rw-r--r--  library/cpp/ytalloc/impl/README.md | 5
-rw-r--r--  library/cpp/ytalloc/impl/bridge.cpp | 257
-rw-r--r--  library/cpp/ytalloc/impl/core-inl.h | 4849
-rw-r--r--  library/cpp/ytalloc/impl/ya.make | 15
-rw-r--r--  yt/yt/client/table_client/unittests/ya.make | 2
-rw-r--r--  yt/yt/client/unittests/ya.make | 2
-rw-r--r--  yt/yt/core/actions/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/bus/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/compression/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/concurrency/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/crypto/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/http/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/json/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/logging/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/misc/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/net/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/profiling/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/rpc/unittests/main/ya.make | 4
-rw-r--r--  yt/yt/core/rpc/unittests/rpc_ut.cpp | 33
-rw-r--r--  yt/yt/core/rpc/unittests/shutdown/ya.make | 4
-rw-r--r--  yt/yt/core/ya.make | 2
-rw-r--r--  yt/yt/core/ypath/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/yson/unittests/ya.make | 4
-rw-r--r--  yt/yt/core/ytree/unittests/ya.make | 4
-rw-r--r--  yt/yt/library/auth/unittests/ya.make | 2
-rw-r--r--  yt/yt/library/decimal/unittests/ya.make | 2
-rw-r--r--  yt/yt/library/erasure/impl/unittests/ya.make | 2
-rw-r--r--  yt/yt/library/process/unittests/ya.make | 2
-rw-r--r--  yt/yt/library/tvm/service/unittests/ya.make | 2
31 files changed, 1 insertion(+), 5246 deletions(-)
diff --git a/library/cpp/yt/logging/unittests/ya.make b/library/cpp/yt/logging/unittests/ya.make
index 021b0d09d6..4baea62140 100644
--- a/library/cpp/yt/logging/unittests/ya.make
+++ b/library/cpp/yt/logging/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-library-logging)
INCLUDE(${ARCADIA_ROOT}/library/cpp/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS)
- ALLOCATOR(YT)
-ENDIF()
-
SRCS(
logger_ut.cpp
static_analysis_ut.cpp
diff --git a/library/cpp/yt/memory/unittests/ya.make b/library/cpp/yt/memory/unittests/ya.make
index 920873de26..1a61a130ba 100644
--- a/library/cpp/yt/memory/unittests/ya.make
+++ b/library/cpp/yt/memory/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-library-memory)
INCLUDE(${ARCADIA_ROOT}/library/cpp/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS)
- ALLOCATOR(YT)
-ENDIF()
-
SRCS(
atomic_intrusive_ptr_ut.cpp
chunked_memory_pool_ut.cpp
diff --git a/library/cpp/ytalloc/impl/README.md b/library/cpp/ytalloc/impl/README.md
deleted file mode 100644
index 6d142085a8..0000000000
--- a/library/cpp/ytalloc/impl/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-This module contains the actual implementation of YTAlloc. Use
-```
-ALLOCATOR(YT)
-```
-to get it linked into your binary.
diff --git a/library/cpp/ytalloc/impl/bridge.cpp b/library/cpp/ytalloc/impl/bridge.cpp
deleted file mode 100644
index 95a4761998..0000000000
--- a/library/cpp/ytalloc/impl/bridge.cpp
+++ /dev/null
@@ -1,257 +0,0 @@
-#include "core-inl.h"
-
-#include <util/system/compiler.h>
-
-#include <library/cpp/malloc/api/malloc.h>
-
-#include <library/cpp/yt/memory/memory_tag.h>
-
-namespace NYT::NYTAlloc {
-
-////////////////////////////////////////////////////////////////////////////////
-// YTAlloc public API
-
-#ifdef YT_ALLOC_ENABLED
-
-void* Allocate(size_t size)
-{
- return AllocateInline(size);
-}
-
-void* AllocateSmall(size_t rank)
-{
- return AllocateSmallInline(rank);
-}
-
-void* AllocatePageAligned(size_t size)
-{
- return AllocatePageAlignedInline(size);
-}
-
-void Free(void* ptr)
-{
- FreeInline(ptr);
-}
-
-void FreeNonNull(void* ptr)
-{
- FreeNonNullInline(ptr);
-}
-
-size_t GetAllocationSize(const void* ptr)
-{
- return GetAllocationSizeInline(ptr);
-}
-
-size_t GetAllocationSize(size_t size)
-{
- return GetAllocationSizeInline(size);
-}
-
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NYTAlloc
-
-namespace NYT {
-
-using namespace NYTAlloc;
-
-////////////////////////////////////////////////////////////////////////////////
-// Memory tags API bridge
-
-TMemoryTag GetCurrentMemoryTag()
-{
- return NYTAlloc::TThreadManager::GetCurrentMemoryTag();
-}
-
-void SetCurrentMemoryTag(TMemoryTag tag)
-{
- TThreadManager::SetCurrentMemoryTag(tag);
-}
-
-void GetMemoryUsageForTags(const TMemoryTag* tags, size_t count, size_t* results)
-{
- InitializeGlobals();
- StatisticsManager->GetTaggedMemoryUsage(tags, count, results);
-}
-
-size_t GetMemoryUsageForTag(TMemoryTag tag)
-{
- size_t result;
- GetMemoryUsageForTags(&tag, 1, &result);
- return result;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT
-
-namespace NYT::NYTAlloc {
-
-////////////////////////////////////////////////////////////////////////////////
-// Memory zone API bridge
-
-void SetCurrentMemoryZone(EMemoryZone zone)
-{
- TThreadManager::SetCurrentMemoryZone(zone);
-}
-
-EMemoryZone GetCurrentMemoryZone()
-{
- return TThreadManager::GetCurrentMemoryZone();
-}
-
-EMemoryZone GetAllocationMemoryZone(const void* ptr)
-{
- auto uintptr = reinterpret_cast<uintptr_t>(ptr);
- if (uintptr >= MinUntaggedSmallPtr && uintptr < MaxUntaggedSmallPtr ||
- uintptr >= MinTaggedSmallPtr && uintptr < MaxTaggedSmallPtr ||
- uintptr >= DumpableLargeZoneStart && uintptr < DumpableLargeZoneEnd)
- {
- return EMemoryZone::Normal;
- } else if (uintptr >= UndumpableLargeZoneStart && uintptr < UndumpableLargeZoneEnd) {
- return EMemoryZone::Undumpable;
- } else {
- return EMemoryZone::Unknown;
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Fiber id API
-
-void SetCurrentFiberId(TFiberId id)
-{
- TThreadManager::SetCurrentFiberId(id);
-}
-
-TFiberId GetCurrentFiberId()
-{
- return TThreadManager::GetCurrentFiberId();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NYTAlloc
-
-////////////////////////////////////////////////////////////////////////////////
-// Libc malloc bridge
-
-#ifdef YT_ALLOC_ENABLED
-
-using namespace NYT::NYTAlloc;
-
-extern "C" void* malloc(size_t size)
-{
- return AllocateInline(size);
-}
-
-extern "C" void* valloc(size_t size)
-{
- return AllocatePageAlignedInline(size);
-}
-
-extern "C" void* aligned_alloc(size_t alignment, size_t size)
-{
- // Alignment must be a power of two.
- Y_ABORT_UNLESS((alignment & (alignment - 1)) == 0);
- // Alignment must not exceed the page size.
- Y_ABORT_UNLESS(alignment <= PageSize);
- if (alignment <= 16) {
- // Proper alignment here is automatic.
- return Allocate(size);
- } else {
- return AllocatePageAligned(size);
- }
-}
-
-extern "C" void* pvalloc(size_t size)
-{
- return valloc(AlignUp(size, PageSize));
-}
-
-extern "C" int posix_memalign(void** ptrPtr, size_t alignment, size_t size)
-{
- *ptrPtr = aligned_alloc(alignment, size);
- return 0;
-}
-
-extern "C" void* memalign(size_t alignment, size_t size)
-{
- return aligned_alloc(alignment, size);
-}
-
-extern "C" void* __libc_memalign(size_t alignment, size_t size)
-{
- return aligned_alloc(alignment, size);
-}
-
-extern "C" void free(void* ptr)
-{
- FreeInline(ptr);
-}
-
-extern "C" void* calloc(size_t n, size_t elemSize)
-{
- // Overflow check.
- auto size = n * elemSize;
- if (elemSize != 0 && size / elemSize != n) {
- return nullptr;
- }
-
- void* result = Allocate(size);
- ::memset(result, 0, size);
- return result;
-}
-
-extern "C" void cfree(void* ptr)
-{
- Free(ptr);
-}
-
-extern "C" void* realloc(void* oldPtr, size_t newSize)
-{
- if (!oldPtr) {
- return Allocate(newSize);
- }
-
- if (newSize == 0) {
- Free(oldPtr);
- return nullptr;
- }
-
- void* newPtr = Allocate(newSize);
- size_t oldSize = GetAllocationSize(oldPtr);
- ::memcpy(newPtr, oldPtr, std::min(oldSize, newSize));
- Free(oldPtr);
- return newPtr;
-}
-
-extern "C" size_t malloc_usable_size(void* ptr) noexcept
-{
- return GetAllocationSize(ptr);
-}
-
-extern "C" size_t nallocx(size_t size, int /* flags */) noexcept
-{
- return GetAllocationSize(size);
-}
-
-#endif
-
-namespace NMalloc {
-
-////////////////////////////////////////////////////////////////////////////////
-// Arcadia malloc API bridge
-
-TMallocInfo MallocInfo()
-{
- TMallocInfo info;
- info.Name = "ytalloc";
- return info;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NMalloc
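For context on what the deleted bridge.cpp did: besides the Arcadia NMalloc bridge, it interposed the libc allocation entry points (malloc, free, calloc, realloc, valloc, aligned_alloc, ...) by defining strong extern "C" symbols that shadow the libc ones and forward to the allocator's inline paths. Below is a minimal, self-contained sketch of that interposition pattern only; the toy bump arena is purely illustrative and is not how YTAlloc works.
```
#include <atomic>
#include <cstddef>

// Toy arena; a real interposer would forward to a real allocator instead.
static char Arena[1 << 20];
static std::atomic<size_t> ArenaOffset{0};

// Strong definitions shadow the libc symbols at link time, so every
// malloc/free in the binary is routed through this code.
// NB: depending on the libc headers, these definitions may need a matching
// exception specification (e.g. __THROW) to compile cleanly.
extern "C" void* malloc(size_t size)
{
    size_t aligned = (size + 15) & ~static_cast<size_t>(15);
    size_t offset = ArenaOffset.fetch_add(aligned);
    return offset + aligned <= sizeof(Arena) ? Arena + offset : nullptr;
}

extern "C" void free(void* /*ptr*/)
{
    // Deliberately a no-op in this sketch.
}
```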
diff --git a/library/cpp/ytalloc/impl/core-inl.h b/library/cpp/ytalloc/impl/core-inl.h
deleted file mode 100644
index 5a4f6a260b..0000000000
--- a/library/cpp/ytalloc/impl/core-inl.h
+++ /dev/null
@@ -1,4849 +0,0 @@
-#pragma once
-
-// This file contains the core parts of YTAlloc but no malloc/free-bridge.
-// The latter bridge is placed into bridge.cpp, which includes (sic!) core-inl.h.
-// This ensures that AllocateInline/FreeInline calls are properly inlined into malloc/free.
-// Also core-inl.h can be directly included in, e.g., benchmarks.
-
-#include <library/cpp/yt/containers/intrusive_linked_list.h>
-
-#include <library/cpp/yt/memory/memory_tag.h>
-
-#include <library/cpp/yt/threading/at_fork.h>
-#include <library/cpp/yt/threading/fork_aware_spin_lock.h>
-
-#include <library/cpp/yt/memory/free_list.h>
-
-#include <util/system/tls.h>
-#include <util/system/align.h>
-#include <util/system/thread.h>
-
-#include <util/string/printf.h>
-
-#include <util/generic/singleton.h>
-#include <util/generic/size_literals.h>
-#include <util/generic/utility.h>
-
-#include <util/digest/numeric.h>
-
-#include <library/cpp/ytalloc/api/ytalloc.h>
-
-#include <atomic>
-#include <array>
-#include <vector>
-#include <mutex>
-#include <thread>
-#include <condition_variable>
-#include <cstdio>
-#include <optional>
-
-#include <sys/mman.h>
-
-#ifdef _linux_
- #include <sys/utsname.h>
-#endif
-
-#include <errno.h>
-#include <pthread.h>
-#include <time.h>
-
-#ifndef MAP_POPULATE
- #define MAP_POPULATE 0x08000
-#endif
-
-// Like MAP_FIXED, but doesn't unmap the underlying mapping.
-// Linux kernels older than 4.17 silently ignore this flag.
-#ifndef MAP_FIXED_NOREPLACE
- #ifdef _linux_
- #define MAP_FIXED_NOREPLACE 0x100000
- #else
- #define MAP_FIXED_NOREPLACE 0
- #endif
-#endif
-
-#ifndef MADV_POPULATE
- #define MADV_POPULATE 0x59410003
-#endif
-
-#ifndef MADV_STOCKPILE
- #define MADV_STOCKPILE 0x59410004
-#endif
-
-#ifndef MADV_FREE
- #define MADV_FREE 8
-#endif
-
-#ifndef MADV_DONTDUMP
- #define MADV_DONTDUMP 16
-#endif
-
-#ifndef NDEBUG
- #define YTALLOC_PARANOID
-#endif
-
-#ifdef YTALLOC_PARANOID
- #define YTALLOC_NERVOUS
-#endif
-
-#define YTALLOC_VERIFY(condition) \
- do { \
- if (Y_UNLIKELY(!(condition))) { \
- ::NYT::NYTAlloc::AssertTrap("Assertion failed: " #condition, __FILE__, __LINE__); \
- } \
- } while (false)
-
-#ifdef NDEBUG
- #define YTALLOC_ASSERT(condition) YTALLOC_VERIFY(condition)
-#else
- #define YTALLOC_ASSERT(condition) (void)(0)
-#endif
-
-#ifdef YTALLOC_PARANOID
- #define YTALLOC_PARANOID_ASSERT(condition) YTALLOC_VERIFY(condition)
-#else
- #define YTALLOC_PARANOID_ASSERT(condition) (true || (condition))
-#endif
-
-#define YTALLOC_TRAP(message) ::NYT::NYTAlloc::AssertTrap(message, __FILE__, __LINE__)
-
-namespace NYT::NYTAlloc {
-
-////////////////////////////////////////////////////////////////////////////////
-// Allocations are classified into three types:
-//
-// a) Small chunks (less than LargeAllocationSizeThreshold)
-// These are the fastest and are extensively cached (both per-thread and globally).
-// Memory claimed for these allocations is never reclaimed back.
-// Code dealing with such allocations is heavily optimized with all hot paths
-// as streamlined as possible. The implementation is mostly inspired by LFAlloc.
-//
-// b) Large blobs (from LargeAllocationSizeThreshold to HugeAllocationSizeThreshold)
-// These are cached as well. We expect such allocations to be less frequent
-// than small ones but still do our best to provide good scalability.
-// In particular, thread-sharded concurrent data structures are used to provide access to
-// cached blobs. Memory is claimed via madvise(MADV_POPULATE) and reclaimed back
-// via madvise(MADV_FREE).
-//
-// c) Huge blobs (from HugeAllocationSizeThreshold)
-// These should be rare; we delegate directly to mmap and munmap for each allocation.
-//
-// We also provide a separate allocator for all system allocations (that are needed by YTAlloc itself).
-// These are rare and also delegate to mmap/munmap.
-
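To make the classification above concrete, here is a minimal sketch of the size-based dispatch it describes. It assumes the threshold constants declared in library/cpp/ytalloc/api/ytalloc.h and is not the actual fast-path code.
```
enum class EAllocationClass
{
    Small, // served from per-thread and global chunk caches
    Large, // cached blobs; claimed via MADV_POPULATE, reclaimed via MADV_FREE
    Huge,  // one mmap/munmap pair per allocation
};

// Hypothetical helper; the real allocator dispatches on size inline.
inline EAllocationClass ClassifyAllocation(size_t size)
{
    if (size < LargeAllocationSizeThreshold) {
        return EAllocationClass::Small;
    }
    if (size < HugeAllocationSizeThreshold) {
        return EAllocationClass::Large;
    }
    return EAllocationClass::Huge;
}
```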
-// Periods between background activities.
-constexpr auto BackgroundInterval = TDuration::Seconds(1);
-
-static_assert(LargeRankCount - MinLargeRank <= 16, "Too many large ranks");
-static_assert(SmallRankCount <= 32, "Too many small ranks");
-
-constexpr size_t SmallZoneSize = 1_TB;
-constexpr size_t LargeZoneSize = 16_TB;
-constexpr size_t HugeZoneSize = 1_TB;
-constexpr size_t SystemZoneSize = 1_TB;
-
-constexpr size_t MaxCachedChunksPerRank = 256;
-
-constexpr uintptr_t UntaggedSmallZonesStart = 0;
-constexpr uintptr_t UntaggedSmallZonesEnd = UntaggedSmallZonesStart + 32 * SmallZoneSize;
-constexpr uintptr_t MinUntaggedSmallPtr = UntaggedSmallZonesStart + SmallZoneSize * 1;
-constexpr uintptr_t MaxUntaggedSmallPtr = UntaggedSmallZonesStart + SmallZoneSize * SmallRankCount;
-
-constexpr uintptr_t TaggedSmallZonesStart = UntaggedSmallZonesEnd;
-constexpr uintptr_t TaggedSmallZonesEnd = TaggedSmallZonesStart + 32 * SmallZoneSize;
-constexpr uintptr_t MinTaggedSmallPtr = TaggedSmallZonesStart + SmallZoneSize * 1;
-constexpr uintptr_t MaxTaggedSmallPtr = TaggedSmallZonesStart + SmallZoneSize * SmallRankCount;
-
-constexpr uintptr_t DumpableLargeZoneStart = TaggedSmallZonesEnd;
-constexpr uintptr_t DumpableLargeZoneEnd = DumpableLargeZoneStart + LargeZoneSize;
-
-constexpr uintptr_t UndumpableLargeZoneStart = DumpableLargeZoneEnd;
-constexpr uintptr_t UndumpableLargeZoneEnd = UndumpableLargeZoneStart + LargeZoneSize;
-
-constexpr uintptr_t LargeZoneStart(bool dumpable)
-{
- return dumpable ? DumpableLargeZoneStart : UndumpableLargeZoneStart;
-}
-constexpr uintptr_t LargeZoneEnd(bool dumpable)
-{
- return dumpable ? DumpableLargeZoneEnd : UndumpableLargeZoneEnd;
-}
-
-constexpr uintptr_t HugeZoneStart = UndumpableLargeZoneEnd;
-constexpr uintptr_t HugeZoneEnd = HugeZoneStart + HugeZoneSize;
-
-constexpr uintptr_t SystemZoneStart = HugeZoneEnd;
-constexpr uintptr_t SystemZoneEnd = SystemZoneStart + SystemZoneSize;
-
-// We leave 64_KB at the end of each 256_MB block and never use it.
-// That serves two purposes:
-// 1. SmallExtentSize % SmallSegmentSize == 0
-// 2. Every small object satisfies the RightReadableArea requirement.
-constexpr size_t SmallExtentAllocSize = 256_MB;
-constexpr size_t SmallExtentSize = SmallExtentAllocSize - 64_KB;
-constexpr size_t SmallSegmentSize = 96_KB; // LCM(SmallRankToSize)
-
-constexpr ui16 SmallRankBatchSize[SmallRankCount] = {
- 0, 256, 256, 256, 256, 256, 256, 256, 256, 256, 192, 128, 96, 64, 48, 32, 24, 16, 12, 8, 6, 4, 3
-};
-
-constexpr bool CheckSmallSizes()
-{
- for (size_t rank = 0; rank < SmallRankCount; rank++) {
- auto size = SmallRankToSize[rank];
- if (size == 0) {
- continue;
- }
-
- if (SmallSegmentSize % size != 0) {
- return false;
- }
-
- if (SmallRankBatchSize[rank] > MaxCachedChunksPerRank) {
- return false;
- }
- }
-
- return true;
-}
-
-static_assert(CheckSmallSizes());
-static_assert(SmallExtentSize % SmallSegmentSize == 0);
-static_assert(SmallSegmentSize % PageSize == 0);
-
-constexpr size_t LargeExtentSize = 1_GB;
-static_assert(LargeExtentSize >= LargeAllocationSizeThreshold, "LargeExtentSize < LargeAllocationSizeThreshold");
-
-constexpr const char* BackgroundThreadName = "YTAllocBack";
-constexpr const char* StockpileThreadName = "YTAllocStock";
-
-DEFINE_ENUM(EAllocationKind,
- (Untagged)
- (Tagged)
-);
-
-// Forward declarations.
-struct TThreadState;
-struct TLargeArena;
-struct TLargeBlobExtent;
-
-////////////////////////////////////////////////////////////////////////////////
-// Traps and assertions
-
-[[noreturn]]
-void OomTrap()
-{
- _exit(9);
-}
-
-[[noreturn]]
-void AssertTrap(const char* message, const char* file, int line)
-{
- ::fprintf(stderr, "*** YTAlloc has detected an internal trap at %s:%d\n*** %s\n",
- file,
- line,
- message);
- __builtin_trap();
-}
-
-template <class T, class E>
-void AssertBlobState(T* header, E expectedState)
-{
- auto actualState = header->State;
- if (Y_UNLIKELY(actualState != expectedState)) {
- char message[256];
- snprintf(message, sizeof(message), "Invalid blob header state at %p: expected %" PRIx64 ", actual %" PRIx64,
- header,
- static_cast<ui64>(expectedState),
- static_cast<ui64>(actualState));
- YTALLOC_TRAP(message);
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Provides a never-dying singleton with explicit construction.
-template <class T>
-class TExplicitlyConstructableSingleton
-{
-public:
- TExplicitlyConstructableSingleton()
- { }
-
- ~TExplicitlyConstructableSingleton()
- { }
-
- template <class... Ts>
- void Construct(Ts&&... args)
- {
- new (&Storage_) T(std::forward<Ts>(args)...);
-#ifndef NDEBUG
- Constructed_ = true;
-#endif
- }
-
- Y_FORCE_INLINE T* Get()
- {
-#ifndef NDEBUG
- YTALLOC_PARANOID_ASSERT(Constructed_);
-#endif
- return &Storage_;
- }
-
- Y_FORCE_INLINE const T* Get() const
- {
-#ifndef NDEBUG
- YTALLOC_PARANOID_ASSERT(Constructed_);
-#endif
- return &Storage_;
- }
-
- Y_FORCE_INLINE T* operator->()
- {
- return Get();
- }
-
- Y_FORCE_INLINE const T* operator->() const
- {
- return Get();
- }
-
- Y_FORCE_INLINE T& operator*()
- {
- return *Get();
- }
-
- Y_FORCE_INLINE const T& operator*() const
- {
- return *Get();
- }
-
-private:
- union {
- T Storage_;
- };
-
-#ifndef NDEBUG
- bool Constructed_;
-#endif
-};
-
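A usage sketch for the wrapper above, mirroring how the singletons in this file (LogManager, ConfigurationManager, MappedMemoryManager, ...) are declared and then constructed explicitly during global initialization; TWidget is a hypothetical payload type.
```
struct TWidget
{
    explicit TWidget(int limit)
        : Limit(limit)
    { }

    int Limit;
};

TExplicitlyConstructableSingleton<TWidget> Widget;

void InitializeWidget()
{
    // Placement-constructs the instance exactly once; it is never destroyed.
    Widget.Construct(42);
}

int GetWidgetLimit()
{
    // Arrow/star access is valid only after Construct() has been called.
    return Widget->Limit;
}
```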
-////////////////////////////////////////////////////////////////////////////////
-
-// Initializes all singletons.
-// Safe to call multiple times.
-// Guaranteed to not allocate.
-void InitializeGlobals();
-
-// Spawns the background thread, if it's time.
-// Safe to call multiple times.
-// Must be called on allocation slow path.
-void StartBackgroundThread();
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TLogManager
-{
-public:
- // Sets the handler to be invoked for each log event produced by YTAlloc.
- void EnableLogging(TLogHandler logHandler)
- {
- LogHandler_.store(logHandler);
- }
-
- // Checks (in a racy way) that logging is enabled.
- bool IsLoggingEnabled()
- {
- return LogHandler_.load() != nullptr;
- }
-
- // Logs the message via log handler (if any).
- template <class... Ts>
- void LogMessage(ELogEventSeverity severity, const char* format, Ts&&... args)
- {
- auto logHandler = LogHandler_.load();
- if (!logHandler) {
- return;
- }
-
- std::array<char, 16_KB> buffer;
- auto len = ::snprintf(buffer.data(), buffer.size(), format, std::forward<Ts>(args)...);
-
- TLogEvent event;
- event.Severity = severity;
- event.Message = TStringBuf(buffer.data(), len);
- logHandler(event);
- }
-
- // A special case of zero args.
- void LogMessage(ELogEventSeverity severity, const char* message)
- {
- LogMessage(severity, "%s", message);
- }
-
-private:
-    std::atomic<TLogHandler> LogHandler_ = nullptr;
-
-};
-
-TExplicitlyConstructableSingleton<TLogManager> LogManager;
-
-#define YTALLOC_LOG_EVENT(...) LogManager->LogMessage(__VA_ARGS__)
-#define YTALLOC_LOG_DEBUG(...) YTALLOC_LOG_EVENT(ELogEventSeverity::Debug, __VA_ARGS__)
-#define YTALLOC_LOG_INFO(...) YTALLOC_LOG_EVENT(ELogEventSeverity::Info, __VA_ARGS__)
-#define YTALLOC_LOG_WARNING(...) YTALLOC_LOG_EVENT(ELogEventSeverity::Warning, __VA_ARGS__)
-#define YTALLOC_LOG_ERROR(...) YTALLOC_LOG_EVENT(ELogEventSeverity::Error, __VA_ARGS__)
-
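A sketch of wiring a handler into the manager above; it assumes TLogHandler from library/cpp/ytalloc/api/ytalloc.h is a plain function pointer taking const TLogEvent&, and it uses only the TLogEvent fields referenced in this file (Severity, Message).
```
// Hypothetical handler dumping YTAlloc log events to stderr.
void WriteYTAllocEventToStderr(const TLogEvent& event)
{
    ::fprintf(stderr, "[ytalloc] severity=%d: %.*s\n",
        static_cast<int>(event.Severity),
        static_cast<int>(event.Message.size()),
        event.Message.data());
}

void EnableYTAllocStderrLogging()
{
    // After this call the YTALLOC_LOG_* macros above start producing events.
    LogManager->EnableLogging(&WriteYTAllocEventToStderr);
}
```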
-////////////////////////////////////////////////////////////////////////////////
-
-Y_FORCE_INLINE size_t GetUsed(ssize_t allocated, ssize_t freed)
-{
- return allocated >= freed ? static_cast<size_t>(allocated - freed) : 0;
-}
-
-template <class T>
-Y_FORCE_INLINE void* HeaderToPtr(T* header)
-{
- return header + 1;
-}
-
-template <class T>
-Y_FORCE_INLINE T* PtrToHeader(void* ptr)
-{
- return static_cast<T*>(ptr) - 1;
-}
-
-template <class T>
-Y_FORCE_INLINE const T* PtrToHeader(const void* ptr)
-{
- return static_cast<const T*>(ptr) - 1;
-}
-
-Y_FORCE_INLINE size_t PtrToSmallRank(const void* ptr)
-{
- return (reinterpret_cast<uintptr_t>(ptr) >> 40) & 0x1f;
-}
-
-Y_FORCE_INLINE char* AlignDownToSmallSegment(char* extent, char* ptr)
-{
- auto offset = static_cast<uintptr_t>(ptr - extent);
- // NB: This modulo operation is always performed using multiplication.
- offset -= (offset % SmallSegmentSize);
- return extent + offset;
-}
-
-Y_FORCE_INLINE char* AlignUpToSmallSegment(char* extent, char* ptr)
-{
- return AlignDownToSmallSegment(extent, ptr + SmallSegmentSize - 1);
-}
-
-template <class T>
-static Y_FORCE_INLINE void UnalignPtr(void*& ptr)
-{
- if (reinterpret_cast<uintptr_t>(ptr) % PageSize == 0) {
- reinterpret_cast<char*&>(ptr) -= PageSize - sizeof (T);
- }
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(ptr) % PageSize == sizeof (T));
-}
-
-template <class T>
-static Y_FORCE_INLINE void UnalignPtr(const void*& ptr)
-{
- if (reinterpret_cast<uintptr_t>(ptr) % PageSize == 0) {
- reinterpret_cast<const char*&>(ptr) -= PageSize - sizeof (T);
- }
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(ptr) % PageSize == sizeof (T));
-}
-
-template <class T>
-Y_FORCE_INLINE size_t GetRawBlobSize(size_t size)
-{
- return AlignUp(size + sizeof (T) + RightReadableAreaSize, PageSize);
-}
-
-template <class T>
-Y_FORCE_INLINE size_t GetBlobAllocationSize(size_t size)
-{
- size += sizeof(T);
- size += RightReadableAreaSize;
- size = AlignUp(size, PageSize);
- size -= sizeof(T);
- size -= RightReadableAreaSize;
- return size;
-}
-
-Y_FORCE_INLINE size_t GetLargeRank(size_t size)
-{
- size_t rank = 64 - __builtin_clzl(size);
- if (size == (1ULL << (rank - 1))) {
- --rank;
- }
- return rank;
-}
-
-Y_FORCE_INLINE void PoisonRange(void* ptr, size_t size, ui32 magic)
-{
-#ifdef YTALLOC_PARANOID
- size = ::AlignUp<size_t>(size, 4);
- std::fill(static_cast<ui32*>(ptr), static_cast<ui32*>(ptr) + size / 4, magic);
-#else
- Y_UNUSED(ptr);
- Y_UNUSED(size);
- Y_UNUSED(magic);
-#endif
-}
-
-Y_FORCE_INLINE void PoisonFreedRange(void* ptr, size_t size)
-{
- PoisonRange(ptr, size, 0xdeadbeef);
-}
-
-Y_FORCE_INLINE void PoisonUninitializedRange(void* ptr, size_t size)
-{
- PoisonRange(ptr, size, 0xcafebabe);
-}
-
-// Checks that the header size is divisible by 16 (as needed due to alignment restrictions).
-#define CHECK_HEADER_ALIGNMENT(T) static_assert(sizeof(T) % 16 == 0, "sizeof(" #T ") % 16 != 0");
-
-////////////////////////////////////////////////////////////////////////////////
-
-static_assert(sizeof(TFreeList<void>) == CacheLineSize, "sizeof(TFreeList) != CacheLineSize");
-
-////////////////////////////////////////////////////////////////////////////////
-
-constexpr size_t ShardCount = 16;
-std::atomic<size_t> GlobalCurrentShardIndex;
-
-// Provides a context for working with sharded data structures.
-// Captures the initial shard index upon construction (indicating the shard
-// where all insertions go). Maintains the current shard index (round-robin,
-// indicating the shard currently used for extraction).
-// May or may not be thread-safe depending on TCounter.
-template <class TCounter>
-class TShardedState
-{
-public:
- TShardedState()
- : InitialShardIndex_(GlobalCurrentShardIndex++ % ShardCount)
- , CurrentShardIndex_(InitialShardIndex_)
- { }
-
- Y_FORCE_INLINE size_t GetInitialShardIndex() const
- {
- return InitialShardIndex_;
- }
-
- Y_FORCE_INLINE size_t GetNextShardIndex()
- {
- return ++CurrentShardIndex_ % ShardCount;
- }
-
-private:
- const size_t InitialShardIndex_;
- TCounter CurrentShardIndex_;
-};
-
-using TLocalShardedState = TShardedState<size_t>;
-using TGlobalShardedState = TShardedState<std::atomic<size_t>>;
-
-// Implemented as a collection of free lists (each called a shard).
-// One needs TShardedState to access the sharded data structure.
-template <class T>
-class TShardedFreeList
-{
-public:
- // First tries to extract an item from the initial shard;
-    // if that fails, proceeds to all shards in round-robin fashion.
- template <class TState>
- T* Extract(TState* state)
- {
- if (auto* item = Shards_[state->GetInitialShardIndex()].Extract()) {
- return item;
- }
- return ExtractRoundRobin(state);
- }
-
- // Attempts to extract an item from all shards in round-robin fashion.
- template <class TState>
- T* ExtractRoundRobin(TState* state)
- {
- for (size_t index = 0; index < ShardCount; ++index) {
- if (auto* item = Shards_[state->GetNextShardIndex()].Extract()) {
- return item;
- }
- }
- return nullptr;
- }
-
- // Extracts items from all shards linking them together.
- T* ExtractAll()
- {
- T* head = nullptr;
- T* tail = nullptr;
- for (auto& shard : Shards_) {
- auto* item = shard.ExtractAll();
- if (!head) {
- head = item;
- }
- if (tail) {
- YTALLOC_PARANOID_ASSERT(!tail->Next);
- tail->Next = item;
- } else {
- tail = item;
- }
- while (tail && tail->Next) {
- tail = tail->Next;
- }
- }
- return head;
- }
-
- template <class TState>
- void Put(TState* state, T* item)
- {
- Shards_[state->GetInitialShardIndex()].Put(item);
- }
-
-private:
- std::array<TFreeList<T>, ShardCount> Shards_;
-};
-
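A usage sketch for the sharded structures above; TPooledItem is hypothetical, and it is assumed (as with TThreadState later in this file) that items derive from TFreeListItemBase to obtain the intrusive Next pointer.
```
struct TPooledItem
    : public TFreeListItemBase<TPooledItem>
{
    int Payload = 0;
};

TGlobalShardedState PoolShardedState;
TShardedFreeList<TPooledItem> PoolFreeList;

void RecycleItem(TPooledItem* item)
{
    // Insertions always go to the state's initial shard.
    PoolFreeList.Put(&PoolShardedState, item);
}

TPooledItem* TryTakeItem()
{
    // Tries the initial shard first, then the remaining shards round-robin;
    // returns nullptr if all shards are empty.
    return PoolFreeList.Extract(&PoolShardedState);
}
```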
-////////////////////////////////////////////////////////////////////////////////
-
-// Holds YTAlloc control knobs.
-// Thread safe.
-class TConfigurationManager
-{
-public:
- void SetLargeUnreclaimableCoeff(double value)
- {
- LargeUnreclaimableCoeff_.store(value);
- }
-
- double GetLargeUnreclaimableCoeff() const
- {
- return LargeUnreclaimableCoeff_.load(std::memory_order_relaxed);
- }
-
-
- void SetMinLargeUnreclaimableBytes(size_t value)
- {
- MinLargeUnreclaimableBytes_.store(value);
- }
-
- void SetMaxLargeUnreclaimableBytes(size_t value)
- {
- MaxLargeUnreclaimableBytes_.store(value);
- }
-
- size_t GetMinLargeUnreclaimableBytes() const
- {
- return MinLargeUnreclaimableBytes_.load(std::memory_order_relaxed);
- }
-
- size_t GetMaxLargeUnreclaimableBytes() const
- {
- return MaxLargeUnreclaimableBytes_.load(std::memory_order_relaxed);
- }
-
-
- void SetTimingEventThreshold(TDuration value)
- {
- TimingEventThresholdNs_.store(value.MicroSeconds() * 1000);
- }
-
- i64 GetTimingEventThresholdNs() const
- {
- return TimingEventThresholdNs_.load(std::memory_order_relaxed);
- }
-
-
- void SetAllocationProfilingEnabled(bool value);
-
- bool IsAllocationProfilingEnabled() const
- {
- return AllocationProfilingEnabled_.load();
- }
-
-
- Y_FORCE_INLINE bool GetAllocationProfilingSamplingRate()
- {
- return AllocationProfilingSamplingRate_.load();
- }
-
- void SetAllocationProfilingSamplingRate(double rate)
- {
- if (rate < 0) {
- rate = 0;
- }
- if (rate > 1) {
- rate = 1;
- }
- i64 rateX64K = static_cast<i64>(rate * (1ULL << 16));
- AllocationProfilingSamplingRateX64K_.store(ClampVal<ui32>(rateX64K, 0, std::numeric_limits<ui16>::max() + 1));
- AllocationProfilingSamplingRate_.store(rate);
- }
-
-
- Y_FORCE_INLINE bool IsSmallArenaAllocationProfilingEnabled(size_t rank)
- {
- return SmallArenaAllocationProfilingEnabled_[rank].load(std::memory_order_relaxed);
- }
-
- Y_FORCE_INLINE bool IsSmallArenaAllocationProfiled(size_t rank)
- {
- return IsSmallArenaAllocationProfilingEnabled(rank) && IsAllocationSampled();
- }
-
- void SetSmallArenaAllocationProfilingEnabled(size_t rank, bool value)
- {
- if (rank >= SmallRankCount) {
- return;
- }
- SmallArenaAllocationProfilingEnabled_[rank].store(value);
- }
-
-
- Y_FORCE_INLINE bool IsLargeArenaAllocationProfilingEnabled(size_t rank)
- {
- return LargeArenaAllocationProfilingEnabled_[rank].load(std::memory_order_relaxed);
- }
-
- Y_FORCE_INLINE bool IsLargeArenaAllocationProfiled(size_t rank)
- {
- return IsLargeArenaAllocationProfilingEnabled(rank) && IsAllocationSampled();
- }
-
- void SetLargeArenaAllocationProfilingEnabled(size_t rank, bool value)
- {
- if (rank >= LargeRankCount) {
- return;
- }
- LargeArenaAllocationProfilingEnabled_[rank].store(value);
- }
-
-
- Y_FORCE_INLINE int GetProfilingBacktraceDepth()
- {
- return ProfilingBacktraceDepth_.load();
- }
-
- void SetProfilingBacktraceDepth(int depth)
- {
- if (depth < 1) {
- return;
- }
- if (depth > MaxAllocationProfilingBacktraceDepth) {
- depth = MaxAllocationProfilingBacktraceDepth;
- }
- ProfilingBacktraceDepth_.store(depth);
- }
-
-
- Y_FORCE_INLINE size_t GetMinProfilingBytesUsedToReport()
- {
- return MinProfilingBytesUsedToReport_.load();
- }
-
- void SetMinProfilingBytesUsedToReport(size_t size)
- {
- MinProfilingBytesUsedToReport_.store(size);
- }
-
- void SetEnableEagerMemoryRelease(bool value)
- {
- EnableEagerMemoryRelease_.store(value);
- }
-
- bool GetEnableEagerMemoryRelease()
- {
- return EnableEagerMemoryRelease_.load(std::memory_order_relaxed);
- }
-
- void SetEnableMadvisePopulate(bool value)
- {
- EnableMadvisePopulate_.store(value);
- }
-
- bool GetEnableMadvisePopulate()
- {
- return EnableMadvisePopulate_.load(std::memory_order_relaxed);
- }
-
- void EnableStockpile()
- {
- StockpileEnabled_.store(true);
- }
-
- bool IsStockpileEnabled()
- {
- return StockpileEnabled_.load();
- }
-
- void SetStockpileInterval(TDuration value)
- {
- StockpileInterval_.store(value);
- }
-
- TDuration GetStockpileInterval()
- {
- return StockpileInterval_.load();
- }
-
- void SetStockpileThreadCount(int count)
- {
- StockpileThreadCount_.store(count);
- }
-
- int GetStockpileThreadCount()
- {
- return ClampVal(StockpileThreadCount_.load(), 0, MaxStockpileThreadCount);
- }
-
- void SetStockpileSize(size_t value)
- {
- StockpileSize_.store(value);
- }
-
- size_t GetStockpileSize()
- {
- return StockpileSize_.load();
- }
-
-private:
- std::atomic<double> LargeUnreclaimableCoeff_ = 0.05;
- std::atomic<size_t> MinLargeUnreclaimableBytes_ = 128_MB;
- std::atomic<size_t> MaxLargeUnreclaimableBytes_ = 10_GB;
- std::atomic<i64> TimingEventThresholdNs_ = 10000000; // in ns, 10 ms by default
-
- std::atomic<bool> AllocationProfilingEnabled_ = false;
- std::atomic<double> AllocationProfilingSamplingRate_ = 1.0;
- std::atomic<ui32> AllocationProfilingSamplingRateX64K_ = std::numeric_limits<ui32>::max();
- std::array<std::atomic<bool>, SmallRankCount> SmallArenaAllocationProfilingEnabled_ = {};
- std::array<std::atomic<bool>, LargeRankCount> LargeArenaAllocationProfilingEnabled_ = {};
- std::atomic<int> ProfilingBacktraceDepth_ = 10;
- std::atomic<size_t> MinProfilingBytesUsedToReport_ = 1_MB;
-
- std::atomic<bool> EnableEagerMemoryRelease_ = true;
- std::atomic<bool> EnableMadvisePopulate_ = false;
-
- std::atomic<bool> StockpileEnabled_ = false;
- std::atomic<TDuration> StockpileInterval_ = TDuration::MilliSeconds(10);
- static constexpr int MaxStockpileThreadCount = 8;
- std::atomic<int> StockpileThreadCount_ = 4;
- std::atomic<size_t> StockpileSize_ = 1_GB;
-
-private:
- bool IsAllocationSampled()
- {
- Y_POD_STATIC_THREAD(ui16) Counter;
- return Counter++ < AllocationProfilingSamplingRateX64K_.load();
- }
-};
-
-TExplicitlyConstructableSingleton<TConfigurationManager> ConfigurationManager;
-
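A sketch of adjusting the knobs above directly; in practice these setters are presumably reached through the free functions exposed in ytalloc.h, and the values below are examples only.
```
void TuneYTAllocForBackgroundReclaim()
{
    // Bounds on how much large-blob memory may stay unreclaimed.
    ConfigurationManager->SetLargeUnreclaimableCoeff(0.1);
    ConfigurationManager->SetMinLargeUnreclaimableBytes(256_MB);

    // Report syscalls and lock acquisitions slower than 5 ms as timing events.
    ConfigurationManager->SetTimingEventThreshold(TDuration::MilliSeconds(5));

    // Run two stockpile threads waking up every 20 ms.
    ConfigurationManager->SetStockpileThreadCount(2);
    ConfigurationManager->SetStockpileInterval(TDuration::MilliSeconds(20));
    ConfigurationManager->EnableStockpile();
}
```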
-////////////////////////////////////////////////////////////////////////////////
-
-template <class TEvent, class TManager>
-class TEventLogManagerBase
-{
-public:
- void DisableForCurrentThread()
- {
- TManager::DisabledForCurrentThread_ = true;
- }
-
- template <class... TArgs>
- void EnqueueEvent(TArgs&&... args)
- {
- if (TManager::DisabledForCurrentThread_) {
- return;
- }
-
- auto timestamp = TInstant::Now();
- auto fiberId = NYTAlloc::GetCurrentFiberId();
- auto guard = Guard(EventLock_);
-
- auto event = TEvent(args...);
- OnEvent(event);
-
- if (EventCount_ >= EventBufferSize) {
- return;
- }
-
- auto& enqueuedEvent = Events_[EventCount_++];
- enqueuedEvent = std::move(event);
- enqueuedEvent.Timestamp = timestamp;
- enqueuedEvent.FiberId = fiberId;
- }
-
- void RunBackgroundTasks()
- {
- if (LogManager->IsLoggingEnabled()) {
- for (const auto& event : PullEvents()) {
- ProcessEvent(event);
- }
- }
- }
-
-protected:
- NThreading::TForkAwareSpinLock EventLock_;
-
- virtual void OnEvent(const TEvent& event) = 0;
-
- virtual void ProcessEvent(const TEvent& event) = 0;
-
-private:
- static constexpr size_t EventBufferSize = 1000;
- size_t EventCount_ = 0;
- std::array<TEvent, EventBufferSize> Events_;
-
- std::vector<TEvent> PullEvents()
- {
- std::vector<TEvent> events;
- events.reserve(EventBufferSize);
-
- auto guard = Guard(EventLock_);
- for (size_t index = 0; index < EventCount_; ++index) {
- events.push_back(Events_[index]);
- }
- EventCount_ = 0;
- return events;
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TTimingEvent
-{
- ETimingEventType Type;
- TDuration Duration;
- size_t Size;
- TInstant Timestamp;
- TFiberId FiberId;
-
- TTimingEvent()
- { }
-
- TTimingEvent(
- ETimingEventType type,
- TDuration duration,
- size_t size)
- : Type(type)
- , Duration(duration)
- , Size(size)
- { }
-};
-
-class TTimingManager
- : public TEventLogManagerBase<TTimingEvent, TTimingManager>
-{
-public:
- TEnumIndexedArray<ETimingEventType, TTimingEventCounters> GetTimingEventCounters()
- {
- auto guard = Guard(EventLock_);
- return EventCounters_;
- }
-
-private:
- TEnumIndexedArray<ETimingEventType, TTimingEventCounters> EventCounters_;
-
- Y_POD_STATIC_THREAD(bool) DisabledForCurrentThread_;
-
- friend class TEventLogManagerBase<TTimingEvent, TTimingManager>;
-
- virtual void OnEvent(const TTimingEvent& event) override
- {
- auto& counters = EventCounters_[event.Type];
- counters.Count += 1;
- counters.Size += event.Size;
- }
-
- virtual void ProcessEvent(const TTimingEvent& event) override
- {
- YTALLOC_LOG_DEBUG("Timing event logged (Type: %s, Duration: %s, Size: %zu, Timestamp: %s, FiberId: %" PRIu64 ")",
- ToString(event.Type).c_str(),
- ToString(event.Duration).c_str(),
- event.Size,
- ToString(event.Timestamp).c_str(),
- event.FiberId);
- }
-};
-
-Y_POD_THREAD(bool) TTimingManager::DisabledForCurrentThread_;
-
-TExplicitlyConstructableSingleton<TTimingManager> TimingManager;
-
-////////////////////////////////////////////////////////////////////////////////
-
-i64 GetElapsedNs(const struct timespec& startTime, const struct timespec& endTime)
-{
- if (Y_LIKELY(startTime.tv_sec == endTime.tv_sec)) {
- return static_cast<i64>(endTime.tv_nsec) - static_cast<i64>(startTime.tv_nsec);
- }
-
- return
- static_cast<i64>(endTime.tv_nsec) - static_cast<i64>(startTime.tv_nsec) +
- (static_cast<i64>(endTime.tv_sec) - static_cast<i64>(startTime.tv_sec)) * 1000000000;
-}
-
-// Used to log statistics about long-running syscalls and lock acquisitions.
-class TTimingGuard
- : public TNonCopyable
-{
-public:
- explicit TTimingGuard(ETimingEventType eventType, size_t size = 0)
- : EventType_(eventType)
- , Size_(size)
- {
- ::clock_gettime(CLOCK_MONOTONIC, &StartTime_);
- }
-
- ~TTimingGuard()
- {
- auto elapsedNs = GetElapsedNs();
- if (elapsedNs > ConfigurationManager->GetTimingEventThresholdNs()) {
- TimingManager->EnqueueEvent(EventType_, TDuration::MicroSeconds(elapsedNs / 1000), Size_);
- }
- }
-
-private:
- const ETimingEventType EventType_;
- const size_t Size_;
- struct timespec StartTime_;
-
- i64 GetElapsedNs() const
- {
- struct timespec endTime;
- ::clock_gettime(CLOCK_MONOTONIC, &endTime);
- return NYTAlloc::GetElapsedNs(StartTime_, endTime);
- }
-};
-
-template <class T>
-Y_FORCE_INLINE TGuard<T> GuardWithTiming(const T& lock)
-{
- TTimingGuard timingGuard(ETimingEventType::Locking);
- TGuard<T> lockingGuard(lock);
- return lockingGuard;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// A wrapper for mmap, munmap, and madvise calls.
-// The latter are invoked with MADV_POPULATE (if enabled) and MADV_FREE flags
-// and may fail if the OS support is missing. These failures are logged (once) and
-// handled as follows:
-// * if MADV_POPULATE fails then we fall back to manual per-page prefault
-// for all subsequent attempts;
-// * if MADV_FREE fails then it (and all subsequent attempts) is replaced with MADV_DONTNEED
-// (which is non-lazy and less efficient but suffices).
-// Also this class mlocks all VMAs on startup to prevent pagefaults in our heavy binaries
-// from disturbing latency tails.
-class TMappedMemoryManager
-{
-public:
- void* Map(uintptr_t hint, size_t size, int flags)
- {
- TTimingGuard timingGuard(ETimingEventType::Mmap, size);
- auto* result = ::mmap(
- reinterpret_cast<void*>(hint),
- size,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | flags,
- -1,
- 0);
- if (result == MAP_FAILED) {
- auto error = errno;
- if (error == EEXIST && (flags & MAP_FIXED_NOREPLACE)) {
- // Caller must retry with different hint address.
- return result;
- }
- YTALLOC_VERIFY(error == ENOMEM);
- ::fprintf(stderr, "*** YTAlloc has received ENOMEM error while trying to mmap %zu bytes\n",
- size);
- OomTrap();
- }
- return result;
- }
-
- void Unmap(void* ptr, size_t size)
- {
- TTimingGuard timingGuard(ETimingEventType::Munmap, size);
- auto result = ::munmap(ptr, size);
- YTALLOC_VERIFY(result == 0);
- }
-
- void DontDump(void* ptr, size_t size)
- {
- auto result = ::madvise(ptr, size, MADV_DONTDUMP);
- // Must not fail.
- YTALLOC_VERIFY(result == 0);
- }
-
- void PopulateFile(void* ptr, size_t size)
- {
- TTimingGuard timingGuard(ETimingEventType::FilePrefault, size);
-
- auto* begin = static_cast<volatile char*>(ptr);
- for (auto* current = begin; current < begin + size; current += PageSize) {
- *current;
- }
- }
-
- void PopulateReadOnly(void* ptr, size_t size)
- {
- if (!MadvisePopulateUnavailable_.load(std::memory_order_relaxed) &&
- ConfigurationManager->GetEnableMadvisePopulate())
- {
- if (!TryMadvisePopulate(ptr, size)) {
- MadvisePopulateUnavailable_.store(true);
- }
- }
- }
-
- void Populate(void* ptr, size_t size)
- {
- if (MadvisePopulateUnavailable_.load(std::memory_order_relaxed) ||
- !ConfigurationManager->GetEnableMadvisePopulate())
- {
- DoPrefault(ptr, size);
- } else if (!TryMadvisePopulate(ptr, size)) {
- MadvisePopulateUnavailable_.store(true);
- DoPrefault(ptr, size);
- }
- }
-
- void Release(void* ptr, size_t size)
- {
- if (CanUseMadviseFree() && !ConfigurationManager->GetEnableEagerMemoryRelease()) {
- DoMadviseFree(ptr, size);
- } else {
- DoMadviseDontNeed(ptr, size);
- }
- }
-
- bool Stockpile(size_t size)
- {
- if (MadviseStockpileUnavailable_.load(std::memory_order_relaxed)) {
- return false;
- }
- if (!TryMadviseStockpile(size)) {
- MadviseStockpileUnavailable_.store(true);
- return false;
- }
- return true;
- }
-
- void RunBackgroundTasks()
- {
- if (!LogManager->IsLoggingEnabled()) {
- return;
- }
- if (IsBuggyKernel() && !BuggyKernelLogged_) {
- YTALLOC_LOG_WARNING("Kernel is buggy; see KERNEL-118");
- BuggyKernelLogged_ = true;
- }
- if (MadviseFreeSupported_ && !MadviseFreeSupportedLogged_) {
- YTALLOC_LOG_INFO("MADV_FREE is supported");
- MadviseFreeSupportedLogged_ = true;
- }
- if (MadviseFreeNotSupported_ && !MadviseFreeNotSupportedLogged_) {
- YTALLOC_LOG_WARNING("MADV_FREE is not supported");
- MadviseFreeNotSupportedLogged_ = true;
- }
- if (MadvisePopulateUnavailable_.load() && !MadvisePopulateUnavailableLogged_) {
- YTALLOC_LOG_WARNING("MADV_POPULATE is not supported");
- MadvisePopulateUnavailableLogged_ = true;
- }
- if (MadviseStockpileUnavailable_.load() && !MadviseStockpileUnavailableLogged_) {
- YTALLOC_LOG_WARNING("MADV_STOCKPILE is not supported");
- MadviseStockpileUnavailableLogged_ = true;
- }
- }
-
-private:
- bool BuggyKernelLogged_ = false;
-
- std::atomic<bool> MadviseFreeSupported_ = false;
- bool MadviseFreeSupportedLogged_ = false;
-
- std::atomic<bool> MadviseFreeNotSupported_ = false;
- bool MadviseFreeNotSupportedLogged_ = false;
-
- std::atomic<bool> MadvisePopulateUnavailable_ = false;
- bool MadvisePopulateUnavailableLogged_ = false;
-
- std::atomic<bool> MadviseStockpileUnavailable_ = false;
- bool MadviseStockpileUnavailableLogged_ = false;
-
-private:
- bool TryMadvisePopulate(void* ptr, size_t size)
- {
- TTimingGuard timingGuard(ETimingEventType::MadvisePopulate, size);
- auto result = ::madvise(ptr, size, MADV_POPULATE);
- if (result != 0) {
- auto error = errno;
- YTALLOC_VERIFY(error == EINVAL || error == ENOMEM);
- if (error == ENOMEM) {
- ::fprintf(stderr, "*** YTAlloc has received ENOMEM error while trying to madvise(MADV_POPULATE) %zu bytes\n",
- size);
- OomTrap();
- }
- return false;
- }
- return true;
- }
-
- void DoPrefault(void* ptr, size_t size)
- {
- TTimingGuard timingGuard(ETimingEventType::Prefault, size);
- auto* begin = static_cast<char*>(ptr);
- for (auto* current = begin; current < begin + size; current += PageSize) {
- *current = 0;
- }
- }
-
- bool CanUseMadviseFree()
- {
- if (MadviseFreeSupported_.load()) {
- return true;
- }
- if (MadviseFreeNotSupported_.load()) {
- return false;
- }
-
- if (IsBuggyKernel()) {
- MadviseFreeNotSupported_.store(true);
- } else {
- auto* ptr = Map(0, PageSize, 0);
- if (::madvise(ptr, PageSize, MADV_FREE) == 0) {
- MadviseFreeSupported_.store(true);
- } else {
- MadviseFreeNotSupported_.store(true);
- }
- Unmap(ptr, PageSize);
- }
-
-        // Will not recurse again: one of the flags is now set.
- return CanUseMadviseFree();
- }
-
- void DoMadviseDontNeed(void* ptr, size_t size)
- {
- TTimingGuard timingGuard(ETimingEventType::MadviseDontNeed, size);
- auto result = ::madvise(ptr, size, MADV_DONTNEED);
- if (result != 0) {
- auto error = errno;
- // Failure is possible for locked pages.
- Y_ABORT_UNLESS(error == EINVAL);
- }
- }
-
- void DoMadviseFree(void* ptr, size_t size)
- {
- TTimingGuard timingGuard(ETimingEventType::MadviseFree, size);
- auto result = ::madvise(ptr, size, MADV_FREE);
- if (result != 0) {
- auto error = errno;
- // Failure is possible for locked pages.
- YTALLOC_VERIFY(error == EINVAL);
- }
- }
-
- bool TryMadviseStockpile(size_t size)
- {
- auto result = ::madvise(nullptr, size, MADV_STOCKPILE);
- if (result != 0) {
- auto error = errno;
- if (error == ENOMEM || error == EAGAIN || error == EINTR) {
- // The call is advisory, ignore ENOMEM, EAGAIN, and EINTR.
- return true;
- }
- YTALLOC_VERIFY(error == EINVAL);
- return false;
- }
- return true;
- }
-
- // Some kernels are known to contain bugs in MADV_FREE; see https://st.yandex-team.ru/KERNEL-118.
- bool IsBuggyKernel()
- {
-#ifdef _linux_
- static const bool result = [] () {
- struct utsname buf;
- YTALLOC_VERIFY(uname(&buf) == 0);
- if (strverscmp(buf.release, "4.4.1-1") >= 0 &&
- strverscmp(buf.release, "4.4.96-44") < 0)
- {
- return true;
- }
- if (strverscmp(buf.release, "4.14.1-1") >= 0 &&
- strverscmp(buf.release, "4.14.79-33") < 0)
- {
- return true;
- }
- return false;
- }();
- return result;
-#else
- return false;
-#endif
- }
-};
-
-TExplicitlyConstructableSingleton<TMappedMemoryManager> MappedMemoryManager;
-
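A sketch of the map/populate/release cycle the arenas below drive through this manager. It is illustrative only: the real code reserves ranges through TZoneAllocator so that mappings stay inside their zones, whereas a bare hint passed to Map() is not guaranteed to be honored.
```
void* AcquireLargeExtent()
{
    // Reserve address space (hint only) and back it with pages,
    // via MADV_POPULATE when available, else a manual prefault.
    void* extent = MappedMemoryManager->Map(DumpableLargeZoneStart, LargeExtentSize, 0);
    MappedMemoryManager->Populate(extent, LargeExtentSize);
    return extent;
}

void ReleaseLargeExtentPages(void* extent)
{
    // Give the pages back lazily (MADV_FREE) or eagerly (MADV_DONTNEED),
    // keeping the address range mapped for reuse.
    MappedMemoryManager->Release(extent, LargeExtentSize);
}
```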
-////////////////////////////////////////////////////////////////////////////////
-// System allocator
-
-// Each system allocation is prepended with such a header.
-struct TSystemBlobHeader
-{
- explicit TSystemBlobHeader(size_t size)
- : Size(size)
- { }
-
- size_t Size;
- char Padding[8];
-};
-
-CHECK_HEADER_ALIGNMENT(TSystemBlobHeader)
-
-// Used for some internal allocations.
-// Delegates directly to TMappedMemoryManager.
-class TSystemAllocator
-{
-public:
- void* Allocate(size_t size);
- void Free(void* ptr);
-
-private:
- std::atomic<uintptr_t> CurrentPtr_ = SystemZoneStart;
-};
-
-TExplicitlyConstructableSingleton<TSystemAllocator> SystemAllocator;
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Deriving from this class makes instances bound to TSystemAllocator.
-struct TSystemAllocatable
-{
- void* operator new(size_t size) noexcept
- {
- return SystemAllocator->Allocate(size);
- }
-
- void* operator new[](size_t size) noexcept
- {
- return SystemAllocator->Allocate(size);
- }
-
- void operator delete(void* ptr) noexcept
- {
- SystemAllocator->Free(ptr);
- }
-
- void operator delete[](void* ptr) noexcept
- {
- SystemAllocator->Free(ptr);
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Maintains a pool of objects.
-// Objects are allocated in groups each containing BatchSize instances.
-// The actual allocation is carried out by TSystemAllocator.
-// Memory is never actually reclaimed; freed instances are put into TFreeList.
-template <class T, size_t BatchSize>
-class TSystemPool
-{
-public:
- T* Allocate()
- {
- while (true) {
- auto* obj = FreeList_.Extract();
- if (Y_LIKELY(obj)) {
- new (obj) T();
- return obj;
- }
- AllocateMore();
- }
- }
-
- void Free(T* obj)
- {
- obj->T::~T();
- PoisonFreedRange(obj, sizeof(T));
- FreeList_.Put(obj);
- }
-
-private:
- TFreeList<T> FreeList_;
-
-private:
- void AllocateMore()
- {
- auto* objs = static_cast<T*>(SystemAllocator->Allocate(sizeof(T) * BatchSize));
- for (size_t index = 0; index < BatchSize; ++index) {
- auto* obj = objs + index;
- FreeList_.Put(obj);
- }
- }
-};
-
-// A sharded analogue of TSystemPool.
-template <class T, size_t BatchSize>
-class TShardedSystemPool
-{
-public:
- template <class TState>
- T* Allocate(TState* state)
- {
- if (auto* obj = FreeLists_[state->GetInitialShardIndex()].Extract()) {
- new (obj) T();
- return obj;
- }
-
- while (true) {
- for (size_t index = 0; index < ShardCount; ++index) {
- if (auto* obj = FreeLists_[state->GetNextShardIndex()].Extract()) {
- new (obj) T();
- return obj;
- }
- }
- AllocateMore();
- }
- }
-
- template <class TState>
- void Free(TState* state, T* obj)
- {
- obj->T::~T();
- PoisonFreedRange(obj, sizeof(T));
- FreeLists_[state->GetInitialShardIndex()].Put(obj);
- }
-
-private:
- std::array<TFreeList<T>, ShardCount> FreeLists_;
-
-private:
- void AllocateMore()
- {
- auto* objs = static_cast<T*>(SystemAllocator->Allocate(sizeof(T) * BatchSize));
- for (size_t index = 0; index < BatchSize; ++index) {
- auto* obj = objs + index;
- FreeLists_[index % ShardCount].Put(obj);
- }
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Handles allocations inside a zone of memory given by its start and end pointers.
-// Each allocation is a separate mapped region of memory.
-// Special care is taken to guarantee that all allocated regions fall inside the zone.
-class TZoneAllocator
-{
-public:
- TZoneAllocator(uintptr_t zoneStart, uintptr_t zoneEnd)
- : ZoneStart_(zoneStart)
- , ZoneEnd_(zoneEnd)
- , Current_(zoneStart)
- {
- YTALLOC_VERIFY(ZoneStart_ % PageSize == 0);
- }
-
- void* Allocate(size_t size, int flags)
- {
- YTALLOC_VERIFY(size % PageSize == 0);
- bool restarted = false;
- while (true) {
- auto hint = (Current_ += size) - size;
- if (reinterpret_cast<uintptr_t>(hint) + size > ZoneEnd_) {
- if (restarted) {
- ::fprintf(stderr, "*** YTAlloc was unable to mmap %zu bytes in zone %" PRIx64 "--%" PRIx64 "\n",
- size,
- ZoneStart_,
- ZoneEnd_);
- OomTrap();
- }
- restarted = true;
- Current_ = ZoneStart_;
- } else {
- char* ptr = static_cast<char*>(MappedMemoryManager->Map(
- hint,
- size,
- MAP_FIXED_NOREPLACE | flags));
- if (reinterpret_cast<uintptr_t>(ptr) == hint) {
- return ptr;
- }
- if (ptr != MAP_FAILED) {
- MappedMemoryManager->Unmap(ptr, size);
- }
- }
- }
- }
-
- void Free(void* ptr, size_t size)
- {
- MappedMemoryManager->Unmap(ptr, size);
- }
-
-private:
- const uintptr_t ZoneStart_;
- const uintptr_t ZoneEnd_;
-
- std::atomic<uintptr_t> Current_;
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-// YTAlloc supports tagged allocations.
-// Since the total number of tags can be huge, a two-level scheme is employed.
-// Possible tags are arranged into sets each containing TaggedCounterSetSize tags.
-// There are up to MaxTaggedCounterSets in total.
-// Upper 4 sets are reserved for profiled allocations.
-constexpr size_t TaggedCounterSetSize = 16384;
-constexpr size_t AllocationProfilingTaggedCounterSets = 4;
-constexpr size_t MaxTaggedCounterSets = 256 + AllocationProfilingTaggedCounterSets;
-
-constexpr size_t MaxCapturedAllocationBacktraces = 65000;
-static_assert(
- MaxCapturedAllocationBacktraces < AllocationProfilingTaggedCounterSets * TaggedCounterSetSize,
- "MaxCapturedAllocationBacktraces is too big");
-
-constexpr TMemoryTag AllocationProfilingMemoryTagBase = TaggedCounterSetSize * (MaxTaggedCounterSets - AllocationProfilingTaggedCounterSets);
-constexpr TMemoryTag AllocationProfilingUnknownMemoryTag = AllocationProfilingMemoryTagBase + MaxCapturedAllocationBacktraces;
-
-static_assert(
- MaxMemoryTag == TaggedCounterSetSize * (MaxTaggedCounterSets - AllocationProfilingTaggedCounterSets) - 1,
- "Wrong MaxMemoryTag");
-
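The two-level lookup implied by these constants boils down to a division and a modulo on the tag. Below is a sketch of that arithmetic (a hypothetical helper; the actual accessors live in the statistics code further down, outside the portion shown here).
```
struct TTaggedCounterLocation
{
    size_t SetIndex;    // which TTaggedTotalCounterSet, < MaxTaggedCounterSets
    size_t IndexInSet;  // slot inside the set, < TaggedCounterSetSize
};

inline TTaggedCounterLocation LocateTaggedCounter(TMemoryTag tag)
{
    auto value = static_cast<size_t>(tag);
    return {value / TaggedCounterSetSize, value % TaggedCounterSetSize};
}
```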
-template <class TCounter>
-using TUntaggedTotalCounters = TEnumIndexedArray<EBasicCounter, TCounter>;
-
-template <class TCounter>
-struct TTaggedTotalCounterSet
- : public TSystemAllocatable
-{
- std::array<TEnumIndexedArray<EBasicCounter, TCounter>, TaggedCounterSetSize> Counters;
-};
-
-using TLocalTaggedBasicCounterSet = TTaggedTotalCounterSet<ssize_t>;
-using TGlobalTaggedBasicCounterSet = TTaggedTotalCounterSet<std::atomic<ssize_t>>;
-
-template <class TCounter>
-struct TTotalCounters
-{
- // The sum of counters across all tags.
- TUntaggedTotalCounters<TCounter> CumulativeTaggedCounters;
-
- // Counters for untagged allocations.
- TUntaggedTotalCounters<TCounter> UntaggedCounters;
-
- // Access to tagged counters may involve creation of a new tag set.
- // For simplicity, we separate the read-side (TaggedCounterSets) and the write-side (TaggedCounterSetHolders).
- // These arrays contain virtually identical data (up to std::unique_ptr and std::atomic semantic differences).
- std::array<std::atomic<TTaggedTotalCounterSet<TCounter>*>, MaxTaggedCounterSets> TaggedCounterSets{};
- std::array<std::unique_ptr<TTaggedTotalCounterSet<TCounter>>, MaxTaggedCounterSets> TaggedCounterSetHolders;
-
- // Protects TaggedCounterSetHolders from concurrent updates.
- NThreading::TForkAwareSpinLock TaggedCounterSetsLock;
-
- // Returns null if the set is not yet constructed.
- Y_FORCE_INLINE TTaggedTotalCounterSet<TCounter>* FindTaggedCounterSet(size_t index) const
- {
- return TaggedCounterSets[index].load();
- }
-
- // Constructs the set on first access.
- TTaggedTotalCounterSet<TCounter>* GetOrCreateTaggedCounterSet(size_t index)
- {
- auto* set = TaggedCounterSets[index].load();
- if (Y_LIKELY(set)) {
- return set;
- }
-
- auto guard = GuardWithTiming(TaggedCounterSetsLock);
- auto& setHolder = TaggedCounterSetHolders[index];
- if (!setHolder) {
- setHolder = std::make_unique<TTaggedTotalCounterSet<TCounter>>();
- TaggedCounterSets[index] = setHolder.get();
- }
- return setHolder.get();
- }
-};
-
-using TLocalSystemCounters = TEnumIndexedArray<ESystemCounter, ssize_t>;
-using TGlobalSystemCounters = TEnumIndexedArray<ESystemCounter, std::atomic<ssize_t>>;
-
-using TLocalSmallCounters = TEnumIndexedArray<ESmallArenaCounter, ssize_t>;
-using TGlobalSmallCounters = TEnumIndexedArray<ESmallArenaCounter, std::atomic<ssize_t>>;
-
-using TLocalLargeCounters = TEnumIndexedArray<ELargeArenaCounter, ssize_t>;
-using TGlobalLargeCounters = TEnumIndexedArray<ELargeArenaCounter, std::atomic<ssize_t>>;
-
-using TLocalHugeCounters = TEnumIndexedArray<EHugeCounter, ssize_t>;
-using TGlobalHugeCounters = TEnumIndexedArray<EHugeCounter, std::atomic<ssize_t>>;
-
-using TLocalUndumpableCounters = TEnumIndexedArray<EUndumpableCounter, ssize_t>;
-using TGlobalUndumpableCounters = TEnumIndexedArray<EUndumpableCounter, std::atomic<ssize_t>>;
-
-Y_FORCE_INLINE ssize_t LoadCounter(ssize_t counter)
-{
- return counter;
-}
-
-Y_FORCE_INLINE ssize_t LoadCounter(const std::atomic<ssize_t>& counter)
-{
- return counter.load();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TMmapObservationEvent
-{
- size_t Size;
- std::array<void*, MaxAllocationProfilingBacktraceDepth> Frames;
- int FrameCount;
- TInstant Timestamp;
- TFiberId FiberId;
-
- TMmapObservationEvent() = default;
-
- TMmapObservationEvent(
- size_t size,
- std::array<void*, MaxAllocationProfilingBacktraceDepth> frames,
- int frameCount)
- : Size(size)
- , Frames(frames)
- , FrameCount(frameCount)
- { }
-};
-
-class TMmapObservationManager
- : public TEventLogManagerBase<TMmapObservationEvent, TMmapObservationManager>
-{
-public:
- void SetBacktraceFormatter(TBacktraceFormatter formatter)
- {
- BacktraceFormatter_.store(formatter);
- }
-
-private:
- std::atomic<TBacktraceFormatter> BacktraceFormatter_ = nullptr;
-
- Y_POD_STATIC_THREAD(bool) DisabledForCurrentThread_;
-
- friend class TEventLogManagerBase<TMmapObservationEvent, TMmapObservationManager>;
-
- virtual void OnEvent(const TMmapObservationEvent& /*event*/) override
- { }
-
- virtual void ProcessEvent(const TMmapObservationEvent& event) override
- {
- YTALLOC_LOG_DEBUG("Large arena mmap observed (Size: %zu, Timestamp: %s, FiberId: %" PRIx64 ")",
- event.Size,
- ToString(event.Timestamp).c_str(),
- event.FiberId);
-
- if (auto backtraceFormatter = BacktraceFormatter_.load()) {
- auto backtrace = backtraceFormatter(const_cast<void**>(event.Frames.data()), event.FrameCount);
- YTALLOC_LOG_DEBUG("YTAlloc stack backtrace (Stack: %s)",
- backtrace.c_str());
- }
- }
-};
-
-Y_POD_THREAD(bool) TMmapObservationManager::DisabledForCurrentThread_;
-
-TExplicitlyConstructableSingleton<TMmapObservationManager> MmapObservationManager;
-
-////////////////////////////////////////////////////////////////////////////////
-
-// A per-thread structure containing counters, chunk caches etc.
-struct TThreadState
- : public TFreeListItemBase<TThreadState>
- , public TLocalShardedState
-{
- // TThreadState instances of all alive threads are put into a double-linked intrusive list.
- // This is a pair of next/prev pointers connecting an instance of TThreadState to its neighbors.
- TIntrusiveLinkedListNode<TThreadState> RegistryNode;
-
- // Pointers to the respective parts of TThreadManager::ThreadControlWord_.
- // If null then the thread is already destroyed (but TThreadState may still live for a while
- // due to ref-counting).
- ui8* AllocationProfilingEnabled;
- ui8* BackgroundThreadStarted;
-
- // TThreadStates are ref-counted.
- // TThreadManager::EnumerateThreadStates enumerates the registered states and acquires
-    // a temporary reference preventing these states from being destructed. This keeps
-    // the periods during which the global lock must be held short.
- int RefCounter = 1;
-
- // Per-thread counters.
- TTotalCounters<ssize_t> TotalCounters;
- std::array<TLocalLargeCounters, LargeRankCount> LargeArenaCounters;
- TLocalUndumpableCounters UndumpableCounters;
-
- // Each thread maintains caches of small chunks.
- // One cache is for tagged chunks; the other is for untagged ones.
-    // Each cache contains up to MaxCachedChunksPerRank chunks per rank.
-    // Special sentinels are placed to mark the boundaries of the region containing
-    // pointers of a specific rank. This enables slightly faster in-place boundary checks.
-
- static constexpr uintptr_t LeftSentinel = 1;
- static constexpr uintptr_t RightSentinel = 2;
-
- struct TSmallBlobCache
- {
- TSmallBlobCache()
- {
- void** chunkPtrs = CachedChunks.data();
- for (size_t rank = 0; rank < SmallRankCount; ++rank) {
- RankToCachedChunkPtrHead[rank] = chunkPtrs;
- chunkPtrs[0] = reinterpret_cast<void*>(LeftSentinel);
- chunkPtrs[MaxCachedChunksPerRank + 1] = reinterpret_cast<void*>(RightSentinel);
-
-#ifdef YTALLOC_PARANOID
- RankToCachedChunkPtrTail[rank] = chunkPtrs;
- CachedChunkFull[rank] = false;
-
- RankToCachedChunkLeftBorder[rank] = chunkPtrs;
- RankToCachedChunkRightBorder[rank] = chunkPtrs + MaxCachedChunksPerRank + 1;
-#endif
- chunkPtrs += MaxCachedChunksPerRank + 2;
- }
- }
-
- // For each rank we have a segment of pointers in CachedChunks with the following layout:
- // LCC[C]........R
- // Legend:
- // . = garbage
- // L = left sentinel
- // R = right sentinel
- // C = cached pointer
- // [C] = current cached pointer
- //
- // Under YTALLOC_PARANOID the following layout is used:
- // L.[T]CCC[H]...R
- // Legend:
- // [H] = head cached pointer, put chunks here
- // [T] = tail cached pointer, take chunks from here
-
- // +2 is for two sentinels
- std::array<void*, SmallRankCount * (MaxCachedChunksPerRank + 2)> CachedChunks{};
-
-    // Pointer to [C] (the current cached pointer; [H] under YTALLOC_PARANOID) for each rank.
- std::array<void**, SmallRankCount> RankToCachedChunkPtrHead{};
-
-#ifdef YTALLOC_PARANOID
- // Pointers to [L] and [R] for each rank.
- std::array<void**, SmallRankCount> RankToCachedChunkLeftBorder{};
- std::array<void**, SmallRankCount> RankToCachedChunkRightBorder{};
-
- std::array<void**, SmallRankCount> RankToCachedChunkPtrTail{};
- std::array<bool, SmallRankCount> CachedChunkFull{};
-#endif
- };
- TEnumIndexedArray<EAllocationKind, TSmallBlobCache> SmallBlobCache;
-};
-
-struct TThreadStateToRegistryNode
-{
- auto operator() (TThreadState* state) const
- {
- return &state->RegistryNode;
- }
-};
-
-// Manages all registered threads and controls access to TThreadState.
-class TThreadManager
-{
-public:
- TThreadManager()
- {
- pthread_key_create(&ThreadDtorKey_, DestroyThread);
-
- NThreading::RegisterAtForkHandlers(
- nullptr,
- nullptr,
- [=] { AfterFork(); });
- }
-
- // Returns TThreadState for the current thread; the caller guarantees that this
- // state is initialized and is not destroyed yet.
- static TThreadState* GetThreadStateUnchecked();
-
- // Returns TThreadState for the current thread; may return null.
- static TThreadState* FindThreadState();
-
- // Returns TThreadState for the current thread; never returns null
- // (but may crash if TThreadState is already destroyed).
- static TThreadState* GetThreadStateChecked()
- {
- auto* state = FindThreadState();
- YTALLOC_VERIFY(state);
- return state;
- }
-
- // Enumerates all threads and invokes handler passing TThreadState instances.
- // The handler must not throw but can take arbitrary time; no locks are held while it executes.
- template <class THandler>
- void EnumerateThreadStatesAsync(const THandler& handler) noexcept
- {
- TMemoryTagGuard guard(NullMemoryTag);
-
- std::vector<TThreadState*> states;
- states.reserve(1024); // must be enough in most cases
-
- auto unrefStates = [&] {
- // Releasing references also requires the global lock to be held to avoid the enumeration above observing zombie states.
- auto guard = GuardWithTiming(ThreadRegistryLock_);
- for (auto* state : states) {
- UnrefThreadState(state);
- }
- };
-
- auto tryRefStates = [&] {
- // Only hold this guard for a small period of time to reference all the states.
- auto guard = GuardWithTiming(ThreadRegistryLock_);
- auto* current = ThreadRegistry_.GetFront();
- while (current) {
- if (states.size() == states.capacity()) {
- // Cannot allocate while holding ThreadRegistryLock_ due to a possible deadlock as follows:
- // EnumerateThreadStatesAsync -> StartBackgroundThread -> EnumerateThreadStatesSync
- // (many other scenarios are also possible).
- guard.Release();
- unrefStates();
- states.clear();
- states.reserve(states.capacity() * 2);
- return false;
- }
- RefThreadState(current);
- states.push_back(current);
- current = current->RegistryNode.Next;
- }
- return true;
- };
-
- while (!tryRefStates()) ;
-
- for (auto* state : states) {
- handler(state);
- }
-
- unrefStates();
- }
-
- // Similar to EnumerateThreadStatesAsync but holds the global lock while enumerating the threads.
- // Also invokes a given prologue functor while holding the thread registry lock.
- // Handler and prologue calls must be fast and must not allocate.
- template <class TPrologue, class THandler>
- void EnumerateThreadStatesSync(const TPrologue& prologue, const THandler& handler) noexcept
- {
- auto guard = GuardWithTiming(ThreadRegistryLock_);
- prologue();
- auto* current = ThreadRegistry_.GetFront();
- while (current) {
- handler(current);
- current = current->RegistryNode.Next;
- }
- }
-
-
- // We store a special 64-bit "thread control word" in TLS encapsulating the following
- // crucial per-thread parameters:
- // * the current memory tag
- // * a flag indicating that a valid TThreadState is known to exist
- // (and can be obtained via GetThreadStateUnchecked)
- // * a flag indicating that allocation profiling is enabled
- // * a flag indicating that the background thread is started
- // The thread control word is fetched via GetThreadControlWord and is compared
- // against FastPathControlWord to see if the fast path can be taken.
- // The latter happens when no memory tagging is configured, TThreadState is
- // valid, allocation profiling is disabled, and the background thread is started.
-
- // The mask for extracting memory tag from thread control word.
- static constexpr ui64 MemoryTagControlWordMask = 0xffffffff;
- // ThreadStateValid is on.
- static constexpr ui64 ThreadStateValidControlWordMask = (1ULL << 32);
- // AllocationProfiling is on.
- static constexpr ui64 AllocationProfilingEnabledControlWordMask = (1ULL << 40);
- // All background threads are properly started.
- static constexpr ui64 BackgroundThreadStartedControlWorkMask = (1ULL << 48);
- // Memory tag is NullMemoryTag; thread state is valid; background thread is started (and profiling is off).
- static constexpr ui64 FastPathControlWord =
- BackgroundThreadStartedControlWorkMask |
- ThreadStateValidControlWordMask |
- NullMemoryTag;
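-
- // A minimal illustrative sketch (not part of the original allocator) of how the
- // control word is meant to be consulted: a single 64-bit TLS load decides between
- // the fast and the slow allocation paths. AllocateSmallSlow is a hypothetical
- // fallback; the other names are the members and classes defined in this file.
- //
- //   Y_FORCE_INLINE void* AllocateSmallViaControlWord(size_t rank)
- //   {
- //       if (TThreadManager::GetThreadControlWord() == TThreadManager::FastPathControlWord) {
- //           // Untagged allocation, profiling off, background thread started,
- //           // TThreadState known to be valid: take the fast path.
- //           return TSmallAllocator::Allocate<EAllocationKind::Untagged>(
- //               NullMemoryTag, rank, TThreadManager::GetThreadStateUnchecked());
- //       }
- //       return AllocateSmallSlow(rank); // hypothetical slow path
- //   }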
-
- Y_FORCE_INLINE static ui64 GetThreadControlWord()
- {
- return (&ThreadControlWord_)->Value;
- }
-
-
- static TMemoryTag GetCurrentMemoryTag()
- {
- return (&ThreadControlWord_)->Parts.MemoryTag;
- }
-
- static void SetCurrentMemoryTag(TMemoryTag tag)
- {
- Y_ABORT_UNLESS(tag <= MaxMemoryTag);
- (&ThreadControlWord_)->Parts.MemoryTag = tag;
- }
-
-
- static EMemoryZone GetCurrentMemoryZone()
- {
- return CurrentMemoryZone_;
- }
-
- static void SetCurrentMemoryZone(EMemoryZone zone)
- {
- CurrentMemoryZone_ = zone;
- }
-
-
- static void SetCurrentFiberId(TFiberId id)
- {
- CurrentFiberId_ = id;
- }
-
- static TFiberId GetCurrentFiberId()
- {
- return CurrentFiberId_;
- }
-
-private:
- static void DestroyThread(void*);
-
- TThreadState* AllocateThreadState();
-
- void RefThreadState(TThreadState* state)
- {
- auto result = ++state->RefCounter;
- Y_ABORT_UNLESS(result > 1);
- }
-
- void UnrefThreadState(TThreadState* state)
- {
- auto result = --state->RefCounter;
- Y_ABORT_UNLESS(result >= 0);
- if (result == 0) {
- DestroyThreadState(state);
- }
- }
-
- void DestroyThreadState(TThreadState* state);
-
- void AfterFork();
-
-private:
- // TThreadState instance for the current thread.
- // Initially null, then initialized when first needed.
- // TThreadState is destroyed upon thread termination (which is detected with
- // the help of pthread_key_create machinery), so this pointer can become null again.
- Y_POD_STATIC_THREAD(TThreadState*) ThreadState_;
-
- // Initially false, then set to true when TThreadState is destroyed.
- // If the thread requests its state afterwards, null is returned and no new state is (re-)created.
- // The caller must be able to deal with it.
- Y_POD_STATIC_THREAD(bool) ThreadStateDestroyed_;
-
- union TThreadControlWord
- {
- ui64 __attribute__((__may_alias__)) Value;
- struct TParts
- {
- // The current memory tag used in all allocations by this thread.
- ui32 __attribute__((__may_alias__)) MemoryTag;
- // Indicates if a valid TThreadState exists and can be obtained via GetThreadStateUnchecked.
- ui8 __attribute__((__may_alias__)) ThreadStateValid;
- // Indicates if allocation profiling is on.
- ui8 __attribute__((__may_alias__)) AllocationProfilingEnabled;
- // Indicates if all background threads are properly started.
- ui8 __attribute__((__may_alias__)) BackgroundThreadStarted;
- ui8 Padding[1];
- } Parts;
- };
- Y_POD_STATIC_THREAD(TThreadControlWord) ThreadControlWord_;
-
- // See memory zone API.
- Y_POD_STATIC_THREAD(EMemoryZone) CurrentMemoryZone_;
-
- // See fiber id API.
- Y_POD_STATIC_THREAD(TFiberId) CurrentFiberId_;
-
- pthread_key_t ThreadDtorKey_;
-
- static constexpr size_t ThreadStatesBatchSize = 1;
- TSystemPool<TThreadState, ThreadStatesBatchSize> ThreadStatePool_;
-
- NThreading::TForkAwareSpinLock ThreadRegistryLock_;
- TIntrusiveLinkedList<TThreadState, TThreadStateToRegistryNode> ThreadRegistry_;
-};
-
-Y_POD_THREAD(TThreadState*) TThreadManager::ThreadState_;
-Y_POD_THREAD(bool) TThreadManager::ThreadStateDestroyed_;
-Y_POD_THREAD(TThreadManager::TThreadControlWord) TThreadManager::ThreadControlWord_;
-Y_POD_THREAD(EMemoryZone) TThreadManager::CurrentMemoryZone_;
-Y_POD_THREAD(TFiberId) TThreadManager::CurrentFiberId_;
-
-TExplicitlyConstructableSingleton<TThreadManager> ThreadManager;
-
-////////////////////////////////////////////////////////////////////////////////
-
-void TConfigurationManager::SetAllocationProfilingEnabled(bool value)
-{
- // Update threads' TLS.
- ThreadManager->EnumerateThreadStatesSync(
- [&] {
- AllocationProfilingEnabled_.store(value);
- },
- [&] (auto* state) {
- if (state->AllocationProfilingEnabled) {
- *state->AllocationProfilingEnabled = value;
- }
- });
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Backtrace Manager
-//
-// Captures backtraces observed during allocations and assigns memory tags to them.
-// Memory tags are chosen sequentially starting from AllocationProfilingMemoryTagBase.
-//
-// For each backtrace we compute a 64-bit hash and use it as a key in a certain concurrent hashmap.
-// This hashmap is organized into BucketCount buckets, each consisting of BucketSize slots.
-//
-// Backtrace hash is translated into bucket index by taking the appropriate number of
-// its lower bits. For each slot, we remember a 32-bit fingerprint, which is
-// just the next 32 bits of the backtrace's hash, and the (previously assigned) memory tag.
-//
-// Upon access to the hashtable, the bucket is first scanned optimistically, without taking
-// any locks. In case of a miss, a per-bucket spinlock is acquired and the bucket is rescanned.
-//
-// The above scheme may involve collisions but we neglect their probability.
-//
-// If the whole hash table overflows (i.e. a total of MaxCapturedAllocationBacktraces
-// backtraces are captured) or the bucket overflows (i.e. all of its slots become occupied),
-// the allocation is annotated with AllocationProfilingUnknownMemoryTag. Such allocations
-// appear as having no backtrace whatsoever in the profiling reports.
-
-class TBacktraceManager
-{
-public:
- // Sets the provider used for collecting backtraces when allocation profiling
- // is turned ON.
- void SetBacktraceProvider(TBacktraceProvider provider)
- {
- BacktraceProvider_.store(provider);
- }
-
- // Captures the backtrace and inserts it into the hashtable.
- TMemoryTag GetMemoryTagFromBacktrace(int framesToSkip)
- {
- std::array<void*, MaxAllocationProfilingBacktraceDepth> frames;
- auto backtraceProvider = BacktraceProvider_.load();
- if (!backtraceProvider) {
- return NullMemoryTag;
- }
- auto frameCount = backtraceProvider(frames.data(), ConfigurationManager->GetProfilingBacktraceDepth(), framesToSkip);
- auto hash = GetBacktraceHash(frames.data(), frameCount);
- return CaptureBacktrace(hash, frames.data(), frameCount);
- }
-
- // Returns the backtrace corresponding to the given tag, if any.
- std::optional<TBacktrace> FindBacktrace(TMemoryTag tag)
- {
- if (tag < AllocationProfilingMemoryTagBase ||
- tag >= AllocationProfilingMemoryTagBase + MaxCapturedAllocationBacktraces)
- {
- return std::nullopt;
- }
- const auto& entry = Backtraces_[tag - AllocationProfilingMemoryTagBase];
- if (!entry.Captured.load()) {
- return std::nullopt;
- }
- return entry.Backtrace;
- }
-
-private:
- static constexpr int Log2BucketCount = 16;
- static constexpr int BucketCount = 1 << Log2BucketCount;
- static constexpr int BucketSize = 8;
-
- std::atomic<TBacktraceProvider> BacktraceProvider_ = nullptr;
-
- std::array<std::array<std::atomic<ui32>, BucketSize>, BucketCount> Fingerprints_ = {};
- std::array<std::array<std::atomic<TMemoryTag>, BucketSize>, BucketCount> MemoryTags_ = {};
- std::array<NThreading::TForkAwareSpinLock, BucketCount> BucketLocks_;
- std::atomic<TMemoryTag> CurrentMemoryTag_ = AllocationProfilingMemoryTagBase;
-
- struct TBacktraceEntry
- {
- TBacktrace Backtrace;
- std::atomic<bool> Captured = false;
- };
-
- std::array<TBacktraceEntry, MaxCapturedAllocationBacktraces> Backtraces_;
-
-private:
- static size_t GetBacktraceHash(void** frames, int frameCount)
- {
- size_t hash = 0;
- for (int index = 0; index < frameCount; ++index) {
- hash = CombineHashes(hash, THash<void*>()(frames[index]));
- }
- return hash;
- }
-
- TMemoryTag CaptureBacktrace(size_t hash, void** frames, int frameCount)
- {
- size_t bucketIndex = hash % BucketCount;
- ui32 fingerprint = (hash >> Log2BucketCount) & 0xffffffff;
- // Zero fingerprint indicates the slot is free; check and adjust to ensure
- // that regular fingerprints are non-zero.
- if (fingerprint == 0) {
- fingerprint = 1;
- }
-
- for (int slotIndex = 0; slotIndex < BucketSize; ++slotIndex) {
- auto currentFingerprint = Fingerprints_[bucketIndex][slotIndex].load(std::memory_order_relaxed);
- if (currentFingerprint == fingerprint) {
- return MemoryTags_[bucketIndex][slotIndex].load();
- }
- }
-
- auto guard = Guard(BucketLocks_[bucketIndex]);
-
- int spareSlotIndex = -1;
- for (int slotIndex = 0; slotIndex < BucketSize; ++slotIndex) {
- auto currentFingerprint = Fingerprints_[bucketIndex][slotIndex].load(std::memory_order_relaxed);
- if (currentFingerprint == fingerprint) {
- return MemoryTags_[bucketIndex][slotIndex];
- }
- if (currentFingerprint == 0) {
- spareSlotIndex = slotIndex;
- break;
- }
- }
-
- if (spareSlotIndex < 0) {
- return AllocationProfilingUnknownMemoryTag;
- }
-
- auto memoryTag = CurrentMemoryTag_++;
- if (memoryTag >= AllocationProfilingMemoryTagBase + MaxCapturedAllocationBacktraces) {
- return AllocationProfilingUnknownMemoryTag;
- }
-
- MemoryTags_[bucketIndex][spareSlotIndex].store(memoryTag);
- Fingerprints_[bucketIndex][spareSlotIndex].store(fingerprint);
-
- auto& entry = Backtraces_[memoryTag - AllocationProfilingMemoryTagBase];
- entry.Backtrace.FrameCount = frameCount;
- ::memcpy(entry.Backtrace.Frames.data(), frames, sizeof (void*) * frameCount);
- entry.Captured.store(true);
-
- return memoryTag;
- }
-};
-
-TExplicitlyConstructableSingleton<TBacktraceManager> BacktraceManager;
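-
-// Illustrative usage sketch (not part of the original allocator): the round trip
-// between capturing a backtrace during a profiled allocation and resolving it later
-// for a report. The framesToSkip value and the reporting loop are arbitrary examples;
-// the two calls are the methods of TBacktraceManager defined above.
-//
-//   // At allocation time (profiling enabled): derive a memory tag from the backtrace.
-//   auto tag = BacktraceManager->GetMemoryTagFromBacktrace(/*framesToSkip*/ 2);
-//
-//   // At reporting time: map the tag back to its captured backtrace, if any.
-//   if (auto backtrace = BacktraceManager->FindBacktrace(tag)) {
-//       for (int index = 0; index < backtrace->FrameCount; ++index) {
-//           // Symbolize backtrace->Frames[index] for the profiling report.
-//       }
-//   }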
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Mimics the counters of TThreadState but uses std::atomic to survive concurrent access.
-struct TGlobalState
- : public TGlobalShardedState
-{
- TTotalCounters<std::atomic<ssize_t>> TotalCounters;
- std::array<TGlobalLargeCounters, LargeRankCount> LargeArenaCounters;
- TGlobalUndumpableCounters UndumpableCounters;
-};
-
-TExplicitlyConstructableSingleton<TGlobalState> GlobalState;
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Accumulates various allocation statistics.
-class TStatisticsManager
-{
-public:
- template <EAllocationKind Kind = EAllocationKind::Tagged, class TState>
- static Y_FORCE_INLINE void IncrementTotalCounter(TState* state, TMemoryTag tag, EBasicCounter counter, ssize_t delta)
- {
- // This branch is typically resolved at compile time.
- if (Kind == EAllocationKind::Tagged && tag != NullMemoryTag) {
- IncrementTaggedTotalCounter(&state->TotalCounters, tag, counter, delta);
- } else {
- IncrementUntaggedTotalCounter(&state->TotalCounters, counter, delta);
- }
- }
-
- static Y_FORCE_INLINE void IncrementTotalCounter(TMemoryTag tag, EBasicCounter counter, ssize_t delta)
- {
- IncrementTotalCounter(GlobalState.Get(), tag, counter, delta);
- }
-
- void IncrementSmallArenaCounter(ESmallArenaCounter counter, size_t rank, ssize_t delta)
- {
- SmallArenaCounters_[rank][counter] += delta;
- }
-
- template <class TState>
- static Y_FORCE_INLINE void IncrementLargeArenaCounter(TState* state, size_t rank, ELargeArenaCounter counter, ssize_t delta)
- {
- state->LargeArenaCounters[rank][counter] += delta;
- }
-
- template <class TState>
- static Y_FORCE_INLINE void IncrementUndumpableCounter(TState* state, EUndumpableCounter counter, ssize_t delta)
- {
- state->UndumpableCounters[counter] += delta;
- }
-
- void IncrementHugeCounter(EHugeCounter counter, ssize_t delta)
- {
- HugeCounters_[counter] += delta;
- }
-
- void IncrementHugeUndumpableCounter(EUndumpableCounter counter, ssize_t delta)
- {
- HugeUndumpableCounters_[counter] += delta;
- }
-
- void IncrementSystemCounter(ESystemCounter counter, ssize_t delta)
- {
- SystemCounters_[counter] += delta;
- }
-
- // Computes memory usage for a list of tags by aggregating counters across threads.
- void GetTaggedMemoryCounters(const TMemoryTag* tags, size_t count, TEnumIndexedArray<EBasicCounter, ssize_t>* counters)
- {
- TMemoryTagGuard guard(NullMemoryTag);
-
- for (size_t index = 0; index < count; ++index) {
- counters[index][EBasicCounter::BytesAllocated] = 0;
- counters[index][EBasicCounter::BytesFreed] = 0;
- }
-
- for (size_t index = 0; index < count; ++index) {
- auto tag = tags[index];
- counters[index][EBasicCounter::BytesAllocated] += LoadTaggedTotalCounter(GlobalState->TotalCounters, tag, EBasicCounter::BytesAllocated);
- counters[index][EBasicCounter::BytesFreed] += LoadTaggedTotalCounter(GlobalState->TotalCounters, tag, EBasicCounter::BytesFreed);
- }
-
- ThreadManager->EnumerateThreadStatesAsync(
- [&] (const auto* state) {
- for (size_t index = 0; index < count; ++index) {
- auto tag = tags[index];
- counters[index][EBasicCounter::BytesAllocated] += LoadTaggedTotalCounter(state->TotalCounters, tag, EBasicCounter::BytesAllocated);
- counters[index][EBasicCounter::BytesFreed] += LoadTaggedTotalCounter(state->TotalCounters, tag, EBasicCounter::BytesFreed);
- }
- });
-
- for (size_t index = 0; index < count; ++index) {
- counters[index][EBasicCounter::BytesUsed] = GetUsed(counters[index][EBasicCounter::BytesAllocated], counters[index][EBasicCounter::BytesFreed]);
- }
- }
-
- void GetTaggedMemoryUsage(const TMemoryTag* tags, size_t count, size_t* results)
- {
- TMemoryTagGuard guard(NullMemoryTag);
-
- std::vector<TEnumIndexedArray<EBasicCounter, ssize_t>> counters;
- counters.resize(count);
- GetTaggedMemoryCounters(tags, count, counters.data());
-
- for (size_t index = 0; index < count; ++index) {
- results[index] = counters[index][EBasicCounter::BytesUsed];
- }
- }
-
- TEnumIndexedArray<ETotalCounter, ssize_t> GetTotalAllocationCounters()
- {
- TEnumIndexedArray<ETotalCounter, ssize_t> result;
-
- auto accumulate = [&] (const auto& counters) {
- result[ETotalCounter::BytesAllocated] += LoadCounter(counters[EBasicCounter::BytesAllocated]);
- result[ETotalCounter::BytesFreed] += LoadCounter(counters[EBasicCounter::BytesFreed]);
- };
-
- accumulate(GlobalState->TotalCounters.UntaggedCounters);
- accumulate(GlobalState->TotalCounters.CumulativeTaggedCounters);
-
- ThreadManager->EnumerateThreadStatesAsync(
- [&] (const auto* state) {
- accumulate(state->TotalCounters.UntaggedCounters);
- accumulate(state->TotalCounters.CumulativeTaggedCounters);
- });
-
- result[ETotalCounter::BytesUsed] = GetUsed(
- result[ETotalCounter::BytesAllocated],
- result[ETotalCounter::BytesFreed]);
-
- auto systemCounters = GetSystemAllocationCounters();
- result[ETotalCounter::BytesCommitted] += systemCounters[EBasicCounter::BytesUsed];
-
- auto hugeCounters = GetHugeAllocationCounters();
- result[ETotalCounter::BytesCommitted] += hugeCounters[EHugeCounter::BytesUsed];
-
- auto smallArenaCounters = GetSmallArenaAllocationCounters();
- for (size_t rank = 0; rank < SmallRankCount; ++rank) {
- result[ETotalCounter::BytesCommitted] += smallArenaCounters[rank][ESmallArenaCounter::BytesCommitted];
- }
-
- auto largeArenaCounters = GetLargeArenaAllocationCounters();
- for (size_t rank = 0; rank < LargeRankCount; ++rank) {
- result[ETotalCounter::BytesCommitted] += largeArenaCounters[rank][ELargeArenaCounter::BytesCommitted];
- }
-
- result[ETotalCounter::BytesUnaccounted] = std::max<ssize_t>(GetProcessRss() - result[ETotalCounter::BytesCommitted], 0);
-
- return result;
- }
-
- TEnumIndexedArray<ESmallCounter, ssize_t> GetSmallAllocationCounters()
- {
- TEnumIndexedArray<ESmallCounter, ssize_t> result;
-
- auto totalCounters = GetTotalAllocationCounters();
- result[ESmallCounter::BytesAllocated] = totalCounters[ETotalCounter::BytesAllocated];
- result[ESmallCounter::BytesFreed] = totalCounters[ETotalCounter::BytesFreed];
- result[ESmallCounter::BytesUsed] = totalCounters[ETotalCounter::BytesUsed];
-
- auto largeArenaCounters = GetLargeArenaAllocationCounters();
- for (size_t rank = 0; rank < LargeRankCount; ++rank) {
- result[ESmallCounter::BytesAllocated] -= largeArenaCounters[rank][ELargeArenaCounter::BytesAllocated];
- result[ESmallCounter::BytesFreed] -= largeArenaCounters[rank][ELargeArenaCounter::BytesFreed];
- result[ESmallCounter::BytesUsed] -= largeArenaCounters[rank][ELargeArenaCounter::BytesUsed];
- }
-
- auto hugeCounters = GetHugeAllocationCounters();
- result[ESmallCounter::BytesAllocated] -= hugeCounters[EHugeCounter::BytesAllocated];
- result[ESmallCounter::BytesFreed] -= hugeCounters[EHugeCounter::BytesFreed];
- result[ESmallCounter::BytesUsed] -= hugeCounters[EHugeCounter::BytesUsed];
-
- return result;
- }
-
- std::array<TLocalSmallCounters, SmallRankCount> GetSmallArenaAllocationCounters()
- {
- std::array<TLocalSmallCounters, SmallRankCount> result;
- for (size_t rank = 0; rank < SmallRankCount; ++rank) {
- for (auto counter : TEnumTraits<ESmallArenaCounter>::GetDomainValues()) {
- result[rank][counter] = SmallArenaCounters_[rank][counter].load();
- }
- }
- return result;
- }
-
- TEnumIndexedArray<ELargeCounter, ssize_t> GetLargeAllocationCounters()
- {
- TEnumIndexedArray<ELargeCounter, ssize_t> result;
- auto largeArenaCounters = GetLargeArenaAllocationCounters();
- for (size_t rank = 0; rank < LargeRankCount; ++rank) {
- result[ESmallCounter::BytesAllocated] += largeArenaCounters[rank][ELargeArenaCounter::BytesAllocated];
- result[ESmallCounter::BytesFreed] += largeArenaCounters[rank][ELargeArenaCounter::BytesFreed];
- result[ESmallCounter::BytesUsed] += largeArenaCounters[rank][ELargeArenaCounter::BytesUsed];
- }
- return result;
- }
-
- std::array<TLocalLargeCounters, LargeRankCount> GetLargeArenaAllocationCounters()
- {
- std::array<TLocalLargeCounters, LargeRankCount> result{};
-
- for (size_t rank = 0; rank < LargeRankCount; ++rank) {
- for (auto counter : TEnumTraits<ELargeArenaCounter>::GetDomainValues()) {
- result[rank][counter] = GlobalState->LargeArenaCounters[rank][counter].load();
- }
- }
-
- ThreadManager->EnumerateThreadStatesAsync(
- [&] (const auto* state) {
- for (size_t rank = 0; rank < LargeRankCount; ++rank) {
- for (auto counter : TEnumTraits<ELargeArenaCounter>::GetDomainValues()) {
- result[rank][counter] += state->LargeArenaCounters[rank][counter];
- }
- }
- });
-
- for (size_t rank = 0; rank < LargeRankCount; ++rank) {
- result[rank][ELargeArenaCounter::BytesUsed] = GetUsed(result[rank][ELargeArenaCounter::BytesAllocated], result[rank][ELargeArenaCounter::BytesFreed]);
- result[rank][ELargeArenaCounter::BlobsUsed] = GetUsed(result[rank][ELargeArenaCounter::BlobsAllocated], result[rank][ELargeArenaCounter::BlobsFreed]);
- }
-
- return result;
- }
-
- TLocalSystemCounters GetSystemAllocationCounters()
- {
- TLocalSystemCounters result;
- for (auto counter : TEnumTraits<ESystemCounter>::GetDomainValues()) {
- result[counter] = SystemCounters_[counter].load();
- }
- result[ESystemCounter::BytesUsed] = GetUsed(result[ESystemCounter::BytesAllocated], result[ESystemCounter::BytesFreed]);
- return result;
- }
-
- TLocalHugeCounters GetHugeAllocationCounters()
- {
- TLocalHugeCounters result;
- for (auto counter : TEnumTraits<EHugeCounter>::GetDomainValues()) {
- result[counter] = HugeCounters_[counter].load();
- }
- result[EHugeCounter::BytesUsed] = GetUsed(result[EHugeCounter::BytesAllocated], result[EHugeCounter::BytesFreed]);
- result[EHugeCounter::BlobsUsed] = GetUsed(result[EHugeCounter::BlobsAllocated], result[EHugeCounter::BlobsFreed]);
- return result;
- }
-
- TLocalUndumpableCounters GetUndumpableAllocationCounters()
- {
- TLocalUndumpableCounters result;
- for (auto counter : TEnumTraits<EUndumpableCounter>::GetDomainValues()) {
- result[counter] = HugeUndumpableCounters_[counter].load();
- result[counter] += GlobalState->UndumpableCounters[counter].load();
- }
-
- ThreadManager->EnumerateThreadStatesAsync(
- [&] (const auto* state) {
- result[EUndumpableCounter::BytesAllocated] += LoadCounter(state->UndumpableCounters[EUndumpableCounter::BytesAllocated]);
- result[EUndumpableCounter::BytesFreed] += LoadCounter(state->UndumpableCounters[EUndumpableCounter::BytesFreed]);
- });
-
- result[EUndumpableCounter::BytesUsed] = GetUsed(result[EUndumpableCounter::BytesAllocated], result[EUndumpableCounter::BytesFreed]);
- return result;
- }
-
- // Called before TThreadState is destroyed.
- // Adds the counter values from TThreadState to the global counters.
- void AccumulateLocalCounters(TThreadState* state)
- {
- for (auto counter : TEnumTraits<EBasicCounter>::GetDomainValues()) {
- GlobalState->TotalCounters.CumulativeTaggedCounters[counter] += state->TotalCounters.CumulativeTaggedCounters[counter];
- GlobalState->TotalCounters.UntaggedCounters[counter] += state->TotalCounters.UntaggedCounters[counter];
- }
- for (size_t index = 0; index < MaxTaggedCounterSets; ++index) {
- const auto* localSet = state->TotalCounters.FindTaggedCounterSet(index);
- if (!localSet) {
- continue;
- }
- auto* globalSet = GlobalState->TotalCounters.GetOrCreateTaggedCounterSet(index);
- for (size_t jndex = 0; jndex < TaggedCounterSetSize; ++jndex) {
- for (auto counter : TEnumTraits<EBasicCounter>::GetDomainValues()) {
- globalSet->Counters[jndex][counter] += localSet->Counters[jndex][counter];
- }
- }
- }
- for (size_t rank = 0; rank < LargeRankCount; ++rank) {
- for (auto counter : TEnumTraits<ELargeArenaCounter>::GetDomainValues()) {
- GlobalState->LargeArenaCounters[rank][counter] += state->LargeArenaCounters[rank][counter];
- }
- }
- for (auto counter : TEnumTraits<EUndumpableCounter>::GetDomainValues()) {
- GlobalState->UndumpableCounters[counter] += state->UndumpableCounters[counter];
- }
- }
-
-private:
- template <class TCounter>
- static ssize_t LoadTaggedTotalCounter(const TTotalCounters<TCounter>& counters, TMemoryTag tag, EBasicCounter counter)
- {
- const auto* set = counters.FindTaggedCounterSet(tag / TaggedCounterSetSize);
- if (Y_UNLIKELY(!set)) {
- return 0;
- }
- return LoadCounter(set->Counters[tag % TaggedCounterSetSize][counter]);
- }
-
- template <class TCounter>
- static Y_FORCE_INLINE void IncrementUntaggedTotalCounter(TTotalCounters<TCounter>* counters, EBasicCounter counter, ssize_t delta)
- {
- counters->UntaggedCounters[counter] += delta;
- }
-
- template <class TCounter>
- static Y_FORCE_INLINE void IncrementTaggedTotalCounter(TTotalCounters<TCounter>* counters, TMemoryTag tag, EBasicCounter counter, ssize_t delta)
- {
- counters->CumulativeTaggedCounters[counter] += delta;
- auto* set = counters->GetOrCreateTaggedCounterSet(tag / TaggedCounterSetSize);
- set->Counters[tag % TaggedCounterSetSize][counter] += delta;
- }
-
-
- static ssize_t GetProcessRss()
- {
- auto* file = ::fopen("/proc/self/statm", "r");
- if (!file) {
- return 0;
- }
-
- ssize_t dummy;
- ssize_t rssPages;
- auto readResult = fscanf(file, "%zd %zd", &dummy, &rssPages);
-
- ::fclose(file);
-
- if (readResult != 2) {
- return 0;
- }
-
- return rssPages * PageSize;
- }
-
-private:
- TGlobalSystemCounters SystemCounters_;
- std::array<TGlobalSmallCounters, SmallRankCount> SmallArenaCounters_;
- TGlobalHugeCounters HugeCounters_;
- TGlobalUndumpableCounters HugeUndumpableCounters_;
-};
-
-TExplicitlyConstructableSingleton<TStatisticsManager> StatisticsManager;
-
-////////////////////////////////////////////////////////////////////////////////
-
-void* TSystemAllocator::Allocate(size_t size)
-{
- auto rawSize = GetRawBlobSize<TSystemBlobHeader>(size);
- void* mmappedPtr;
- while (true) {
- auto currentPtr = CurrentPtr_.fetch_add(rawSize);
- Y_ABORT_UNLESS(currentPtr + rawSize <= SystemZoneEnd);
- mmappedPtr = MappedMemoryManager->Map(
- currentPtr,
- rawSize,
- MAP_FIXED_NOREPLACE | MAP_POPULATE);
- if (mmappedPtr == reinterpret_cast<void*>(currentPtr)) {
- break;
- }
- if (mmappedPtr != MAP_FAILED) {
- MappedMemoryManager->Unmap(mmappedPtr, rawSize);
- }
- }
- auto* blob = static_cast<TSystemBlobHeader*>(mmappedPtr);
- new (blob) TSystemBlobHeader(size);
- auto* result = HeaderToPtr(blob);
- PoisonUninitializedRange(result, size);
- StatisticsManager->IncrementSystemCounter(ESystemCounter::BytesAllocated, rawSize);
- return result;
-}
-
-void TSystemAllocator::Free(void* ptr)
-{
- auto* blob = PtrToHeader<TSystemBlobHeader>(ptr);
- auto rawSize = GetRawBlobSize<TSystemBlobHeader>(blob->Size);
- MappedMemoryManager->Unmap(blob, rawSize);
- StatisticsManager->IncrementSystemCounter(ESystemCounter::BytesFreed, rawSize);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Small allocator
-//
-// Allocations (called small chunks) are grouped by their sizes. The two most significant binary digits
-// determine the rank of a chunk, which guarantees at most 25% overhead in the worst case.
-// A pair of helper arrays (SizeToSmallRank1 and SizeToSmallRank2) is used to compute ranks; we expect
-// them to be permanently cached.
-//
-// Chunks of the same rank are served by a (small) arena allocator.
-// In fact, there are two arenas for each rank: one is for tagged allocations and another is for untagged ones.
-//
-// We encode chunk's rank and whether it is tagged or not in the resulting pointer as follows:
-// 0- 3: must be zero due to alignment
-// 4-39: varies
-// 40-44: rank
-// 45: 0 for untagged allocations, 1 for tagged ones
-// 46-63: zeroes
-// This enables computing chunk's rank and also determining if it is tagged in constant time
-// without any additional lookups. Also, one pays no space overhead for untagged allocations
-// and pays 16 bytes for each tagged one.
-//
-// Each arena allocates extents of memory by calling mmap for each extent of SmallExtentSize bytes.
-// (Recall that this memory is never reclaimed.)
-// Each extent is then sliced into segments of SmallSegmentSize bytes.
-// Whenever a new segment is acquired, its memory is pre-faulted by madvise(MADV_POPULATE).
-// New segments are acquired in a lock-free manner.
-//
-// Each thread maintains a separate cache of chunks of each rank (two caches to be precise: one
-// for tagged allocations and the other for untagged). These caches are fully thread-local and
-// involve no atomic operations.
-//
-// There are also global caches (per rank, for tagged and untagged allocations).
-// Instead of keeping individual chunks these work with chunk groups (collections of up to ChunksPerGroup
-// arbitrary chunks).
-//
-// When the local cache becomes exhausted, a group of chunks is fetched from the global cache
-// (if the latter is empty then the arena allocator is consulted).
-// Vice versa, if the local cache overflows, a group of chunks is moved from it to the global cache.
-//
-// Global caches and arena allocators also take care of (rare) cases when Allocate/Free is called
-// without a valid thread state (which happens during thread shutdown when TThreadState is already destroyed).
-//
-// Each arena allocates memory in a certain "data" zone of SmallZoneSize.
-// In addition to that zone, up to two "shadow" zones are maintained.
-//
-// The first one contains memory tags of chunks residing in the primary zone.
-// The second one (which is present if YTALLOC_NERVOUS is defined) contains
-// states of chunks. These states enable some simple internal sanity checks
-// (e.g. detect attempts to double-free a chunk).
-//
-// Addresses in the data zone are directly mapped to offsets in shadow zones.
-// When a segment of a small arena zone is allocated, the relevant portions of shadow
-// zones get initialized (and also accounted for as a system allocation).
-//
-// Shadow zones are memory-mapped with MAP_NORESERVE flag and are quite sparse.
-// These zones are omitted from core dumps due to their huge size and sparsity.
-
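-// An illustrative sketch (not part of the original allocator) of the pointer decoding
-// implied by the bit layout described above; the helpers actually used by the code
-// (e.g. PtrToSmallRank) are defined elsewhere in this file.
-//
-//   size_t DecodeSmallRank(const void* ptr)
-//   {
-//       // Bits 40-44 hold the rank.
-//       return (reinterpret_cast<uintptr_t>(ptr) >> 40) & 0x1f;
-//   }
-//
-//   bool IsTaggedSmallPtr(const void* ptr)
-//   {
-//       // Bit 45 distinguishes tagged (1) from untagged (0) allocations.
-//       return ((reinterpret_cast<uintptr_t>(ptr) >> 45) & 1) != 0;
-//   }
-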
-// For each small rank i, gives the max k such that 2^k <= SmallRankToSize[i].
-// A chunk pointer is mapped to its shadow image via the GetShadowOffset helper.
-// Note that chunk size is not always a power of 2. To avoid costly integer division,
-// the chunk pointer is translated by means of a bitwise shift only (leaving some bytes
-// of shadow zones unused). This array provides the needed shifts.
-constexpr int SmallRankToLogSize[SmallRankCount] = {
- 0,
- 4, 5, 5, 6, 6, 7,
- 7, 8, 8, 9, 9, 10, 10, 11,
- 11, 12, 12, 13, 13, 14, 14, 15
-};
-
-enum class ESmallChunkState : ui8
-{
- Spare = 0,
- Allocated = 0x61, // a
- Freed = 0x66 // f
-};
-
-class TSmallArenaAllocator
-{
-public:
- TSmallArenaAllocator(EAllocationKind kind, size_t rank, uintptr_t dataZoneStart)
- : Kind_(kind)
- , Rank_(rank)
- , LogSize_(SmallRankToLogSize[Rank_])
- , ChunkSize_(SmallRankToSize[Rank_])
- , DataZoneStart_(dataZoneStart)
- , DataZoneAllocator_(DataZoneStart_, DataZoneStart_ + SmallZoneSize)
- { }
-
- size_t PullMany(void** batch, size_t maxCount)
- {
- size_t count;
- while (true) {
- count = TryAllocateFromCurrentExtent(batch, maxCount);
- if (Y_LIKELY(count != 0)) {
- break;
- }
- PopulateAnotherExtent();
- }
- return count;
- }
-
- void* Allocate(size_t size)
- {
- void* ptr;
- auto count = PullMany(&ptr, 1);
- YTALLOC_PARANOID_ASSERT(count == 1);
- YTALLOC_PARANOID_ASSERT(PtrToSmallRank(ptr) == Rank_);
- PoisonUninitializedRange(ptr, size);
- UpdateChunkState(ptr, ESmallChunkState::Freed, ESmallChunkState::Allocated);
- return ptr;
- }
-
- TMemoryTag GetAndResetMemoryTag(const void* ptr)
- {
- auto& tag = MemoryTagZoneStart_[GetShadowOffset(ptr)];
- auto currentTag = tag;
- tag = NullMemoryTag;
- return currentTag;
- }
-
- void SetMemoryTag(void* ptr, TMemoryTag tag)
- {
- MemoryTagZoneStart_[GetShadowOffset(ptr)] = tag;
- }
-
- void UpdateChunkState(const void* ptr, ESmallChunkState expectedState, ESmallChunkState newState)
- {
-#ifdef YTALLOC_NERVOUS
- auto& state = ChunkStateZoneStart_[GetShadowOffset(ptr)];
- auto actualState = state;
- if (Y_UNLIKELY(actualState != expectedState)) {
- char message[256];
- snprintf(message, sizeof(message), "Invalid small chunk state at %p: expected %" PRIx8 ", actual %" PRIx8,
- ptr,
- static_cast<ui8>(expectedState),
- static_cast<ui8>(actualState));
- YTALLOC_TRAP(message);
- }
- state = newState;
-#else
- Y_UNUSED(ptr);
- Y_UNUSED(expectedState);
- Y_UNUSED(newState);
-#endif
- }
-
-private:
- size_t TryAllocateFromCurrentExtent(void** batch, size_t maxCount)
- {
- auto* oldPtr = CurrentPtr_.load();
- if (Y_UNLIKELY(!oldPtr)) {
- return 0;
- }
-
- auto* currentExtent = CurrentExtent_.load(std::memory_order_relaxed);
- if (Y_UNLIKELY(!currentExtent)) {
- return 0;
- }
-
- char* newPtr;
- while (true) {
- if (Y_UNLIKELY(oldPtr < currentExtent || oldPtr + ChunkSize_ + RightReadableAreaSize > currentExtent + SmallExtentSize)) {
- return 0;
- }
-
- newPtr = std::min(
- oldPtr + ChunkSize_ * maxCount,
- currentExtent + SmallExtentSize);
-
- auto* alignedNewPtr = AlignDownToSmallSegment(currentExtent, newPtr);
- if (alignedNewPtr > oldPtr) {
- newPtr = alignedNewPtr;
- }
-
- if (Y_LIKELY(CurrentPtr_.compare_exchange_weak(oldPtr, newPtr))) {
- break;
- }
- }
-
- auto* firstSegment = AlignUpToSmallSegment(currentExtent, oldPtr);
- auto* nextSegment = AlignUpToSmallSegment(currentExtent, newPtr);
- if (firstSegment != nextSegment) {
- auto size = nextSegment - firstSegment;
- MappedMemoryManager->PopulateReadOnly(firstSegment, size);
-
- StatisticsManager->IncrementSmallArenaCounter(ESmallArenaCounter::BytesCommitted, Rank_, size);
- StatisticsManager->IncrementSmallArenaCounter(ESmallArenaCounter::PagesCommitted, Rank_, size / PageSize);
- if (Kind_ == EAllocationKind::Tagged) {
- StatisticsManager->IncrementSystemCounter(ESystemCounter::BytesAllocated, size / ChunkSize_ * sizeof(TMemoryTag));
- }
-#ifdef YTALLOC_NERVOUS
- StatisticsManager->IncrementSystemCounter(ESystemCounter::BytesAllocated, size / ChunkSize_ * sizeof(ESmallChunkState));
-#endif
- }
-
- size_t count = 0;
- while (oldPtr != newPtr) {
- UpdateChunkState(oldPtr, ESmallChunkState::Spare, ESmallChunkState::Freed);
-
- batch[count] = oldPtr;
-
- oldPtr += ChunkSize_;
- count++;
- }
- return count;
- }
-
- void PopulateAnotherExtent()
- {
- auto lockGuard = GuardWithTiming(ExtentLock_);
-
- auto* currentPtr = CurrentPtr_.load();
- auto* currentExtent = CurrentExtent_.load();
-
- if (currentPtr && currentPtr + ChunkSize_ + RightReadableAreaSize <= currentExtent + SmallExtentSize) {
- // No need for a new extent.
- return;
- }
-
- auto* newExtent = static_cast<char*>(DataZoneAllocator_.Allocate(SmallExtentAllocSize, 0));
-
- AllocateShadowZones();
-
- YTALLOC_VERIFY(reinterpret_cast<uintptr_t>(newExtent) % SmallExtentAllocSize == 0);
- CurrentPtr_ = CurrentExtent_ = newExtent;
-
- StatisticsManager->IncrementSmallArenaCounter(ESmallArenaCounter::BytesMapped, Rank_, SmallExtentAllocSize);
- StatisticsManager->IncrementSmallArenaCounter(ESmallArenaCounter::PagesMapped, Rank_, SmallExtentAllocSize / PageSize);
- }
-
-private:
- const EAllocationKind Kind_;
- const size_t Rank_;
- const size_t LogSize_;
- const size_t ChunkSize_;
- const uintptr_t DataZoneStart_;
-
- TZoneAllocator DataZoneAllocator_;
-
- bool ShadowZonesAllocated_ = false;
- TMemoryTag* MemoryTagZoneStart_;
-#ifdef YTALLOC_NERVOUS
- ESmallChunkState* ChunkStateZoneStart_;
-#endif
-
- NThreading::TForkAwareSpinLock ExtentLock_;
- std::atomic<char*> CurrentPtr_ = nullptr;
- std::atomic<char*> CurrentExtent_ = nullptr;
-
- size_t GetShadowOffset(const void* ptr)
- {
- return (reinterpret_cast<uintptr_t>(ptr) - DataZoneStart_) >> LogSize_;
- }
-
- void AllocateShadowZones()
- {
- if (ShadowZonesAllocated_) {
- return;
- }
-
- if (Kind_ == EAllocationKind::Tagged) {
- MemoryTagZoneStart_ = MapShadowZone<TMemoryTag>();
- }
-#ifdef YTALLOC_NERVOUS
- ChunkStateZoneStart_ = MapShadowZone<ESmallChunkState>();
-#endif
-
- ShadowZonesAllocated_ = true;
- }
-
- template <class T>
- T* MapShadowZone()
- {
- auto size = AlignUp((SmallZoneSize >> LogSize_) * sizeof (T), PageSize);
- auto* ptr = static_cast<T*>(MappedMemoryManager->Map(SystemZoneStart, size, MAP_NORESERVE));
- MappedMemoryManager->DontDump(ptr, size);
- return ptr;
- }
-};
-
-TExplicitlyConstructableSingleton<TEnumIndexedArray<EAllocationKind, std::array<TExplicitlyConstructableSingleton<TSmallArenaAllocator>, SmallRankCount>>> SmallArenaAllocators;
-
-////////////////////////////////////////////////////////////////////////////////
-
-constexpr size_t ChunksPerGroup = 128;
-constexpr size_t GroupsBatchSize = 1024;
-
-static_assert(ChunksPerGroup <= MaxCachedChunksPerRank, "ChunksPerGroup > MaxCachedChunksPerRank");
-
-class TChunkGroup
- : public TFreeListItemBase<TChunkGroup>
-{
-public:
- bool IsEmpty() const
- {
- return Size_ == 0;
- }
-
- size_t ExtractAll(void** ptrs)
- {
- auto count = Size_;
- ::memcpy(ptrs, Ptrs_.data(), count * sizeof(void*));
- Size_ = 0;
- return count;
- }
-
- void PutOne(void* ptr)
- {
- PutMany(&ptr, 1);
- }
-
- void PutMany(void** ptrs, size_t count)
- {
- YTALLOC_PARANOID_ASSERT(Size_ == 0);
- YTALLOC_PARANOID_ASSERT(count <= ChunksPerGroup);
- ::memcpy(Ptrs_.data(), ptrs, count * sizeof(void*));
- Size_ = count;
- }
-
-private:
- size_t Size_ = 0; // <= ChunksPerGroup
- std::array<void*, ChunksPerGroup> Ptrs_;
-};
-
-class TGlobalSmallChunkCache
-{
-public:
- explicit TGlobalSmallChunkCache(EAllocationKind kind)
- : Kind_(kind)
- { }
-
-#ifdef YTALLOC_PARANOID
- void CanonizeChunkPtrs(TThreadState* state, size_t rank)
- {
- auto& chunkPtrPtr = state->SmallBlobCache[Kind_].RankToCachedChunkPtrHead[rank];
-
- auto leftBorder = state->SmallBlobCache[Kind_].RankToCachedChunkLeftBorder[rank];
- auto rightBorder = state->SmallBlobCache[Kind_].RankToCachedChunkRightBorder[rank];
-
- state->SmallBlobCache[Kind_].CachedChunkFull[rank] = false;
- if (chunkPtrPtr + 1 == rightBorder) {
- chunkPtrPtr = leftBorder;
- state->SmallBlobCache[Kind_].CachedChunkFull[rank] = true;
- }
-
- state->SmallBlobCache[Kind_].RankToCachedChunkPtrTail[rank] = leftBorder;
- }
-#endif
-
- bool TryMoveGroupToLocal(TThreadState* state, size_t rank)
- {
- auto& groups = RankToChunkGroups_[rank];
- auto* group = groups.Extract(state);
- if (!Y_LIKELY(group)) {
- return false;
- }
-
- YTALLOC_PARANOID_ASSERT(!group->IsEmpty());
-
- auto& chunkPtrPtr = state->SmallBlobCache[Kind_].RankToCachedChunkPtrHead[rank];
-#ifdef YTALLOC_PARANOID
- chunkPtrPtr = state->SmallBlobCache[Kind_].RankToCachedChunkLeftBorder[rank];
- state->SmallBlobCache[Kind_].RankToCachedChunkPtrTail[rank] = chunkPtrPtr;
-#endif
- auto chunkCount = group->ExtractAll(chunkPtrPtr + 1);
- chunkPtrPtr += chunkCount;
-
-#ifdef YTALLOC_PARANOID
- CanonizeChunkPtrs(state, rank);
-#endif
- GroupPool_.Free(state, group);
- return true;
- }
-
- void MoveGroupToGlobal(TThreadState* state, size_t rank)
- {
- auto* group = GroupPool_.Allocate(state);
-
- auto& chunkPtrPtr = state->SmallBlobCache[Kind_].RankToCachedChunkPtrHead[rank];
- YTALLOC_PARANOID_ASSERT(*(chunkPtrPtr + 1) == reinterpret_cast<void*>(TThreadState::RightSentinel));
- group->PutMany(chunkPtrPtr - ChunksPerGroup + 1, ChunksPerGroup);
- chunkPtrPtr -= ChunksPerGroup;
-#ifdef YTALLOC_PARANOID
- ::memset(chunkPtrPtr + 1, 0, sizeof(void*) * ChunksPerGroup);
- CanonizeChunkPtrs(state, rank);
-#endif
-
- auto& groups = RankToChunkGroups_[rank];
- YTALLOC_PARANOID_ASSERT(!group->IsEmpty());
- groups.Put(state, group);
- }
-
- void MoveOneToGlobal(void* ptr, size_t rank)
- {
- auto* group = GroupPool_.Allocate(&GlobalShardedState_);
- group->PutOne(ptr);
-
- auto& groups = RankToChunkGroups_[rank];
- YTALLOC_PARANOID_ASSERT(!group->IsEmpty());
- groups.Put(&GlobalShardedState_, group);
- }
-
-#ifdef YTALLOC_PARANOID
- void MoveAllToGlobal(TThreadState* state, size_t rank)
- {
- auto leftSentinelBorder = state->SmallBlobCache[Kind_].RankToCachedChunkLeftBorder[rank];
- auto rightSentinelBorder = state->SmallBlobCache[Kind_].RankToCachedChunkRightBorder[rank];
-
- auto& headPtr = state->SmallBlobCache[Kind_].RankToCachedChunkPtrHead[rank];
- auto& tailPtr = state->SmallBlobCache[Kind_].RankToCachedChunkPtrTail[rank];
-
- if (tailPtr == headPtr && !state->SmallBlobCache[Kind_].CachedChunkFull[rank]) {
- headPtr = leftSentinelBorder;
- return;
- }
-
- // (leftBorder, rightBorder]
- auto moveIntervalToGlobal = [=] (void** leftBorder, void** rightBorder) {
- while (true) {
- size_t count = 0;
- while (count < ChunksPerGroup && rightBorder != leftBorder) {
- --rightBorder;
- ++count;
- }
-
- if (count == 0) {
- break;
- }
-
- auto* group = GroupPool_.Allocate(state);
- group->PutMany(rightBorder + 1, count);
- ::memset(rightBorder + 1, 0, sizeof(void*) * count);
- auto& groups = RankToChunkGroups_[rank];
- groups.Put(state, group);
- }
- };
-
- if (tailPtr >= headPtr) {
- moveIntervalToGlobal(tailPtr, rightSentinelBorder - 1);
- moveIntervalToGlobal(leftSentinelBorder, headPtr);
- } else {
- moveIntervalToGlobal(tailPtr, headPtr);
- }
-
- headPtr = leftSentinelBorder;
- }
-#else
- void MoveAllToGlobal(TThreadState* state, size_t rank)
- {
- auto& chunkPtrPtr = state->SmallBlobCache[Kind_].RankToCachedChunkPtrHead[rank];
- while (true) {
- size_t count = 0;
- while (count < ChunksPerGroup && *chunkPtrPtr != reinterpret_cast<void*>(TThreadState::LeftSentinel)) {
- --chunkPtrPtr;
- ++count;
- }
-
- if (count == 0) {
- break;
- }
-
- auto* group = GroupPool_.Allocate(state);
- group->PutMany(chunkPtrPtr + 1, count);
- auto& groups = RankToChunkGroups_[rank];
- groups.Put(state, group);
- }
- }
-#endif
-
-private:
- const EAllocationKind Kind_;
-
- TGlobalShardedState GlobalShardedState_;
- TShardedSystemPool<TChunkGroup, GroupsBatchSize> GroupPool_;
- std::array<TShardedFreeList<TChunkGroup>, SmallRankCount> RankToChunkGroups_;
-};
-
-TExplicitlyConstructableSingleton<TEnumIndexedArray<EAllocationKind, TExplicitlyConstructableSingleton<TGlobalSmallChunkCache>>> GlobalSmallChunkCaches;
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TSmallAllocator
-{
-public:
- template <EAllocationKind Kind>
- static Y_FORCE_INLINE void* Allocate(TMemoryTag tag, size_t rank)
- {
- auto* state = TThreadManager::FindThreadState();
- if (Y_LIKELY(state)) {
- return Allocate<Kind>(tag, rank, state);
- }
- auto size = SmallRankToSize[rank];
- return AllocateGlobal<Kind>(tag, rank, size);
- }
-
-#ifdef YTALLOC_PARANOID
- template <EAllocationKind Kind>
- static Y_FORCE_INLINE void* Allocate(TMemoryTag tag, size_t rank, TThreadState* state)
- {
- auto& localCache = state->SmallBlobCache[Kind];
- auto& allocator = *(*SmallArenaAllocators)[Kind][rank];
-
- size_t size = SmallRankToSize[rank];
- StatisticsManager->IncrementTotalCounter<Kind>(state, tag, EBasicCounter::BytesAllocated, size);
-
- auto leftBorder = localCache.RankToCachedChunkLeftBorder[rank];
- auto rightBorder = localCache.RankToCachedChunkRightBorder[rank];
-
- void* result;
- while (true) {
- auto& chunkHeadPtr = localCache.RankToCachedChunkPtrHead[rank];
- auto& cachedHeadPtr = *(chunkHeadPtr + 1);
- auto* headPtr = cachedHeadPtr;
-
- auto& chunkTailPtr = localCache.RankToCachedChunkPtrTail[rank];
- auto& cachedTailPtr = *(chunkTailPtr + 1);
- auto* tailPtr = cachedTailPtr;
-
- auto& chunkFull = localCache.CachedChunkFull[rank];
-
- if (Y_LIKELY(chunkFull || headPtr != tailPtr)) {
- YTALLOC_PARANOID_ASSERT(tailPtr);
- cachedTailPtr = nullptr;
- ++chunkTailPtr;
- if (Y_LIKELY(chunkTailPtr + 1 == rightBorder)) {
- chunkTailPtr = leftBorder;
- }
-
- chunkFull = false;
- result = tailPtr;
- PoisonUninitializedRange(result, size);
- allocator.UpdateChunkState(result, ESmallChunkState::Freed, ESmallChunkState::Allocated);
- break;
- }
-
- auto& globalCache = *(*GlobalSmallChunkCaches)[Kind];
- if (!globalCache.TryMoveGroupToLocal(state, rank)) {
- result = allocator.Allocate(size);
- break;
- }
- }
-
- if constexpr(Kind == EAllocationKind::Tagged) {
- allocator.SetMemoryTag(result, tag);
- }
-
- return result;
- }
-
- template <EAllocationKind Kind>
- static Y_FORCE_INLINE void Free(void* ptr)
- {
- auto rank = PtrToSmallRank(ptr);
- auto size = SmallRankToSize[rank];
-
- auto& allocator = *(*SmallArenaAllocators)[Kind][rank];
-
- auto tag = NullMemoryTag;
- if constexpr(Kind == EAllocationKind::Tagged) {
- tag = allocator.GetAndResetMemoryTag(ptr);
- YTALLOC_PARANOID_ASSERT(tag != NullMemoryTag);
- }
-
- allocator.UpdateChunkState(ptr, ESmallChunkState::Allocated, ESmallChunkState::Freed);
- PoisonFreedRange(ptr, size);
-
- auto* state = TThreadManager::FindThreadState();
- if (Y_UNLIKELY(!state)) {
- FreeGlobal<Kind>(tag, ptr, rank, size);
- return;
- }
-
- StatisticsManager->IncrementTotalCounter<Kind>(state, tag, EBasicCounter::BytesFreed, size);
-
- auto& localCache = state->SmallBlobCache[Kind];
-
- auto leftBorder = localCache.RankToCachedChunkLeftBorder[rank];
- auto rightBorder = localCache.RankToCachedChunkRightBorder[rank];
-
- while (true) {
- auto& chunkHeadPtr = localCache.RankToCachedChunkPtrHead[rank];
- auto& headPtr = *(chunkHeadPtr + 1);
-
- auto& chunkTailPtr = localCache.RankToCachedChunkPtrTail[rank];
- auto& chunkFull = localCache.CachedChunkFull[rank];
-
- if (Y_LIKELY(!chunkFull)) {
- headPtr = ptr;
- ++chunkHeadPtr;
- if (Y_LIKELY(chunkHeadPtr + 1 == rightBorder)) {
- chunkHeadPtr = leftBorder;
- }
- chunkFull = (chunkHeadPtr == chunkTailPtr);
- break;
- }
-
- chunkHeadPtr = rightBorder - 1;
- chunkTailPtr = leftBorder;
-
- auto& globalCache = *(*GlobalSmallChunkCaches)[Kind];
- globalCache.MoveGroupToGlobal(state, rank);
- }
- }
-
-#else
-
- template <EAllocationKind Kind>
- static Y_FORCE_INLINE void* Allocate(TMemoryTag tag, size_t rank, TThreadState* state)
- {
- size_t size = SmallRankToSize[rank];
- StatisticsManager->IncrementTotalCounter<Kind>(state, tag, EBasicCounter::BytesAllocated, size);
-
- auto& localCache = state->SmallBlobCache[Kind];
- auto& allocator = *(*SmallArenaAllocators)[Kind][rank];
-
- void* result;
- while (true) {
- auto& chunkPtr = localCache.RankToCachedChunkPtrHead[rank];
- auto& cachedPtr = *chunkPtr;
- auto* ptr = cachedPtr;
- if (Y_LIKELY(ptr != reinterpret_cast<void*>(TThreadState::LeftSentinel))) {
- --chunkPtr;
- result = ptr;
- allocator.UpdateChunkState(result, ESmallChunkState::Freed, ESmallChunkState::Allocated);
- PoisonUninitializedRange(result, size);
- break;
- }
-
- auto& globalCache = *(*GlobalSmallChunkCaches)[Kind];
- if (globalCache.TryMoveGroupToLocal(state, rank)) {
- continue;
- }
-
- auto count = allocator.PullMany(
- chunkPtr + 1,
- SmallRankBatchSize[rank]);
- chunkPtr += count;
- }
-
- if constexpr(Kind == EAllocationKind::Tagged) {
- allocator.SetMemoryTag(result, tag);
- }
-
- return result;
- }
-
- template <EAllocationKind Kind>
- static Y_FORCE_INLINE void Free(void* ptr)
- {
- auto rank = PtrToSmallRank(ptr);
- auto size = SmallRankToSize[rank];
-
- auto& allocator = *(*SmallArenaAllocators)[Kind][rank];
-
- auto tag = NullMemoryTag;
- if constexpr(Kind == EAllocationKind::Tagged) {
- tag = allocator.GetAndResetMemoryTag(ptr);
- YTALLOC_PARANOID_ASSERT(tag != NullMemoryTag);
- }
-
- allocator.UpdateChunkState(ptr, ESmallChunkState::Allocated, ESmallChunkState::Freed);
- PoisonFreedRange(ptr, size);
-
- auto* state = TThreadManager::FindThreadState();
- if (Y_UNLIKELY(!state)) {
- FreeGlobal<Kind>(tag, ptr, rank, size);
- return;
- }
-
- StatisticsManager->IncrementTotalCounter<Kind>(state, tag, EBasicCounter::BytesFreed, size);
-
- auto& localCache = state->SmallBlobCache[Kind];
-
- while (true) {
- auto& chunkPtrPtr = localCache.RankToCachedChunkPtrHead[rank];
- auto& chunkPtr = *(chunkPtrPtr + 1);
- if (Y_LIKELY(chunkPtr != reinterpret_cast<void*>(TThreadState::RightSentinel))) {
- chunkPtr = ptr;
- ++chunkPtrPtr;
- break;
- }
-
- auto& globalCache = *(*GlobalSmallChunkCaches)[Kind];
- globalCache.MoveGroupToGlobal(state, rank);
- }
- }
-#endif
-
- static size_t GetAllocationSize(const void* ptr)
- {
- return SmallRankToSize[PtrToSmallRank(ptr)];
- }
-
- static size_t GetAllocationSize(size_t size)
- {
- return SmallRankToSize[SizeToSmallRank(size)];
- }
-
- static void PurgeCaches()
- {
- DoPurgeCaches<EAllocationKind::Untagged>();
- DoPurgeCaches<EAllocationKind::Tagged>();
- }
-
-private:
- template <EAllocationKind Kind>
- static void DoPurgeCaches()
- {
- auto* state = TThreadManager::GetThreadStateChecked();
- for (size_t rank = 0; rank < SmallRankCount; ++rank) {
- (*GlobalSmallChunkCaches)[Kind]->MoveAllToGlobal(state, rank);
- }
- }
-
- template <EAllocationKind Kind>
- static void* AllocateGlobal(TMemoryTag tag, size_t rank, size_t size)
- {
- StatisticsManager->IncrementTotalCounter(tag, EBasicCounter::BytesAllocated, size);
-
- auto& allocator = *(*SmallArenaAllocators)[Kind][rank];
- auto* result = allocator.Allocate(size);
-
- if constexpr(Kind == EAllocationKind::Tagged) {
- allocator.SetMemoryTag(result, tag);
- }
-
- return result;
- }
-
- template <EAllocationKind Kind>
- static void FreeGlobal(TMemoryTag tag, void* ptr, size_t rank, size_t size)
- {
- StatisticsManager->IncrementTotalCounter(tag, EBasicCounter::BytesFreed, size);
-
- auto& globalCache = *(*GlobalSmallChunkCaches)[Kind];
- globalCache.MoveOneToGlobal(ptr, rank);
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// Large blob allocator
-//
-// Like for small chunks, large blobs are grouped into arenas, where arena K handles
-// blobs of size (2^{K-1},2^K]. Memory is mapped in extents of LargeExtentSize bytes.
-// Each extent is split into segments of size 2^K (here segment is just a memory region, which may fully consist of
-// unmapped pages). When a segment is actually allocated, it becomes a blob and a TLargeBlobHeader
-// structure is placed at its start.
-//
-// When an extent is allocated, it is sliced into segments (not blobs, since no headers are placed and
-// no memory is touched). These segments are put into the disposed segment list.
-//
-// For each blob two separate sizes are maintained: BytesAcquired indicates the number of bytes
-// acquired via madvise(MADV_POPULATE) from the system; BytesAllocated (<= BytesAcquired) corresponds
-// to the number of bytes claimed by the user (including the header and page size alignment).
-// If BytesAllocated == 0 then this blob is spare, i.e.
-// was freed and remains cached for further possible reuse.
-//
-// When a new blob is being allocated, the allocator first tries to extract a spare blob. On success,
-// its acquired size is extended (if needed); the acquired size never shrinks on allocation.
-// If no spare blobs exist, a disposed segment is extracted and is turned into a blob (i.e.
-// its header is initialized) and the needed number of bytes is acquired. If no disposed segments
-// exist, then a new extent is allocated and sliced into segments.
-//
-// The above algorithm only claims memory from the system (by means of madvise(MADV_POPULATE));
-// the reclaim is handled by a separate background mechanism. Two types of reclaimable memory
-// regions are possible:
-// * spare: these correspond to spare blobs; upon reclaiming this region becomes a disposed segment
-// * overhead: these correspond to trailing parts of allocated blobs in [BytesAllocated, BytesAcquired) byte range
-//
-// Reclaiming spare blobs is easy as these are explicitly tracked by spare blob lists. To reclaim,
-// we atomically extract a blob from a spare list, call madvise(MADV_FREE), and put the pointer to
-// the disposed segment list.
-//
-// Reclaiming overheads is more complicated since (a) allocated blobs are never tracked directly and
-// (b) reclaiming them may interfere with Allocate and Free.
-//
-// To overcome (a), for each extent we maintain a bitmap marking segments that are actually blobs
-// (i.e. contain a header). (For simplicity and efficiency this bitmap is just a vector of bytes.)
-// These flags are updated in Allocate/Free with appropriate memory ordering. Note that
-// blobs are only disposed (and are turned into segments) by the background thread; if this
-// thread discovers a segment that is marked as a blob, then it is safe to assume that this segment
-// remains a blob unless the thread disposes it.
-//
-// To overcome (b), each large blob header maintains a spin lock. When blob B is extracted
-// from a spare list in Allocate, an acquisition is tried. If successful, B is returned to the
-// user. Otherwise it is assumed that B is currently being examined by the background
-// reclaimer thread. Allocate then skips this blob and retries extraction; the problem is that
-// since the spare list is basically a stack, one cannot just push B back into the spare list.
-// Instead, B is pushed into a special locked spare list. This list is purged by the background
-// thread on each tick and its items are pushed back into the usual spare list.
-//
-// A similar trick is used by Free: when invoked for blob B its spin lock acquisition is first
-// tried. Upon success, B is moved to the spare list. On failure, Free has to postpone this deallocation
-// by moving B into the freed locked list. This list, similarly, is being purged by the background thread.
-//
-// It remains to explain how the background thread computes the number of bytes to be reclaimed from
-// each arena. To this aim, we first compute the total number of reclaimable bytes.
-// This is the sum of spare and overhead bytes in all arenas minus the number of unreclaimable bytes.
-// The latter grows linearly in the number of used bytes and is capped from below by MinUnreclaimableLargeBytes
-// and from above by MaxUnreclaimableLargeBytes. SetLargeUnreclaimableCoeff and Set(Min|Max)LargeUnreclaimableBytes
-// enable tuning these control knobs. The reclaimable bytes are taken from arenas starting from those
-// with the largest spare and overhead volumes.
-//
-// The above implies that each large blob contains a fixed-size header preceding it.
-// Hence ptr % PageSize == sizeof (TLargeBlobHeader) for each ptr returned by Allocate
-// (since large blob sizes are larger than PageSize and are divisible by PageSize).
-// For AllocatePageAligned, however, ptr must be divisible by PageSize. To handle such an allocation, we
-// artificially increase its size and align the result of Allocate up to the next page boundary.
-// When handling a deallocation, ptr is moved back by UnalignPtr (which is capable of dealing
-// with both the results of Allocate and AllocatePageAligned).
-// This technique applies to both large and huge blobs.
-
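-// Illustrative sketch (not part of the original allocator) of the reclaim budget
-// computation described above, assuming "grows linearly" means coefficient * usedBytes;
-// the actual computation is performed by the background reclaimer below.
-//
-//   #include <algorithm> // std::clamp, std::max
-//
-//   ssize_t ComputeReclaimableLargeBytes(
-//       ssize_t spareBytes,          // total spare bytes across arenas
-//       ssize_t overheadBytes,       // total [BytesAllocated, BytesAcquired) overhead
-//       ssize_t usedBytes,           // total bytes currently claimed by users
-//       double unreclaimableCoeff,   // see SetLargeUnreclaimableCoeff
-//       ssize_t minUnreclaimable,    // see SetMinLargeUnreclaimableBytes
-//       ssize_t maxUnreclaimable)    // see SetMaxLargeUnreclaimableBytes
-//   {
-//       auto unreclaimable = std::clamp<ssize_t>(
-//           static_cast<ssize_t>(unreclaimableCoeff * usedBytes),
-//           minUnreclaimable,
-//           maxUnreclaimable);
-//       return std::max<ssize_t>(spareBytes + overheadBytes - unreclaimable, 0);
-//   }
-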
-enum ELargeBlobState : ui64
-{
- Allocated = 0x6c6c61656772616cULL, // largeall
- Spare = 0x727073656772616cULL, // largespr
- LockedSpare = 0x70736c656772616cULL, // largelsp
- LockedFreed = 0x72666c656772616cULL // largelfr
-};
-
-// Every large blob (either tagged or not) is prepended with this header.
-struct TLargeBlobHeader
- : public TFreeListItemBase<TLargeBlobHeader>
-{
- TLargeBlobHeader(
- TLargeBlobExtent* extent,
- size_t bytesAcquired,
- size_t bytesAllocated,
- TMemoryTag tag)
- : Extent(extent)
- , BytesAcquired(bytesAcquired)
- , Tag(tag)
- , BytesAllocated(bytesAllocated)
- , State(ELargeBlobState::Allocated)
- { }
-
- TLargeBlobExtent* Extent;
- // Number of bytes in all acquired pages.
- size_t BytesAcquired;
- std::atomic<bool> Locked = false;
- TMemoryTag Tag = NullMemoryTag;
- // For spare blobs this is zero.
- // For allocated blobs this is the number of bytes requested by the user (not including the header or any alignment).
- size_t BytesAllocated;
- ELargeBlobState State;
- char Padding[12];
-};
-
-CHECK_HEADER_ALIGNMENT(TLargeBlobHeader)
-
-struct TLargeBlobExtent
-{
- TLargeBlobExtent(size_t segmentCount, char* ptr)
- : SegmentCount(segmentCount)
- , Ptr(ptr)
- { }
-
- size_t SegmentCount;
- char* Ptr;
- TLargeBlobExtent* NextExtent = nullptr;
-
- std::atomic<bool> DisposedFlags[0];
-};
-
-// A helper node that enables storing a number of an extent's segments
-// in a free list. Recall that segments themselves do not possess any headers.
-struct TDisposedSegment
- : public TFreeListItemBase<TDisposedSegment>
-{
- size_t Index;
- TLargeBlobExtent* Extent;
-};
-
-struct TLargeArena
-{
- size_t Rank = 0;
- size_t SegmentSize = 0;
-
- TShardedFreeList<TLargeBlobHeader> SpareBlobs;
- TFreeList<TLargeBlobHeader> LockedSpareBlobs;
- TFreeList<TLargeBlobHeader> LockedFreedBlobs;
- TFreeList<TDisposedSegment> DisposedSegments;
- std::atomic<TLargeBlobExtent*> FirstExtent = nullptr;
-
- TLargeBlobExtent* CurrentOverheadScanExtent = nullptr;
- size_t CurrentOverheadScanSegment = 0;
-};
-
-template <bool Dumpable>
-class TLargeBlobAllocator
-{
-public:
- TLargeBlobAllocator()
- : ZoneAllocator_(LargeZoneStart(Dumpable), LargeZoneEnd(Dumpable))
- {
- for (size_t rank = 0; rank < Arenas_.size(); ++rank) {
- auto& arena = Arenas_[rank];
- arena.Rank = rank;
- arena.SegmentSize = (1ULL << rank);
- }
- }
-
- void* Allocate(size_t size)
- {
- auto* state = TThreadManager::FindThreadState();
- return Y_LIKELY(state)
- ? DoAllocate(state, size)
- : DoAllocate(GlobalState.Get(), size);
- }
-
- void Free(void* ptr)
- {
- auto* state = TThreadManager::FindThreadState();
- if (Y_LIKELY(state)) {
- DoFree(state, ptr);
- } else {
- DoFree(GlobalState.Get(), ptr);
- }
- }
-
- static size_t GetAllocationSize(const void* ptr)
- {
- UnalignPtr<TLargeBlobHeader>(ptr);
- const auto* blob = PtrToHeader<TLargeBlobHeader>(ptr);
- return blob->BytesAllocated;
- }
-
- static size_t GetAllocationSize(size_t size)
- {
- return GetBlobAllocationSize<TLargeBlobHeader>(size);
- }
-
- void RunBackgroundTasks()
- {
- ReinstallLockedBlobs();
- ReclaimMemory();
- }
-
- void SetBacktraceProvider(TBacktraceProvider provider)
- {
- BacktraceProvider_.store(provider);
- }
-
-private:
- template <class TState>
- void PopulateArenaPages(TState* state, TLargeArena* arena, void* ptr, size_t size)
- {
- MappedMemoryManager->Populate(ptr, size);
- StatisticsManager->IncrementLargeArenaCounter(state, arena->Rank, ELargeArenaCounter::BytesPopulated, size);
- StatisticsManager->IncrementLargeArenaCounter(state, arena->Rank, ELargeArenaCounter::PagesPopulated, size / PageSize);
- StatisticsManager->IncrementLargeArenaCounter(state, arena->Rank, ELargeArenaCounter::BytesCommitted, size);
- StatisticsManager->IncrementLargeArenaCounter(state, arena->Rank, ELargeArenaCounter::PagesCommitted, size / PageSize);
- }
-
- template <class TState>
- void ReleaseArenaPages(TState* state, TLargeArena* arena, void* ptr, size_t size)
- {
- MappedMemoryManager->Release(ptr, size);
- StatisticsManager->IncrementLargeArenaCounter(state, arena->Rank, ELargeArenaCounter::BytesReleased, size);
- StatisticsManager->IncrementLargeArenaCounter(state, arena->Rank, ELargeArenaCounter::PagesReleased, size / PageSize);
- StatisticsManager->IncrementLargeArenaCounter(state, arena->Rank, ELargeArenaCounter::BytesCommitted, -size);
- StatisticsManager->IncrementLargeArenaCounter(state, arena->Rank, ELargeArenaCounter::PagesCommitted, -size / PageSize);
- }
-
- bool TryLockBlob(TLargeBlobHeader* blob)
- {
- bool expected = false;
- return blob->Locked.compare_exchange_strong(expected, true);
- }
-
- void UnlockBlob(TLargeBlobHeader* blob)
- {
- blob->Locked.store(false);
- }
-
- template <class TState>
- void MoveBlobToSpare(TState* state, TLargeArena* arena, TLargeBlobHeader* blob, bool unlock)
- {
- auto rank = arena->Rank;
- auto size = blob->BytesAllocated;
- auto rawSize = GetRawBlobSize<TLargeBlobHeader>(size);
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesSpare, blob->BytesAcquired);
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesOverhead, -(blob->BytesAcquired - rawSize));
- blob->BytesAllocated = 0;
- if (unlock) {
- UnlockBlob(blob);
- } else {
- YTALLOC_VERIFY(!blob->Locked.load());
- }
- blob->State = ELargeBlobState::Spare;
- arena->SpareBlobs.Put(state, blob);
- }
-
- size_t GetBytesToReclaim(const std::array<TLocalLargeCounters, LargeRankCount>& arenaCounters)
- {
- size_t totalBytesAllocated = 0;
- size_t totalBytesFreed = 0;
- size_t totalBytesSpare = 0;
- size_t totalBytesOverhead = 0;
- for (size_t rank = 0; rank < Arenas_.size(); ++rank) {
- const auto& counters = arenaCounters[rank];
- totalBytesAllocated += counters[ELargeArenaCounter::BytesAllocated];
- totalBytesFreed += counters[ELargeArenaCounter::BytesFreed];
- totalBytesSpare += counters[ELargeArenaCounter::BytesSpare];
- totalBytesOverhead += counters[ELargeArenaCounter::BytesOverhead];
- }
-
- auto totalBytesUsed = totalBytesAllocated - totalBytesFreed;
- auto totalBytesReclaimable = totalBytesSpare + totalBytesOverhead;
-
- auto threshold = ClampVal(
- static_cast<size_t>(ConfigurationManager->GetLargeUnreclaimableCoeff() * totalBytesUsed),
- ConfigurationManager->GetMinLargeUnreclaimableBytes(),
- ConfigurationManager->GetMaxLargeUnreclaimableBytes());
- if (totalBytesReclaimable < threshold) {
- return 0;
- }
-
- auto bytesToReclaim = totalBytesReclaimable - threshold;
- return AlignUp(bytesToReclaim, PageSize);
- }
-
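A purely illustrative walk-through of GetBytesToReclaim above (the knob values and counter readings are made up for the example; the real defaults live in the configuration manager and are not part of this diff):

    totalBytesUsed        = BytesAllocated - BytesFreed = 48 GB - 44 GB = 4 GB
    totalBytesReclaimable = BytesSpare + BytesOverhead  = 1.5 GB
    threshold             = clamp(coeff * used, min, max)
                          = clamp(0.05 * 4 GB, 128 MB, 10 GB) ≈ 205 MB (not clamped)
    bytesToReclaim        = AlignUp(1.5 GB - 205 MB, PageSize) ≈ 1.3 GB

    Had totalBytesReclaimable fallen below the threshold, the method would return 0 and
    nothing would be reclaimed on this background tick.
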
- void ReinstallLockedSpareBlobs(TLargeArena* arena)
- {
- auto* blob = arena->LockedSpareBlobs.ExtractAll();
- auto* state = TThreadManager::GetThreadStateChecked();
-
- size_t count = 0;
- while (blob) {
- auto* nextBlob = blob->Next.load();
- YTALLOC_VERIFY(!blob->Locked.load());
- AssertBlobState(blob, ELargeBlobState::LockedSpare);
- blob->State = ELargeBlobState::Spare;
- arena->SpareBlobs.Put(state, blob);
- blob = nextBlob;
- ++count;
- }
-
- if (count > 0) {
- YTALLOC_LOG_DEBUG("Locked spare blobs reinstalled (Rank: %d, Blobs: %zu)",
- arena->Rank,
- count);
- }
- }
-
- void ReinstallLockedFreedBlobs(TLargeArena* arena)
- {
- auto* state = TThreadManager::GetThreadStateChecked();
- auto* blob = arena->LockedFreedBlobs.ExtractAll();
-
- size_t count = 0;
- while (blob) {
- auto* nextBlob = blob->Next.load();
- AssertBlobState(blob, ELargeBlobState::LockedFreed);
- MoveBlobToSpare(state, arena, blob, false);
- ++count;
- blob = nextBlob;
- }
-
- if (count > 0) {
- YTALLOC_LOG_DEBUG("Locked freed blobs reinstalled (Rank: %d, Blobs: %zu)",
- arena->Rank,
- count);
- }
- }
-
- void ReclaimSpareMemory(TLargeArena* arena, ssize_t bytesToReclaim)
- {
- if (bytesToReclaim <= 0) {
- return;
- }
-
- auto rank = arena->Rank;
- auto* state = TThreadManager::GetThreadStateChecked();
-
- YTALLOC_LOG_DEBUG("Started processing spare memory in arena (BytesToReclaim: %zdM, Rank: %d)",
- bytesToReclaim / 1_MB,
- rank);
-
- size_t bytesReclaimed = 0;
- size_t blobsReclaimed = 0;
- while (bytesToReclaim > 0) {
- auto* blob = arena->SpareBlobs.ExtractRoundRobin(state);
- if (!blob) {
- break;
- }
-
- AssertBlobState(blob, ELargeBlobState::Spare);
- YTALLOC_VERIFY(blob->BytesAllocated == 0);
-
- auto bytesAcquired = blob->BytesAcquired;
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesSpare, -bytesAcquired);
- bytesToReclaim -= bytesAcquired;
- bytesReclaimed += bytesAcquired;
- blobsReclaimed += 1;
-
- auto* extent = blob->Extent;
- auto* ptr = reinterpret_cast<char*>(blob);
- ReleaseArenaPages(
- state,
- arena,
- ptr,
- bytesAcquired);
-
- size_t segmentIndex = (ptr - extent->Ptr) / arena->SegmentSize;
- extent->DisposedFlags[segmentIndex].store(true, std::memory_order_relaxed);
-
- auto* disposedSegment = DisposedSegmentPool_.Allocate();
- disposedSegment->Index = segmentIndex;
- disposedSegment->Extent = extent;
- arena->DisposedSegments.Put(disposedSegment);
- }
-
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::SpareBytesReclaimed, bytesReclaimed);
-
- YTALLOC_LOG_DEBUG("Finished processing spare memory in arena (Rank: %d, BytesReclaimed: %zdM, BlobsReclaimed: %zu)",
- arena->Rank,
- bytesReclaimed / 1_MB,
- blobsReclaimed);
- }
-
- void ReclaimOverheadMemory(TLargeArena* arena, ssize_t bytesToReclaim)
- {
- if (bytesToReclaim == 0) {
- return;
- }
-
- auto* state = TThreadManager::GetThreadStateChecked();
- auto rank = arena->Rank;
-
- YTALLOC_LOG_DEBUG("Started processing overhead memory in arena (BytesToReclaim: %zdM, Rank: %d)",
- bytesToReclaim / 1_MB,
- rank);
-
- size_t extentsTraversed = 0;
- size_t segmentsTraversed = 0;
- size_t bytesReclaimed = 0;
-
- bool restartedFromFirstExtent = false;
- auto& currentExtent = arena->CurrentOverheadScanExtent;
- auto& currentSegment = arena->CurrentOverheadScanSegment;
- while (bytesToReclaim > 0) {
- if (!currentExtent) {
- if (restartedFromFirstExtent) {
- break;
- }
- currentExtent = arena->FirstExtent.load();
- if (!currentExtent) {
- break;
- }
- restartedFromFirstExtent = true;
- }
-
- while (currentSegment < currentExtent->SegmentCount && bytesToReclaim > 0) {
- ++segmentsTraversed;
- if (!currentExtent->DisposedFlags[currentSegment].load(std::memory_order_acquire)) {
- auto* ptr = currentExtent->Ptr + currentSegment * arena->SegmentSize;
- auto* blob = reinterpret_cast<TLargeBlobHeader*>(ptr);
- YTALLOC_PARANOID_ASSERT(blob->Extent == currentExtent);
- if (TryLockBlob(blob)) {
- if (blob->BytesAllocated > 0) {
- size_t rawSize = GetRawBlobSize<TLargeBlobHeader>(blob->BytesAllocated);
- size_t bytesToRelease = blob->BytesAcquired - rawSize;
- if (bytesToRelease > 0) {
- ReleaseArenaPages(
- state,
- arena,
- ptr + blob->BytesAcquired - bytesToRelease,
- bytesToRelease);
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesOverhead, -bytesToRelease);
- blob->BytesAcquired = rawSize;
- bytesToReclaim -= bytesToRelease;
- bytesReclaimed += bytesToRelease;
- }
- }
- UnlockBlob(blob);
- }
- }
- ++currentSegment;
- }
-
- ++extentsTraversed;
- currentSegment = 0;
- currentExtent = currentExtent->NextExtent;
- }
-
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::OverheadBytesReclaimed, bytesReclaimed);
-
- YTALLOC_LOG_DEBUG("Finished processing overhead memory in arena (Rank: %d, Extents: %zu, Segments: %zu, BytesReclaimed: %zuM)",
- arena->Rank,
- extentsTraversed,
- segmentsTraversed,
- bytesReclaimed / 1_MB);
- }
-
- void ReinstallLockedBlobs()
- {
- for (auto& arena : Arenas_) {
- ReinstallLockedSpareBlobs(&arena);
- ReinstallLockedFreedBlobs(&arena);
- }
- }
-
- void ReclaimMemory()
- {
- auto arenaCounters = StatisticsManager->GetLargeArenaAllocationCounters();
- ssize_t bytesToReclaim = GetBytesToReclaim(arenaCounters);
- if (bytesToReclaim == 0) {
- return;
- }
-
- YTALLOC_LOG_DEBUG("Memory reclaim started (BytesToReclaim: %zdM)",
- bytesToReclaim / 1_MB);
-
- std::array<ssize_t, LargeRankCount * 2> bytesReclaimablePerArena;
- for (size_t rank = 0; rank < LargeRankCount; ++rank) {
- bytesReclaimablePerArena[rank * 2] = arenaCounters[rank][ELargeArenaCounter::BytesOverhead];
- bytesReclaimablePerArena[rank * 2 + 1] = arenaCounters[rank][ELargeArenaCounter::BytesSpare];
- }
-
- std::array<ssize_t, LargeRankCount * 2> bytesToReclaimPerArena{};
- while (bytesToReclaim > 0) {
- ssize_t maxBytes = std::numeric_limits<ssize_t>::min();
- int maxIndex = -1;
- for (int index = 0; index < LargeRankCount * 2; ++index) {
- if (bytesReclaimablePerArena[index] > maxBytes) {
- maxBytes = bytesReclaimablePerArena[index];
- maxIndex = index;
- }
- }
-
- if (maxIndex < 0) {
- break;
- }
-
- auto bytesToReclaimPerStep = std::min<ssize_t>({bytesToReclaim, maxBytes, 4_MB});
- if (bytesToReclaimPerStep < 0) {
- break;
- }
-
- bytesToReclaimPerArena[maxIndex] += bytesToReclaimPerStep;
- bytesReclaimablePerArena[maxIndex] -= bytesToReclaimPerStep;
- bytesToReclaim -= bytesToReclaimPerStep;
- }
-
- for (auto& arena : Arenas_) {
- auto rank = arena.Rank;
- ReclaimOverheadMemory(&arena, bytesToReclaimPerArena[rank * 2]);
- ReclaimSpareMemory(&arena, bytesToReclaimPerArena[rank * 2 + 1]);
- }
-
- YTALLOC_LOG_DEBUG("Memory reclaim finished");
- }
-
- template <class TState>
- void AllocateArenaExtent(TState* state, TLargeArena* arena)
- {
- auto rank = arena->Rank;
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::ExtentsAllocated, 1);
-
- size_t segmentCount = LargeExtentSize / arena->SegmentSize;
- size_t extentHeaderSize = AlignUp(sizeof (TLargeBlobExtent) + sizeof (TLargeBlobExtent::DisposedFlags[0]) * segmentCount, PageSize);
- size_t allocationSize = extentHeaderSize + LargeExtentSize;
-
- auto* ptr = ZoneAllocator_.Allocate(allocationSize, MAP_NORESERVE);
- if (!Dumpable) {
- MappedMemoryManager->DontDump(ptr, allocationSize);
- }
-
- if (auto backtraceProvider = BacktraceProvider_.load()) {
- std::array<void*, MaxAllocationProfilingBacktraceDepth> frames;
- auto frameCount = backtraceProvider(
- frames.data(),
- MaxAllocationProfilingBacktraceDepth,
- 3);
- MmapObservationManager->EnqueueEvent(allocationSize, frames, frameCount);
- }
-
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesMapped, allocationSize);
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::PagesMapped, allocationSize / PageSize);
-
- auto* extent = static_cast<TLargeBlobExtent*>(ptr);
- MappedMemoryManager->Populate(ptr, extentHeaderSize);
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesPopulated, extentHeaderSize);
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::PagesPopulated, extentHeaderSize / PageSize);
- StatisticsManager->IncrementSystemCounter(ESystemCounter::BytesAllocated, extentHeaderSize);
-
- new (extent) TLargeBlobExtent(segmentCount, static_cast<char*>(ptr) + extentHeaderSize);
-
- for (size_t index = 0; index < segmentCount; ++index) {
- auto* disposedSegment = DisposedSegmentPool_.Allocate();
- disposedSegment->Index = index;
- disposedSegment->Extent = extent;
- arena->DisposedSegments.Put(disposedSegment);
- extent->DisposedFlags[index].store(true);
- }
-
- auto* expectedFirstExtent = arena->FirstExtent.load();
- do {
- extent->NextExtent = expectedFirstExtent;
- } while (Y_UNLIKELY(!arena->FirstExtent.compare_exchange_weak(expectedFirstExtent, extent)));
- }
-
- template <class TState>
- void* DoAllocate(TState* state, size_t size)
- {
- auto rawSize = GetRawBlobSize<TLargeBlobHeader>(size);
- auto rank = GetLargeRank(rawSize);
- auto tag = ConfigurationManager->IsLargeArenaAllocationProfiled(rank)
- ? BacktraceManager->GetMemoryTagFromBacktrace(3)
- : TThreadManager::GetCurrentMemoryTag();
- auto& arena = Arenas_[rank];
- YTALLOC_PARANOID_ASSERT(rawSize <= arena.SegmentSize);
-
- TLargeBlobHeader* blob;
- while (true) {
- blob = arena.SpareBlobs.Extract(state);
- if (blob) {
- AssertBlobState(blob, ELargeBlobState::Spare);
- if (TryLockBlob(blob)) {
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesSpare, -blob->BytesAcquired);
- if (blob->BytesAcquired < rawSize) {
- PopulateArenaPages(
- state,
- &arena,
- reinterpret_cast<char*>(blob) + blob->BytesAcquired,
- rawSize - blob->BytesAcquired);
- blob->BytesAcquired = rawSize;
- } else {
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesOverhead, blob->BytesAcquired - rawSize);
- }
- YTALLOC_PARANOID_ASSERT(blob->BytesAllocated == 0);
- blob->BytesAllocated = size;
- blob->Tag = tag;
- blob->State = ELargeBlobState::Allocated;
- UnlockBlob(blob);
- break;
- } else {
- blob->State = ELargeBlobState::LockedSpare;
- arena.LockedSpareBlobs.Put(blob);
- }
- }
-
- auto* disposedSegment = arena.DisposedSegments.Extract();
- if (disposedSegment) {
- auto index = disposedSegment->Index;
- auto* extent = disposedSegment->Extent;
- DisposedSegmentPool_.Free(disposedSegment);
-
- auto* ptr = extent->Ptr + index * arena.SegmentSize;
- PopulateArenaPages(
- state,
- &arena,
- ptr,
- rawSize);
-
- blob = reinterpret_cast<TLargeBlobHeader*>(ptr);
- new (blob) TLargeBlobHeader(extent, rawSize, size, tag);
-
- extent->DisposedFlags[index].store(false, std::memory_order_release);
-
- break;
- }
-
- AllocateArenaExtent(state, &arena);
- }
-
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BlobsAllocated, 1);
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesAllocated, size);
- StatisticsManager->IncrementTotalCounter(state, tag, EBasicCounter::BytesAllocated, size);
- if (!Dumpable) {
- StatisticsManager->IncrementUndumpableCounter(state, EUndumpableCounter::BytesAllocated, size);
- }
-
- auto* result = HeaderToPtr(blob);
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(result) >= LargeZoneStart(Dumpable) && reinterpret_cast<uintptr_t>(result) < LargeZoneEnd(Dumpable));
- PoisonUninitializedRange(result, size);
- return result;
- }
-
- template <class TState>
- void DoFree(TState* state, void* ptr)
- {
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(ptr) >= LargeZoneStart(Dumpable) && reinterpret_cast<uintptr_t>(ptr) < LargeZoneEnd(Dumpable));
-
- auto* blob = PtrToHeader<TLargeBlobHeader>(ptr);
- AssertBlobState(blob, ELargeBlobState::Allocated);
-
- auto size = blob->BytesAllocated;
- PoisonFreedRange(ptr, size);
-
- auto rawSize = GetRawBlobSize<TLargeBlobHeader>(size);
- auto rank = GetLargeRank(rawSize);
- auto& arena = Arenas_[rank];
- YTALLOC_PARANOID_ASSERT(blob->BytesAcquired <= arena.SegmentSize);
-
- auto tag = blob->Tag;
-
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BlobsFreed, 1);
- StatisticsManager->IncrementLargeArenaCounter(state, rank, ELargeArenaCounter::BytesFreed, size);
- StatisticsManager->IncrementTotalCounter(state, tag, EBasicCounter::BytesFreed, size);
- if (!Dumpable) {
- StatisticsManager->IncrementUndumpableCounter(state, EUndumpableCounter::BytesFreed, size);
- }
-
- if (TryLockBlob(blob)) {
- MoveBlobToSpare(state, &arena, blob, true);
- } else {
- blob->State = ELargeBlobState::LockedFreed;
- arena.LockedFreedBlobs.Put(blob);
- }
- }
-
-private:
- TZoneAllocator ZoneAllocator_;
- std::array<TLargeArena, LargeRankCount> Arenas_;
-
- static constexpr size_t DisposedSegmentsBatchSize = 1024;
- TSystemPool<TDisposedSegment, DisposedSegmentsBatchSize> DisposedSegmentPool_;
-
- std::atomic<TBacktraceProvider> BacktraceProvider_ = nullptr;
-};
-
-TExplicitlyConstructableSingleton<TLargeBlobAllocator<true>> DumpableLargeBlobAllocator;
-TExplicitlyConstructableSingleton<TLargeBlobAllocator<false>> UndumpableLargeBlobAllocator;
-
-////////////////////////////////////////////////////////////////////////////////
-// Huge blob allocator
-//
-// Basically a wrapper for TZoneAllocator.
-
-// Acts as a signature to detect broken headers.
-enum class EHugeBlobState : ui64
-{
- Allocated = 0x72666c656772616cULL // hugeallc
-};
-
-// Every huge blob (either tagged or not) is prepended with this header.
-struct THugeBlobHeader
-{
- THugeBlobHeader(TMemoryTag tag, size_t size, bool dumpable)
- : Tag(tag)
- , Size(size)
- , State(EHugeBlobState::Allocated)
- , Dumpable(dumpable)
- { }
-
- TMemoryTag Tag;
- size_t Size;
- EHugeBlobState State;
- bool Dumpable;
- char Padding[7];
-};
-
-CHECK_HEADER_ALIGNMENT(THugeBlobHeader)
-
-class THugeBlobAllocator
-{
-public:
- THugeBlobAllocator()
- : ZoneAllocator_(HugeZoneStart, HugeZoneEnd)
- { }
-
- void* Allocate(size_t size, bool dumpable)
- {
- YTALLOC_VERIFY(size <= MaxAllocationSize);
- auto tag = TThreadManager::GetCurrentMemoryTag();
- auto rawSize = GetRawBlobSize<THugeBlobHeader>(size);
- auto* blob = static_cast<THugeBlobHeader*>(ZoneAllocator_.Allocate(rawSize, MAP_POPULATE));
- if (!dumpable) {
- MappedMemoryManager->DontDump(blob, rawSize);
- }
- new (blob) THugeBlobHeader(tag, size, dumpable);
-
- StatisticsManager->IncrementTotalCounter(tag, EBasicCounter::BytesAllocated, size);
- StatisticsManager->IncrementHugeCounter(EHugeCounter::BlobsAllocated, 1);
- StatisticsManager->IncrementHugeCounter(EHugeCounter::BytesAllocated, size);
- if (!dumpable) {
- StatisticsManager->IncrementHugeUndumpableCounter(EUndumpableCounter::BytesAllocated, size);
- }
-
- auto* result = HeaderToPtr(blob);
- PoisonUninitializedRange(result, size);
- return result;
- }
-
- void Free(void* ptr)
- {
- auto* blob = PtrToHeader<THugeBlobHeader>(ptr);
- AssertBlobState(blob, EHugeBlobState::Allocated);
- auto tag = blob->Tag;
- auto size = blob->Size;
- auto dumpable = blob->Dumpable;
- PoisonFreedRange(ptr, size);
-
- auto rawSize = GetRawBlobSize<THugeBlobHeader>(size);
- ZoneAllocator_.Free(blob, rawSize);
-
- StatisticsManager->IncrementTotalCounter(tag, EBasicCounter::BytesFreed, size);
- StatisticsManager->IncrementHugeCounter(EHugeCounter::BlobsFreed, 1);
- StatisticsManager->IncrementHugeCounter(EHugeCounter::BytesFreed, size);
- if (!dumpable) {
- StatisticsManager->IncrementHugeUndumpableCounter(EUndumpableCounter::BytesFreed, size);
- }
- }
-
- static size_t GetAllocationSize(const void* ptr)
- {
- UnalignPtr<THugeBlobHeader>(ptr);
- const auto* blob = PtrToHeader<THugeBlobHeader>(ptr);
- return blob->Size;
- }
-
- static size_t GetAllocationSize(size_t size)
- {
- return GetBlobAllocationSize<THugeBlobHeader>(size);
- }
-
-private:
- TZoneAllocator ZoneAllocator_;
-};
-
-TExplicitlyConstructableSingleton<THugeBlobAllocator> HugeBlobAllocator;
-
-////////////////////////////////////////////////////////////////////////////////
-// A thunk to large and huge blob allocators
-
-class TBlobAllocator
-{
-public:
- static void* Allocate(size_t size)
- {
- InitializeGlobals();
- bool dumpable = GetCurrentMemoryZone() != EMemoryZone::Undumpable;
- // NB: Account for the header. Also note that we may safely ignore the alignment since
- // HugeAllocationSizeThreshold is already page-aligned.
- if (Y_LIKELY(size < HugeAllocationSizeThreshold - sizeof(TLargeBlobHeader) - RightReadableAreaSize)) {
- void* result = dumpable
- ? DumpableLargeBlobAllocator->Allocate(size)
- : UndumpableLargeBlobAllocator->Allocate(size);
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(result) >= LargeZoneStart(dumpable) && reinterpret_cast<uintptr_t>(result) < LargeZoneEnd(dumpable));
- return result;
- } else {
- auto* result = HugeBlobAllocator->Allocate(size, dumpable);
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(result) >= HugeZoneStart && reinterpret_cast<uintptr_t>(result) < HugeZoneEnd);
- return result;
- }
- }
-
- static void Free(void* ptr)
- {
- InitializeGlobals();
- if (reinterpret_cast<uintptr_t>(ptr) < LargeZoneEnd(true)) {
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(ptr) >= LargeZoneStart(true) && reinterpret_cast<uintptr_t>(ptr) < LargeZoneEnd(true));
- UnalignPtr<TLargeBlobHeader>(ptr);
- DumpableLargeBlobAllocator->Free(ptr);
- } else if (reinterpret_cast<uintptr_t>(ptr) < LargeZoneEnd(false)) {
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(ptr) >= LargeZoneStart(false) && reinterpret_cast<uintptr_t>(ptr) < LargeZoneEnd(false));
- UnalignPtr<TLargeBlobHeader>(ptr);
- UndumpableLargeBlobAllocator->Free(ptr);
- } else if (reinterpret_cast<uintptr_t>(ptr) < HugeZoneEnd) {
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(ptr) >= HugeZoneStart && reinterpret_cast<uintptr_t>(ptr) < HugeZoneEnd);
- UnalignPtr<THugeBlobHeader>(ptr);
- HugeBlobAllocator->Free(ptr);
- } else {
- YTALLOC_TRAP("Wrong ptr passed to Free");
- }
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-Y_POD_THREAD(bool) CurrentThreadIsBackground;
-
-// Base class for all background threads.
-template <class T>
-class TBackgroundThreadBase
-{
-public:
- TBackgroundThreadBase()
- : State_(new TState())
- {
- NThreading::RegisterAtForkHandlers(
- [=] { BeforeFork(); },
- [=] { AfterForkParent(); },
- [=] { AfterForkChild(); });
- }
-
- virtual ~TBackgroundThreadBase()
- {
- Stop();
- }
-
-private:
- struct TState
- : public TSystemAllocatable
- {
- std::mutex StartStopMutex;
- std::optional<std::thread> Thread;
-
- std::mutex StopFlagMutex;
- std::condition_variable StopFlagVariable;
- std::chrono::system_clock::time_point LastInvocationTime;
- bool StopFlag = false;
- bool Paused = false;
-
- std::atomic<int> ForkDepth = 0;
- bool RestartAfterFork = false;
- };
-
- TState* State_;
-
-private:
- void BeforeFork()
- {
- bool stopped = Stop();
- if (State_->ForkDepth++ == 0) {
- State_->RestartAfterFork = stopped;
- }
- }
-
- void AfterForkParent()
- {
- if (--State_->ForkDepth == 0) {
- if (State_->RestartAfterFork) {
- Start(false);
- }
- }
- }
-
- void AfterForkChild()
- {
- bool restart = State_->RestartAfterFork;
- State_ = new TState();
- if (restart) {
- Start(false);
- }
- }
-
- virtual void ThreadMain() = 0;
-
-protected:
- void Start(bool fromAlloc)
- {
- std::unique_lock<std::mutex> guard(State_->StartStopMutex, std::defer_lock);
- if (fromAlloc) {
- if (!guard.try_lock()) {
- return;
- }
-
- if (State_->Paused) {
- return;
- }
- } else {
- guard.lock();
- }
-
- State_->Paused = false;
- if (State_->Thread) {
- return;
- }
-
- State_->StopFlag = false;
-
- State_->Thread.emplace([=] {
- CurrentThreadIsBackground = true;
- ThreadMain();
- });
-
- OnStart();
- }
-
- bool Stop()
- {
- std::unique_lock<std::mutex> guard(State_->StartStopMutex);
-
- State_->Paused = true;
- if (!State_->Thread) {
- return false;
- }
-
- std::unique_lock<std::mutex> flagGuard(State_->StopFlagMutex);
- State_->StopFlag = true;
- flagGuard.unlock();
- State_->StopFlagVariable.notify_one();
-
- State_->Thread->join();
- State_->Thread.reset();
-
- OnStop();
-
- return true;
- }
-
- bool IsDone(TDuration interval)
- {
- std::unique_lock<std::mutex> flagGuard(State_->StopFlagMutex);
- auto result = State_->StopFlagVariable.wait_until(
- flagGuard,
- State_->LastInvocationTime + std::chrono::microseconds(interval.MicroSeconds()),
- [&] { return State_->StopFlag; });
- State_->LastInvocationTime = std::chrono::system_clock::now();
- return result;
- }
-
- virtual void OnStart()
- { }
-
- virtual void OnStop()
- { }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Invokes madvise(MADV_STOCKPILE) periodically.
-class TStockpileThread
- : public TBackgroundThreadBase<TStockpileThread>
-{
-public:
- explicit TStockpileThread(int index)
- : Index_(index)
- {
- Start(false);
- }
-
-private:
- const int Index_;
-
- virtual void ThreadMain() override
- {
- TThread::SetCurrentThreadName(Sprintf("%s:%d", StockpileThreadName, Index_).c_str());
-
- while (!IsDone(ConfigurationManager->GetStockpileInterval())) {
- if (!MappedMemoryManager->Stockpile(ConfigurationManager->GetStockpileSize())) {
- // No point in proceeding.
- YTALLOC_LOG_INFO("Stockpile call failed; terminating stockpile thread");
- break;
- }
- }
- }
-};
-
-// Manages a bunch of TStockpileThreads.
-class TStockpileManager
-{
-public:
- void SpawnIfNeeded()
- {
- if (!ConfigurationManager->IsStockpileEnabled()) {
- return;
- }
-
- int threadCount = ConfigurationManager->GetStockpileThreadCount();
- while (static_cast<int>(Threads_.size()) > threadCount) {
- Threads_.pop_back();
- }
- while (static_cast<int>(Threads_.size()) < threadCount) {
- Threads_.push_back(std::make_unique<TStockpileThread>(static_cast<int>(Threads_.size())));
- }
- }
-
-private:
- std::vector<std::unique_ptr<TStockpileThread>> Threads_;
-};
-
-TExplicitlyConstructableSingleton<TStockpileManager> StockpileManager;
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Time to wait before re-spawning the thread after a fork.
-static constexpr auto BackgroundThreadRespawnDelay = TDuration::Seconds(3);
-
-// Runs basic background activities: reclaim, logging, profiling etc.
-class TBackgroundThread
- : public TBackgroundThreadBase<TBackgroundThread>
-{
-public:
- bool IsStarted()
- {
- return Started_.load();
- }
-
- void SpawnIfNeeded()
- {
- if (CurrentThreadIsBackground) {
- return;
- }
- Start(true);
- }
-
-private:
- std::atomic<bool> Started_ = false;
-
-private:
- virtual void ThreadMain() override
- {
- TThread::SetCurrentThreadName(BackgroundThreadName);
- TimingManager->DisableForCurrentThread();
- MmapObservationManager->DisableForCurrentThread();
-
- while (!IsDone(BackgroundInterval)) {
- DumpableLargeBlobAllocator->RunBackgroundTasks();
- UndumpableLargeBlobAllocator->RunBackgroundTasks();
- MappedMemoryManager->RunBackgroundTasks();
- TimingManager->RunBackgroundTasks();
- MmapObservationManager->RunBackgroundTasks();
- StockpileManager->SpawnIfNeeded();
- }
- }
-
- virtual void OnStart() override
- {
- DoUpdateAllThreadsControlWord(true);
- }
-
- virtual void OnStop() override
- {
- DoUpdateAllThreadsControlWord(false);
- }
-
- void DoUpdateAllThreadsControlWord(bool started)
- {
- // Update threads' TLS.
- ThreadManager->EnumerateThreadStatesSync(
- [&] {
- Started_.store(started);
- },
- [&] (auto* state) {
- if (state->BackgroundThreadStarted) {
- *state->BackgroundThreadStarted = started;
- }
- });
- }
-};
-
-TExplicitlyConstructableSingleton<TBackgroundThread> BackgroundThread;
-
-////////////////////////////////////////////////////////////////////////////////
-
-Y_FORCE_INLINE TThreadState* TThreadManager::GetThreadStateUnchecked()
-{
- YTALLOC_PARANOID_ASSERT(ThreadState_);
- return ThreadState_;
-}
-
-Y_FORCE_INLINE TThreadState* TThreadManager::FindThreadState()
-{
- if (Y_LIKELY(ThreadState_)) {
- return ThreadState_;
- }
-
- if (ThreadStateDestroyed_) {
- return nullptr;
- }
-
- InitializeGlobals();
-
- // InitializeGlobals must not allocate.
- Y_ABORT_UNLESS(!ThreadState_);
- ThreadState_ = ThreadManager->AllocateThreadState();
- (&ThreadControlWord_)->Parts.ThreadStateValid = true;
-
- return ThreadState_;
-}
-
-void TThreadManager::DestroyThread(void*)
-{
- TSmallAllocator::PurgeCaches();
-
- TThreadState* state = ThreadState_;
- ThreadState_ = nullptr;
- ThreadStateDestroyed_ = true;
- (&ThreadControlWord_)->Parts.ThreadStateValid = false;
-
- {
- auto guard = GuardWithTiming(ThreadManager->ThreadRegistryLock_);
- state->AllocationProfilingEnabled = nullptr;
- state->BackgroundThreadStarted = nullptr;
- ThreadManager->UnrefThreadState(state);
- }
-}
-
-void TThreadManager::DestroyThreadState(TThreadState* state)
-{
- StatisticsManager->AccumulateLocalCounters(state);
- ThreadRegistry_.Remove(state);
- ThreadStatePool_.Free(state);
-}
-
-void TThreadManager::AfterFork()
-{
- auto guard = GuardWithTiming(ThreadRegistryLock_);
- ThreadRegistry_.Clear();
- TThreadState* state = ThreadState_;
- if (state) {
- ThreadRegistry_.PushBack(state);
- }
-}
-
-TThreadState* TThreadManager::AllocateThreadState()
-{
- auto* state = ThreadStatePool_.Allocate();
- state->AllocationProfilingEnabled = &(*&ThreadControlWord_).Parts.AllocationProfilingEnabled;
- state->BackgroundThreadStarted = &(*&ThreadControlWord_).Parts.BackgroundThreadStarted;
-
- {
- auto guard = GuardWithTiming(ThreadRegistryLock_);
- // NB: These flags must be initialized under ThreadRegistryLock_; see EnumerateThreadStatesSync.
- *state->AllocationProfilingEnabled = ConfigurationManager->IsAllocationProfilingEnabled();
- *state->BackgroundThreadStarted = BackgroundThread->IsStarted();
- ThreadRegistry_.PushBack(state);
- }
-
- // Need to pass some non-null value for DestroyThread to be called.
- pthread_setspecific(ThreadDtorKey_, (void*)-1);
-
- return state;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-void InitializeGlobals()
-{
- static std::once_flag Initialized;
- std::call_once(Initialized, [] () {
- LogManager.Construct();
- BacktraceManager.Construct();
- StatisticsManager.Construct();
- MappedMemoryManager.Construct();
- ThreadManager.Construct();
- GlobalState.Construct();
- DumpableLargeBlobAllocator.Construct();
- UndumpableLargeBlobAllocator.Construct();
- HugeBlobAllocator.Construct();
- ConfigurationManager.Construct();
- SystemAllocator.Construct();
- TimingManager.Construct();
- MmapObservationManager.Construct();
- StockpileManager.Construct();
- BackgroundThread.Construct();
-
- SmallArenaAllocators.Construct();
- auto constructSmallArenaAllocators = [&] (EAllocationKind kind, uintptr_t zonesStart) {
- for (size_t rank = 1; rank < SmallRankCount; ++rank) {
- (*SmallArenaAllocators)[kind][rank].Construct(kind, rank, zonesStart + rank * SmallZoneSize);
- }
- };
- constructSmallArenaAllocators(EAllocationKind::Untagged, UntaggedSmallZonesStart);
- constructSmallArenaAllocators(EAllocationKind::Tagged, TaggedSmallZonesStart);
-
- GlobalSmallChunkCaches.Construct();
- (*GlobalSmallChunkCaches)[EAllocationKind::Tagged].Construct(EAllocationKind::Tagged);
- (*GlobalSmallChunkCaches)[EAllocationKind::Untagged].Construct(EAllocationKind::Untagged);
- });
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-void StartBackgroundThread()
-{
- InitializeGlobals();
- BackgroundThread->SpawnIfNeeded();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-template <class... Ts>
-Y_FORCE_INLINE void* AllocateSmallUntagged(size_t rank, Ts... args)
-{
- auto* result = TSmallAllocator::Allocate<EAllocationKind::Untagged>(NullMemoryTag, rank, std::forward<Ts>(args)...);
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(result) >= MinUntaggedSmallPtr && reinterpret_cast<uintptr_t>(result) < MaxUntaggedSmallPtr);
- return result;
-}
-
-template <class... Ts>
-Y_FORCE_INLINE void* AllocateSmallTagged(ui64 controlWord, size_t rank, Ts... args)
-{
- auto tag = Y_UNLIKELY((controlWord & TThreadManager::AllocationProfilingEnabledControlWordMask) && ConfigurationManager->IsSmallArenaAllocationProfiled(rank))
- ? BacktraceManager->GetMemoryTagFromBacktrace(2)
- : static_cast<TMemoryTag>(controlWord & TThreadManager::MemoryTagControlWordMask);
- auto* result = TSmallAllocator::Allocate<EAllocationKind::Tagged>(tag, rank, std::forward<Ts>(args)...);
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(result) >= MinTaggedSmallPtr && reinterpret_cast<uintptr_t>(result) < MaxTaggedSmallPtr);
- return result;
-}
-
-Y_FORCE_INLINE void* AllocateInline(size_t size)
-{
- size_t rank;
- if (Y_LIKELY(size <= 512)) {
- rank = SizeToSmallRank1[(size + 7) >> 3];
- } else if (Y_LIKELY(size < LargeAllocationSizeThreshold)) {
- rank = SizeToSmallRank2[(size - 1) >> 8];
- } else {
- StartBackgroundThread();
- return TBlobAllocator::Allocate(size);
- }
-
- auto controlWord = TThreadManager::GetThreadControlWord();
- if (Y_LIKELY(controlWord == TThreadManager::FastPathControlWord)) {
- return AllocateSmallUntagged(rank, TThreadManager::GetThreadStateUnchecked());
- }
-
- if (Y_UNLIKELY(!(controlWord & TThreadManager::BackgroundThreadStartedControlWorkMask))) {
- StartBackgroundThread();
- }
-
- if (!(controlWord & (TThreadManager::MemoryTagControlWordMask | TThreadManager::AllocationProfilingEnabledControlWordMask))) {
- return AllocateSmallUntagged(rank);
- } else {
- return AllocateSmallTagged(controlWord, rank);
- }
-}
-
-Y_FORCE_INLINE void* AllocateSmallInline(size_t rank)
-{
- auto controlWord = TThreadManager::GetThreadControlWord();
- if (Y_LIKELY(controlWord == TThreadManager::FastPathControlWord)) {
- return AllocateSmallUntagged(rank, TThreadManager::GetThreadStateUnchecked());
- }
-
- if (!(controlWord & (TThreadManager::MemoryTagControlWordMask | TThreadManager::AllocationProfilingEnabledControlWordMask))) {
- return AllocateSmallUntagged(rank);
- } else {
- return AllocateSmallTagged(controlWord, rank);
- }
-}
-
-Y_FORCE_INLINE void* AllocatePageAlignedInline(size_t size)
-{
- size = std::max(AlignUp(size, PageSize), PageSize);
- void* result = size >= LargeAllocationSizeThreshold
- ? AlignUp(TBlobAllocator::Allocate(size + PageSize), PageSize)
- : Allocate(size);
- YTALLOC_ASSERT(reinterpret_cast<uintptr_t>(result) % PageSize == 0);
- return result;
-}
-
-Y_FORCE_INLINE void FreeNonNullInline(void* ptr)
-{
- YTALLOC_ASSERT(ptr);
- if (Y_LIKELY(reinterpret_cast<uintptr_t>(ptr) < UntaggedSmallZonesEnd)) {
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(ptr) >= MinUntaggedSmallPtr && reinterpret_cast<uintptr_t>(ptr) < MaxUntaggedSmallPtr);
- TSmallAllocator::Free<EAllocationKind::Untagged>(ptr);
- } else if (Y_LIKELY(reinterpret_cast<uintptr_t>(ptr) < TaggedSmallZonesEnd)) {
- YTALLOC_PARANOID_ASSERT(reinterpret_cast<uintptr_t>(ptr) >= MinTaggedSmallPtr && reinterpret_cast<uintptr_t>(ptr) < MaxTaggedSmallPtr);
- TSmallAllocator::Free<EAllocationKind::Tagged>(ptr);
- } else {
- TBlobAllocator::Free(ptr);
- }
-}
-
-Y_FORCE_INLINE void FreeInline(void* ptr)
-{
- if (Y_LIKELY(ptr)) {
- FreeNonNullInline(ptr);
- }
-}
-
-Y_FORCE_INLINE size_t GetAllocationSizeInline(const void* ptr)
-{
- if (Y_UNLIKELY(!ptr)) {
- return 0;
- }
-
- auto uintptr = reinterpret_cast<uintptr_t>(ptr);
- if (uintptr < UntaggedSmallZonesEnd) {
- YTALLOC_PARANOID_ASSERT(uintptr >= MinUntaggedSmallPtr && uintptr < MaxUntaggedSmallPtr);
- return TSmallAllocator::GetAllocationSize(ptr);
- } else if (uintptr < TaggedSmallZonesEnd) {
- YTALLOC_PARANOID_ASSERT(uintptr >= MinTaggedSmallPtr && uintptr < MaxTaggedSmallPtr);
- return TSmallAllocator::GetAllocationSize(ptr);
- } else if (uintptr < LargeZoneEnd(true)) {
- YTALLOC_PARANOID_ASSERT(uintptr >= LargeZoneStart(true) && uintptr < LargeZoneEnd(true));
- return TLargeBlobAllocator<true>::GetAllocationSize(ptr);
- } else if (uintptr < LargeZoneEnd(false)) {
- YTALLOC_PARANOID_ASSERT(uintptr >= LargeZoneStart(false) && uintptr < LargeZoneEnd(false));
- return TLargeBlobAllocator<false>::GetAllocationSize(ptr);
- } else if (uintptr < HugeZoneEnd) {
- YTALLOC_PARANOID_ASSERT(uintptr >= HugeZoneStart && uintptr < HugeZoneEnd);
- return THugeBlobAllocator::GetAllocationSize(ptr);
- } else {
- YTALLOC_TRAP("Wrong ptr passed to GetAllocationSizeInline");
- }
-}
-
-Y_FORCE_INLINE size_t GetAllocationSizeInline(size_t size)
-{
- if (size <= LargeAllocationSizeThreshold) {
- return TSmallAllocator::GetAllocationSize(size);
- } else if (size <= HugeAllocationSizeThreshold) {
- return TLargeBlobAllocator<true>::GetAllocationSize(size);
- } else {
- return THugeBlobAllocator::GetAllocationSize(size);
- }
-}
-
-void EnableLogging(TLogHandler logHandler)
-{
- InitializeGlobals();
- LogManager->EnableLogging(logHandler);
-}
-
-void SetBacktraceProvider(TBacktraceProvider provider)
-{
- InitializeGlobals();
- BacktraceManager->SetBacktraceProvider(provider);
- DumpableLargeBlobAllocator->SetBacktraceProvider(provider);
- UndumpableLargeBlobAllocator->SetBacktraceProvider(provider);
-}
-
-void SetBacktraceFormatter(TBacktraceFormatter provider)
-{
- InitializeGlobals();
- MmapObservationManager->SetBacktraceFormatter(provider);
-}
-
-void EnableStockpile()
-{
- InitializeGlobals();
- ConfigurationManager->EnableStockpile();
-}
-
-void SetStockpileInterval(TDuration value)
-{
- InitializeGlobals();
- ConfigurationManager->SetStockpileInterval(value);
-}
-
-void SetStockpileThreadCount(int value)
-{
- InitializeGlobals();
- ConfigurationManager->SetStockpileThreadCount(value);
-}
-
-void SetStockpileSize(size_t value)
-{
- InitializeGlobals();
- ConfigurationManager->SetStockpileSize(value);
-}
-
-void SetLargeUnreclaimableCoeff(double value)
-{
- InitializeGlobals();
- ConfigurationManager->SetLargeUnreclaimableCoeff(value);
-}
-
-void SetTimingEventThreshold(TDuration value)
-{
- InitializeGlobals();
- ConfigurationManager->SetTimingEventThreshold(value);
-}
-
-void SetMinLargeUnreclaimableBytes(size_t value)
-{
- InitializeGlobals();
- ConfigurationManager->SetMinLargeUnreclaimableBytes(value);
-}
-
-void SetMaxLargeUnreclaimableBytes(size_t value)
-{
- InitializeGlobals();
- ConfigurationManager->SetMaxLargeUnreclaimableBytes(value);
-}
-
-void SetAllocationProfilingEnabled(bool value)
-{
- ConfigurationManager->SetAllocationProfilingEnabled(value);
-}
-
-void SetAllocationProfilingSamplingRate(double rate)
-{
- ConfigurationManager->SetAllocationProfilingSamplingRate(rate);
-}
-
-void SetSmallArenaAllocationProfilingEnabled(size_t rank, bool value)
-{
- ConfigurationManager->SetSmallArenaAllocationProfilingEnabled(rank, value);
-}
-
-void SetLargeArenaAllocationProfilingEnabled(size_t rank, bool value)
-{
- ConfigurationManager->SetLargeArenaAllocationProfilingEnabled(rank, value);
-}
-
-void SetProfilingBacktraceDepth(int depth)
-{
- ConfigurationManager->SetProfilingBacktraceDepth(depth);
-}
-
-void SetMinProfilingBytesUsedToReport(size_t size)
-{
- ConfigurationManager->SetMinProfilingBytesUsedToReport(size);
-}
-
-void SetEnableEagerMemoryRelease(bool value)
-{
- ConfigurationManager->SetEnableEagerMemoryRelease(value);
-}
-
-void SetEnableMadvisePopulate(bool value)
-{
- ConfigurationManager->SetEnableMadvisePopulate(value);
-}
-
-TEnumIndexedArray<ETotalCounter, ssize_t> GetTotalAllocationCounters()
-{
- InitializeGlobals();
- return StatisticsManager->GetTotalAllocationCounters();
-}
-
-TEnumIndexedArray<ESystemCounter, ssize_t> GetSystemAllocationCounters()
-{
- InitializeGlobals();
- return StatisticsManager->GetSystemAllocationCounters();
-}
-
-TEnumIndexedArray<ESystemCounter, ssize_t> GetUndumpableAllocationCounters()
-{
- InitializeGlobals();
- return StatisticsManager->GetUndumpableAllocationCounters();
-}
-
-TEnumIndexedArray<ESmallCounter, ssize_t> GetSmallAllocationCounters()
-{
- InitializeGlobals();
- return StatisticsManager->GetSmallAllocationCounters();
-}
-
-TEnumIndexedArray<ESmallCounter, ssize_t> GetLargeAllocationCounters()
-{
- InitializeGlobals();
- return StatisticsManager->GetLargeAllocationCounters();
-}
-
-std::array<TEnumIndexedArray<ESmallArenaCounter, ssize_t>, SmallRankCount> GetSmallArenaAllocationCounters()
-{
- InitializeGlobals();
- return StatisticsManager->GetSmallArenaAllocationCounters();
-}
-
-std::array<TEnumIndexedArray<ELargeArenaCounter, ssize_t>, LargeRankCount> GetLargeArenaAllocationCounters()
-{
- InitializeGlobals();
- return StatisticsManager->GetLargeArenaAllocationCounters();
-}
-
-TEnumIndexedArray<EHugeCounter, ssize_t> GetHugeAllocationCounters()
-{
- InitializeGlobals();
- return StatisticsManager->GetHugeAllocationCounters();
-}
-
-std::vector<TProfiledAllocation> GetProfiledAllocationStatistics()
-{
- InitializeGlobals();
-
- if (!ConfigurationManager->IsAllocationProfilingEnabled()) {
- return {};
- }
-
- std::vector<TMemoryTag> tags;
- tags.reserve(MaxCapturedAllocationBacktraces + 1);
- for (TMemoryTag tag = AllocationProfilingMemoryTagBase;
- tag < AllocationProfilingMemoryTagBase + MaxCapturedAllocationBacktraces;
- ++tag)
- {
- tags.push_back(tag);
- }
- tags.push_back(AllocationProfilingUnknownMemoryTag);
-
- std::vector<TEnumIndexedArray<EBasicCounter, ssize_t>> counters;
- counters.resize(tags.size());
- StatisticsManager->GetTaggedMemoryCounters(tags.data(), tags.size(), counters.data());
-
- std::vector<TProfiledAllocation> statistics;
- for (size_t index = 0; index < tags.size(); ++index) {
- if (counters[index][EBasicCounter::BytesUsed] < static_cast<ssize_t>(ConfigurationManager->GetMinProfilingBytesUsedToReport())) {
- continue;
- }
- auto tag = tags[index];
- auto optionalBacktrace = BacktraceManager->FindBacktrace(tag);
- if (!optionalBacktrace && tag != AllocationProfilingUnknownMemoryTag) {
- continue;
- }
- statistics.push_back(TProfiledAllocation{
- optionalBacktrace.value_or(TBacktrace()),
- counters[index]
- });
- }
- return statistics;
-}
-
-TEnumIndexedArray<ETimingEventType, TTimingEventCounters> GetTimingEventCounters()
-{
- InitializeGlobals();
- return TimingManager->GetTimingEventCounters();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NYTAlloc
diff --git a/library/cpp/ytalloc/impl/ya.make b/library/cpp/ytalloc/impl/ya.make
deleted file mode 100644
index 23b6d5874e..0000000000
--- a/library/cpp/ytalloc/impl/ya.make
+++ /dev/null
@@ -1,15 +0,0 @@
-LIBRARY()
-
-ALLOCATOR_IMPL()
-SRCS(
- bridge.cpp
-)
-
-PEERDIR(
- library/cpp/malloc/api
- library/cpp/yt/containers
- library/cpp/yt/memory
- library/cpp/yt/threading
-)
-
-END()
diff --git a/yt/yt/client/table_client/unittests/ya.make b/yt/yt/client/table_client/unittests/ya.make
index f7594f2b9a..a91bc4b66f 100644
--- a/yt/yt/client/table_client/unittests/ya.make
+++ b/yt/yt/client/table_client/unittests/ya.make
@@ -2,8 +2,6 @@ GTEST(unittester-client-table-client)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-ALLOCATOR(YT)
-
SRCS(
columnar_statistics_ut.cpp
columnar_ut.cpp
diff --git a/yt/yt/client/unittests/ya.make b/yt/yt/client/unittests/ya.make
index f5111fca97..a71a98db2c 100644
--- a/yt/yt/client/unittests/ya.make
+++ b/yt/yt/client/unittests/ya.make
@@ -2,8 +2,6 @@ GTEST(unittester-client)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-ALLOCATOR(YT)
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/actions/unittests/ya.make b/yt/yt/core/actions/unittests/ya.make
index b1cf897388..10bbe6d32f 100644
--- a/yt/yt/core/actions/unittests/ya.make
+++ b/yt/yt/core/actions/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-actions)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/bus/unittests/ya.make b/yt/yt/core/bus/unittests/ya.make
index 7f56a36940..23ff019455 100644
--- a/yt/yt/core/bus/unittests/ya.make
+++ b/yt/yt/core/bus/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-bus)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/compression/unittests/ya.make b/yt/yt/core/compression/unittests/ya.make
index ec506bdc6f..6f7d0a63a1 100644
--- a/yt/yt/core/compression/unittests/ya.make
+++ b/yt/yt/core/compression/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-compression)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/concurrency/unittests/ya.make b/yt/yt/core/concurrency/unittests/ya.make
index b8b94dcfc9..61f7b416ad 100644
--- a/yt/yt/core/concurrency/unittests/ya.make
+++ b/yt/yt/core/concurrency/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-concurrency)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/crypto/unittests/ya.make b/yt/yt/core/crypto/unittests/ya.make
index 460de9e957..d46f2a4729 100644
--- a/yt/yt/core/crypto/unittests/ya.make
+++ b/yt/yt/core/crypto/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-crypto)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/http/unittests/ya.make b/yt/yt/core/http/unittests/ya.make
index c3724d1234..622590287c 100644
--- a/yt/yt/core/http/unittests/ya.make
+++ b/yt/yt/core/http/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-http)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/json/unittests/ya.make b/yt/yt/core/json/unittests/ya.make
index 8dc2d207d1..19c3198a5f 100644
--- a/yt/yt/core/json/unittests/ya.make
+++ b/yt/yt/core/json/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-json)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/logging/unittests/ya.make b/yt/yt/core/logging/unittests/ya.make
index 571cfee61e..3f5d7833de 100644
--- a/yt/yt/core/logging/unittests/ya.make
+++ b/yt/yt/core/logging/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-logging)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/misc/unittests/ya.make b/yt/yt/core/misc/unittests/ya.make
index 3d85710391..62fb8ece13 100644
--- a/yt/yt/core/misc/unittests/ya.make
+++ b/yt/yt/core/misc/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-misc)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/net/unittests/ya.make b/yt/yt/core/net/unittests/ya.make
index d1bf832968..9976ff1c18 100644
--- a/yt/yt/core/net/unittests/ya.make
+++ b/yt/yt/core/net/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-net)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/profiling/unittests/ya.make b/yt/yt/core/profiling/unittests/ya.make
index b31b812b83..8b9cc6c8de 100644
--- a/yt/yt/core/profiling/unittests/ya.make
+++ b/yt/yt/core/profiling/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-profiling)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/rpc/unittests/main/ya.make b/yt/yt/core/rpc/unittests/main/ya.make
index 773edcce96..37b494945c 100644
--- a/yt/yt/core/rpc/unittests/main/ya.make
+++ b/yt/yt/core/rpc/unittests/main/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-rpc)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/rpc/unittests/rpc_ut.cpp b/yt/yt/core/rpc/unittests/rpc_ut.cpp
index d83c1b7d6b..c887795911 100644
--- a/yt/yt/core/rpc/unittests/rpc_ut.cpp
+++ b/yt/yt/core/rpc/unittests/rpc_ut.cpp
@@ -620,39 +620,6 @@ TYPED_TEST(TNotGrpcTest, Compression)
}
}
-#if !defined(_asan_enabled_) && !defined(_msan_enabled_) && defined(_linux_)
-
-TYPED_TEST(TRpcTest, ResponseMemoryTag)
-{
- static TMemoryTag testMemoryTag = 12345;
- testMemoryTag++;
- auto initialMemoryUsage = GetMemoryUsageForTag(testMemoryTag);
-
- std::vector<TTestProxy::TRspPassCallPtr> rsps;
- {
- TTestProxy proxy(this->CreateChannel());
- TString userName("user");
-
- TMemoryTagGuard guard(testMemoryTag);
-
- for (int i = 0; i < 1000; ++i) {
- auto req = proxy.PassCall();
- req->SetUser(userName);
- req->SetMutationId(TGuid::Create());
- req->SetRetry(false);
- auto err = req->Invoke().Get();
- rsps.push_back(err.ValueOrThrow());
- }
- }
-
- auto currentMemoryUsage = GetMemoryUsageForTag(testMemoryTag);
- EXPECT_GE(currentMemoryUsage - initialMemoryUsage, 200_KB)
- << "InitialUsage: " << initialMemoryUsage << std::endl
- << "Current: " << currentMemoryUsage;
-}
-
-#endif
-
TYPED_TEST(TNotGrpcTest, RequestBytesThrottling)
{
auto configText = TString(R"({
diff --git a/yt/yt/core/rpc/unittests/shutdown/ya.make b/yt/yt/core/rpc/unittests/shutdown/ya.make
index a55c4d4fc0..ffc34b2bd0 100644
--- a/yt/yt/core/rpc/unittests/shutdown/ya.make
+++ b/yt/yt/core/rpc/unittests/shutdown/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-rpc-shutdown)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/ya.make b/yt/yt/core/ya.make
index 59362bed77..56334cc6c0 100644
--- a/yt/yt/core/ya.make
+++ b/yt/yt/core/ya.make
@@ -390,7 +390,7 @@ RECURSE(
test_framework
)
-IF (NOT OPENSOURCE)
+IF (NOT OPENSOURCE AND OS_LINUX)
RECURSE(
benchmarks
bus/benchmarks
diff --git a/yt/yt/core/ypath/unittests/ya.make b/yt/yt/core/ypath/unittests/ya.make
index fa6d821da4..b4f2240c8c 100644
--- a/yt/yt/core/ypath/unittests/ya.make
+++ b/yt/yt/core/ypath/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-ypath)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/yson/unittests/ya.make b/yt/yt/core/yson/unittests/ya.make
index a153d12ce7..59a807fca4 100644
--- a/yt/yt/core/yson/unittests/ya.make
+++ b/yt/yt/core/yson/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-yson)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/core/ytree/unittests/ya.make b/yt/yt/core/ytree/unittests/ya.make
index 034ba0a1d5..7196cea98c 100644
--- a/yt/yt/core/ytree/unittests/ya.make
+++ b/yt/yt/core/ytree/unittests/ya.make
@@ -2,10 +2,6 @@ GTEST(unittester-core-ytree)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-IF (NOT OS_WINDOWS AND NOT ARCH_AARCH64)
- ALLOCATOR(YT)
-ENDIF()
-
PROTO_NAMESPACE(yt)
SRCS(
diff --git a/yt/yt/library/auth/unittests/ya.make b/yt/yt/library/auth/unittests/ya.make
index c45504d10a..e72e513258 100644
--- a/yt/yt/library/auth/unittests/ya.make
+++ b/yt/yt/library/auth/unittests/ya.make
@@ -2,8 +2,6 @@ GTEST(unittester-library-auth)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-ALLOCATOR(YT)
-
SRCS(
auth_ut.cpp
)
diff --git a/yt/yt/library/decimal/unittests/ya.make b/yt/yt/library/decimal/unittests/ya.make
index 76341ee7b2..172760632d 100644
--- a/yt/yt/library/decimal/unittests/ya.make
+++ b/yt/yt/library/decimal/unittests/ya.make
@@ -2,8 +2,6 @@ GTEST(unittester-library-decimal)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-ALLOCATOR(YT)
-
SRCS(
decimal_ut.cpp
)
diff --git a/yt/yt/library/erasure/impl/unittests/ya.make b/yt/yt/library/erasure/impl/unittests/ya.make
index 15b3f6fb8d..d2b30ea140 100644
--- a/yt/yt/library/erasure/impl/unittests/ya.make
+++ b/yt/yt/library/erasure/impl/unittests/ya.make
@@ -2,8 +2,6 @@ GTEST(unittester-library-erasure)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-ALLOCATOR(YT)
-
SRCS(
erasure_stability_ut.cpp
)
diff --git a/yt/yt/library/process/unittests/ya.make b/yt/yt/library/process/unittests/ya.make
index 7c5d0cb48f..149d9eee1f 100644
--- a/yt/yt/library/process/unittests/ya.make
+++ b/yt/yt/library/process/unittests/ya.make
@@ -2,8 +2,6 @@ GTEST(unittester-library-process)
INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
-ALLOCATOR(YT)
-
SRCS(
pipes_ut.cpp
process_ut.cpp
diff --git a/yt/yt/library/tvm/service/unittests/ya.make b/yt/yt/library/tvm/service/unittests/ya.make
index 28629e6d82..23ac522bd0 100644
--- a/yt/yt/library/tvm/service/unittests/ya.make
+++ b/yt/yt/library/tvm/service/unittests/ya.make
@@ -1,7 +1,5 @@
GTEST(unittester-library-auth_tvm)
-ALLOCATOR(YT)
-
INCLUDE(${ARCADIA_ROOT}/yt/opensource.inc)
PEERDIR(