diff options
author | prime <prime@yandex-team.ru> | 2022-02-10 16:46:00 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:00 +0300 |
commit | 3695a7cd42b74a4987d8d5a8f2e2443556998943 (patch) | |
tree | ee79ee9294a61ee00e647684b3700d0a87e102a3 /contrib/libs/tcmalloc | |
parent | 4d8b546b89b5afc08cf3667e176271c7ba935f33 (diff) | |
download | ydb-3695a7cd42b74a4987d8d5a8f2e2443556998943.tar.gz |
Restoring authorship annotation for <prime@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/tcmalloc')
178 files changed, 11467 insertions, 11467 deletions
diff --git a/contrib/libs/tcmalloc/common.inc b/contrib/libs/tcmalloc/common.inc index 077942c387..3c318bef14 100644 --- a/contrib/libs/tcmalloc/common.inc +++ b/contrib/libs/tcmalloc/common.inc @@ -1,58 +1,58 @@ -GLOBAL_SRCS( - # TCMalloc - tcmalloc/tcmalloc.cc - - # Common Sources - tcmalloc/arena.cc - tcmalloc/background.cc - tcmalloc/central_freelist.cc - tcmalloc/common.cc - tcmalloc/cpu_cache.cc - tcmalloc/experimental_pow2_below64_size_class.cc - tcmalloc/experimental_pow2_size_class.cc - tcmalloc/legacy_size_classes.cc - tcmalloc/guarded_page_allocator.cc - tcmalloc/huge_address_map.cc - tcmalloc/huge_allocator.cc - tcmalloc/huge_cache.cc - tcmalloc/huge_page_aware_allocator.cc - tcmalloc/page_allocator.cc - tcmalloc/page_allocator_interface.cc - tcmalloc/page_heap.cc - tcmalloc/pagemap.cc - tcmalloc/parameters.cc - tcmalloc/peak_heap_tracker.cc - tcmalloc/sampler.cc - tcmalloc/size_classes.cc - tcmalloc/span.cc - tcmalloc/stack_trace_table.cc - tcmalloc/static_vars.cc - tcmalloc/stats.cc - tcmalloc/system-alloc.cc - tcmalloc/thread_cache.cc - tcmalloc/transfer_cache.cc - - # Common deps - tcmalloc/experiment.cc - tcmalloc/noruntime_size_classes.cc - - # Internal libraries - tcmalloc/internal/cache_topology.cc - tcmalloc/internal/environment.cc - tcmalloc/internal/logging.cc - tcmalloc/internal/memory_stats.cc - tcmalloc/internal/mincore.cc - tcmalloc/internal/numa.cc - tcmalloc/internal/percpu.cc - tcmalloc/internal/percpu_rseq_asm.S - tcmalloc/internal/percpu_rseq_unsupported.cc - tcmalloc/internal/util.cc -) - -PEERDIR( - contrib/restricted/abseil-cpp - contrib/libs/tcmalloc/malloc_extension -) - -NO_UTIL() -NO_COMPILER_WARNINGS() +GLOBAL_SRCS( + # TCMalloc + tcmalloc/tcmalloc.cc + + # Common Sources + tcmalloc/arena.cc + tcmalloc/background.cc + tcmalloc/central_freelist.cc + tcmalloc/common.cc + tcmalloc/cpu_cache.cc + tcmalloc/experimental_pow2_below64_size_class.cc + tcmalloc/experimental_pow2_size_class.cc + tcmalloc/legacy_size_classes.cc + 
tcmalloc/guarded_page_allocator.cc + tcmalloc/huge_address_map.cc + tcmalloc/huge_allocator.cc + tcmalloc/huge_cache.cc + tcmalloc/huge_page_aware_allocator.cc + tcmalloc/page_allocator.cc + tcmalloc/page_allocator_interface.cc + tcmalloc/page_heap.cc + tcmalloc/pagemap.cc + tcmalloc/parameters.cc + tcmalloc/peak_heap_tracker.cc + tcmalloc/sampler.cc + tcmalloc/size_classes.cc + tcmalloc/span.cc + tcmalloc/stack_trace_table.cc + tcmalloc/static_vars.cc + tcmalloc/stats.cc + tcmalloc/system-alloc.cc + tcmalloc/thread_cache.cc + tcmalloc/transfer_cache.cc + + # Common deps + tcmalloc/experiment.cc + tcmalloc/noruntime_size_classes.cc + + # Internal libraries + tcmalloc/internal/cache_topology.cc + tcmalloc/internal/environment.cc + tcmalloc/internal/logging.cc + tcmalloc/internal/memory_stats.cc + tcmalloc/internal/mincore.cc + tcmalloc/internal/numa.cc + tcmalloc/internal/percpu.cc + tcmalloc/internal/percpu_rseq_asm.S + tcmalloc/internal/percpu_rseq_unsupported.cc + tcmalloc/internal/util.cc +) + +PEERDIR( + contrib/restricted/abseil-cpp + contrib/libs/tcmalloc/malloc_extension +) + +NO_UTIL() +NO_COMPILER_WARNINGS() diff --git a/contrib/libs/tcmalloc/default/ya.make b/contrib/libs/tcmalloc/default/ya.make index b69b077e19..dac75122ac 100644 --- a/contrib/libs/tcmalloc/default/ya.make +++ b/contrib/libs/tcmalloc/default/ya.make @@ -1,22 +1,22 @@ -LIBRARY() - +LIBRARY() + WITHOUT_LICENSE_TEXTS() -LICENSE(Apache-2.0) - -OWNER( - ayles - prime +LICENSE(Apache-2.0) + +OWNER( + ayles + prime g:cpp-contrib -) - -SRCDIR(contrib/libs/tcmalloc) - -INCLUDE(../common.inc) - -GLOBAL_SRCS( - # Options - tcmalloc/want_hpaa.cc -) - -END() +) + +SRCDIR(contrib/libs/tcmalloc) + +INCLUDE(../common.inc) + +GLOBAL_SRCS( + # Options + tcmalloc/want_hpaa.cc +) + +END() diff --git a/contrib/libs/tcmalloc/malloc_extension/ya.make b/contrib/libs/tcmalloc/malloc_extension/ya.make index c9a07c2454..610323a904 100644 --- a/contrib/libs/tcmalloc/malloc_extension/ya.make +++ 
b/contrib/libs/tcmalloc/malloc_extension/ya.make @@ -1,31 +1,31 @@ -LIBRARY() - +LIBRARY() + WITHOUT_LICENSE_TEXTS() -LICENSE(Apache-2.0) - -OWNER( +LICENSE(Apache-2.0) + +OWNER( prime - g:cpp-contrib -) - -NO_UTIL() - -NO_COMPILER_WARNINGS() - -# https://github.com/google/tcmalloc -VERSION(2020-11-23-a643d89610317be1eff9f7298104eef4c987d8d5) - -SRCDIR(contrib/libs/tcmalloc) - -SRCS( - tcmalloc/malloc_extension.cc -) - + g:cpp-contrib +) + +NO_UTIL() + +NO_COMPILER_WARNINGS() + +# https://github.com/google/tcmalloc +VERSION(2020-11-23-a643d89610317be1eff9f7298104eef4c987d8d5) + +SRCDIR(contrib/libs/tcmalloc) + +SRCS( + tcmalloc/malloc_extension.cc +) + PEERDIR( contrib/restricted/abseil-cpp -) - +) + ADDINCL( GLOBAL contrib/libs/tcmalloc ) @@ -33,5 +33,5 @@ ADDINCL( CFLAGS( -DTCMALLOC_256K_PAGES ) - -END() + +END() diff --git a/contrib/libs/tcmalloc/numa_256k/ya.make b/contrib/libs/tcmalloc/numa_256k/ya.make index ffede5df8b..728c02816d 100644 --- a/contrib/libs/tcmalloc/numa_256k/ya.make +++ b/contrib/libs/tcmalloc/numa_256k/ya.make @@ -1,28 +1,28 @@ -LIBRARY() - +LIBRARY() + WITHOUT_LICENSE_TEXTS() -LICENSE(Apache-2.0) - -OWNER( - ayles - prime +LICENSE(Apache-2.0) + +OWNER( + ayles + prime g:cpp-contrib -) - -SRCDIR(contrib/libs/tcmalloc) - -INCLUDE(../common.inc) - -GLOBAL_SRCS( - # Options - tcmalloc/want_hpaa.cc - tcmalloc/want_numa_aware.cc -) - -CFLAGS( - -DTCMALLOC_256K_PAGES - -DTCMALLOC_NUMA_AWARE -) - -END() +) + +SRCDIR(contrib/libs/tcmalloc) + +INCLUDE(../common.inc) + +GLOBAL_SRCS( + # Options + tcmalloc/want_hpaa.cc + tcmalloc/want_numa_aware.cc +) + +CFLAGS( + -DTCMALLOC_256K_PAGES + -DTCMALLOC_NUMA_AWARE +) + +END() diff --git a/contrib/libs/tcmalloc/numa_large_pages/ya.make b/contrib/libs/tcmalloc/numa_large_pages/ya.make index f39c1e15ba..6f1de511ed 100644 --- a/contrib/libs/tcmalloc/numa_large_pages/ya.make +++ b/contrib/libs/tcmalloc/numa_large_pages/ya.make @@ -1,28 +1,28 @@ -LIBRARY() - -WITHOUT_LICENSE_TEXTS() - -LICENSE(Apache-2.0) - 
-OWNER( - ayles - prime - g:cpp-contrib -) - -SRCDIR(contrib/libs/tcmalloc) - -INCLUDE(../common.inc) - -GLOBAL_SRCS( - # Options - tcmalloc/want_hpaa.cc - tcmalloc/want_numa_aware.cc -) - -CFLAGS( - -DTCMALLOC_LARGE_PAGES - -DTCMALLOC_NUMA_AWARE -) - -END() +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +LICENSE(Apache-2.0) + +OWNER( + ayles + prime + g:cpp-contrib +) + +SRCDIR(contrib/libs/tcmalloc) + +INCLUDE(../common.inc) + +GLOBAL_SRCS( + # Options + tcmalloc/want_hpaa.cc + tcmalloc/want_numa_aware.cc +) + +CFLAGS( + -DTCMALLOC_LARGE_PAGES + -DTCMALLOC_NUMA_AWARE +) + +END() diff --git a/contrib/libs/tcmalloc/patches/fork.patch b/contrib/libs/tcmalloc/patches/fork.patch index 2503394431..b29bb78261 100644 --- a/contrib/libs/tcmalloc/patches/fork.patch +++ b/contrib/libs/tcmalloc/patches/fork.patch @@ -1,310 +1,310 @@ ---- contrib/libs/tcmalloc/tcmalloc/central_freelist.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/central_freelist.h (working tree) -@@ -70,6 +70,14 @@ class CentralFreeList { - - SpanStats GetSpanStats() const; - -+ void AcquireInternalLocks() { -+ lock_.Lock(); -+ } -+ -+ void ReleaseInternalLocks() { -+ lock_.Unlock(); -+ } -+ - private: - // Release an object to spans. - // Returns object's span if it become completely free. 
---- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (working tree) -@@ -1031,6 +1031,20 @@ void CPUCache::PrintInPbtxt(PbtxtRegion *region) const { - } - } - -+void CPUCache::AcquireInternalLocks() { -+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; -+ ++cpu) { -+ resize_[cpu].lock.Lock(); -+ } -+} -+ -+void CPUCache::ReleaseInternalLocks() { -+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; -+ ++cpu) { -+ resize_[cpu].lock.Unlock(); -+ } -+} -+ - void CPUCache::PerClassResizeInfo::Init() { - state_.store(0, std::memory_order_relaxed); - } ---- contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (working tree) -@@ -164,6 +164,9 @@ class CPUCache { - void Print(Printer* out) const; - void PrintInPbtxt(PbtxtRegion* region) const; - -+ void AcquireInternalLocks(); -+ void ReleaseInternalLocks(); -+ - private: - // Per-size-class freelist resizing info. 
- class PerClassResizeInfo { ---- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree) -@@ -116,6 +116,10 @@ ABSL_ATTRIBUTE_WEAK int64_t - MallocExtension_Internal_GetMaxTotalThreadCacheBytes(); - ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( - int64_t value); -+ -+ABSL_ATTRIBUTE_WEAK void -+MallocExtension_EnableForkSupport(); -+ - } - - #endif ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree) -@@ -460,6 +460,14 @@ void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) { - #endif - } - -+void MallocExtension::EnableForkSupport() { -+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS -+ if (&MallocExtension_EnableForkSupport != nullptr) { -+ MallocExtension_EnableForkSupport(); -+ } -+#endif -+} -+ - } // namespace tcmalloc - - // Default implementation just returns size. The expectation is that ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree) -@@ -468,6 +468,10 @@ class MallocExtension final { - // Specifies the release rate from the page heap. ProcessBackgroundActions - // must be called for this to be operative. - static void SetBackgroundReleaseRate(BytesPerSecond rate); -+ -+ // Enables fork support. -+ // Allocator will continue to function correctly in the child, after calling fork(). 
-+ static void EnableForkSupport(); - }; - - } // namespace tcmalloc ---- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree) -@@ -59,6 +59,7 @@ ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket> - Static::bucket_allocator_; - ABSL_CONST_INIT std::atomic<bool> Static::inited_{false}; - ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; -+ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; - ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; - ABSL_CONST_INIT PageMap Static::pagemap_; - ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock( -@@ -116,6 +117,13 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { - pagemap_.MapRootWithSmallPages(); - guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128); - inited_.store(true, std::memory_order_release); -+ -+ pageheap_lock.Unlock(); -+ pthread_atfork( -+ TCMallocPreFork, -+ TCMallocPostFork, -+ TCMallocPostFork); -+ pageheap_lock.Lock(); - } - } - ---- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree) -@@ -50,6 +50,9 @@ class CPUCache; - class PageMap; - class ThreadCache; - -+void TCMallocPreFork(); -+void TCMallocPostFork(); -+ - class Static { - public: - // True if InitIfNecessary() has run to completion. 
-@@ -124,6 +127,9 @@ class Static { - static void ActivateCPUCache() { cpu_cache_active_ = true; } - static void DeactivateCPUCache() { cpu_cache_active_ = false; } - -+ static bool ForkSupportEnabled() { return fork_support_enabled_; } -+ static void EnableForkSupport() { fork_support_enabled_ = true; } -+ - static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { - return - #ifndef TCMALLOC_DEPRECATED_PERTHREAD -@@ -169,6 +175,7 @@ class Static { - static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_; - ABSL_CONST_INIT static std::atomic<bool> inited_; - static bool cpu_cache_active_; -+ static bool fork_support_enabled_; - ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; - ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses> - numa_topology_; ---- contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (working tree) -@@ -354,6 +354,14 @@ ABSL_CONST_INIT std::atomic<int> system_release_errors = ATOMIC_VAR_INIT(0); - - } // namespace - -+void AcquireSystemAllocLock() { -+ spinlock.Lock(); -+} -+ -+void ReleaseSystemAllocLock() { -+ spinlock.Unlock(); -+} -+ - void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment, - const MemoryTag tag) { - // If default alignment is set request the minimum alignment provided by ---- contrib/libs/tcmalloc/tcmalloc/system-alloc.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/system-alloc.h (working tree) -@@ -50,6 +50,9 @@ void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment, - // call to SystemRelease. - int SystemReleaseErrors(); - -+void AcquireSystemAllocLock(); -+void ReleaseSystemAllocLock(); -+ - // This call is a hint to the operating system that the pages - // contained in the specified range of memory will not be used for a - // while, and can be released for use by other processes or the OS. 
---- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index) -+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree) -@@ -1117,6 +1117,40 @@ extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem( - } - } - -+extern "C" void MallocExtension_EnableForkSupport() { -+ Static::EnableForkSupport(); -+} -+ -+void TCMallocPreFork() { -+ if (!Static::ForkSupportEnabled()) { -+ return; -+ } -+ -+ if (Static::CPUCacheActive()) { -+ Static::cpu_cache().AcquireInternalLocks(); -+ } -+ Static::transfer_cache().AcquireInternalLocks(); -+ guarded_page_lock.Lock(); -+ release_lock.Lock(); -+ pageheap_lock.Lock(); -+ AcquireSystemAllocLock(); -+} -+ -+void TCMallocPostFork() { -+ if (!Static::ForkSupportEnabled()) { -+ return; -+ } -+ -+ ReleaseSystemAllocLock(); -+ pageheap_lock.Unlock(); -+ guarded_page_lock.Unlock(); -+ release_lock.Unlock(); -+ Static::transfer_cache().ReleaseInternalLocks(); -+ if (Static::CPUCacheActive()) { -+ Static::cpu_cache().ReleaseInternalLocks(); -+ } -+} -+ - // nallocx slow path. 
- // Moved to a separate function because size_class_with_alignment is not inlined - // which would cause nallocx to become non-leaf function with stack frame and ---- contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (working tree) -@@ -120,4 +120,7 @@ void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW - } - #endif - -+void TCMallocInternalAcquireLocks(); -+void TCMallocInternalReleaseLocks(); -+ - #endif // TCMALLOC_TCMALLOC_H_ ---- contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (working tree) -@@ -176,6 +176,26 @@ class TransferCacheManager : public StaticForwarder { - } - } - -+ void AcquireInternalLocks() { -+ for (int i = 0; i < kNumClasses; ++i) { -+ if (implementation_ == TransferCacheImplementation::Ring) { -+ cache_[i].rbtc.AcquireInternalLocks(); -+ } else { -+ cache_[i].tc.AcquireInternalLocks(); -+ } -+ } -+ } -+ -+ void ReleaseInternalLocks() { -+ for (int i = 0; i < kNumClasses; ++i) { -+ if (implementation_ == TransferCacheImplementation::Ring) { -+ cache_[i].rbtc.ReleaseInternalLocks(); -+ } else { -+ cache_[i].tc.ReleaseInternalLocks(); -+ } -+ } -+ } -+ - void InsertRange(int size_class, absl::Span<void *> batch) { - if (implementation_ == TransferCacheImplementation::Ring) { - cache_[size_class].rbtc.InsertRange(size_class, batch); -@@ -295,6 +315,9 @@ class TransferCacheManager { - return TransferCacheImplementation::None; - } - -+ void AcquireInternalLocks() {} -+ void ReleaseInternalLocks() {} -+ - private: - CentralFreeList freelist_[kNumClasses]; - } ABSL_CACHELINE_ALIGNED; ---- contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (index) -+++ contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (working tree) -@@ -366,6 +366,18 @@ class TransferCache { - return freelist_do_not_access_directly_; - } - -+ void AcquireInternalLocks() -+ { -+ freelist().AcquireInternalLocks(); -+ lock_.Lock(); -+ } 
-+ -+ void ReleaseInternalLocks() -+ { -+ lock_.Unlock(); -+ freelist().ReleaseInternalLocks(); -+ } -+ - private: - // Returns first object of the i-th slot. - void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { -@@ -468,6 +480,18 @@ class RingBufferTransferCache { - - // These methods all do internal locking. - -+ void AcquireInternalLocks() -+ { -+ freelist().AcquireInternalLocks(); -+ lock_.Lock(); -+ } -+ -+ void ReleaseInternalLocks() -+ { -+ lock_.Unlock(); -+ freelist().ReleaseInternalLocks(); -+ } -+ - // Insert the specified batch into the transfer cache. N is the number of - // elements in the range. RemoveRange() is the opposite operation. - void InsertRange(int size_class, absl::Span<void *> batch) +--- contrib/libs/tcmalloc/tcmalloc/central_freelist.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/central_freelist.h (working tree) +@@ -70,6 +70,14 @@ class CentralFreeList { + + SpanStats GetSpanStats() const; + ++ void AcquireInternalLocks() { ++ lock_.Lock(); ++ } ++ ++ void ReleaseInternalLocks() { ++ lock_.Unlock(); ++ } ++ + private: + // Release an object to spans. + // Returns object's span if it become completely free. 
+--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (working tree) +@@ -1031,6 +1031,20 @@ void CPUCache::PrintInPbtxt(PbtxtRegion *region) const { + } + } + ++void CPUCache::AcquireInternalLocks() { ++ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; ++ ++cpu) { ++ resize_[cpu].lock.Lock(); ++ } ++} ++ ++void CPUCache::ReleaseInternalLocks() { ++ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; ++ ++cpu) { ++ resize_[cpu].lock.Unlock(); ++ } ++} ++ + void CPUCache::PerClassResizeInfo::Init() { + state_.store(0, std::memory_order_relaxed); + } +--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (working tree) +@@ -164,6 +164,9 @@ class CPUCache { + void Print(Printer* out) const; + void PrintInPbtxt(PbtxtRegion* region) const; + ++ void AcquireInternalLocks(); ++ void ReleaseInternalLocks(); ++ + private: + // Per-size-class freelist resizing info. 
+ class PerClassResizeInfo { +--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree) +@@ -116,6 +116,10 @@ ABSL_ATTRIBUTE_WEAK int64_t + MallocExtension_Internal_GetMaxTotalThreadCacheBytes(); + ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( + int64_t value); ++ ++ABSL_ATTRIBUTE_WEAK void ++MallocExtension_EnableForkSupport(); ++ + } + + #endif +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree) +@@ -460,6 +460,14 @@ void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) { + #endif + } + ++void MallocExtension::EnableForkSupport() { ++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS ++ if (&MallocExtension_EnableForkSupport != nullptr) { ++ MallocExtension_EnableForkSupport(); ++ } ++#endif ++} ++ + } // namespace tcmalloc + + // Default implementation just returns size. The expectation is that +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree) +@@ -468,6 +468,10 @@ class MallocExtension final { + // Specifies the release rate from the page heap. ProcessBackgroundActions + // must be called for this to be operative. + static void SetBackgroundReleaseRate(BytesPerSecond rate); ++ ++ // Enables fork support. ++ // Allocator will continue to function correctly in the child, after calling fork(). 
++ static void EnableForkSupport(); + }; + + } // namespace tcmalloc +--- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree) +@@ -59,6 +59,7 @@ ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket> + Static::bucket_allocator_; + ABSL_CONST_INIT std::atomic<bool> Static::inited_{false}; + ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; ++ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; + ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; + ABSL_CONST_INIT PageMap Static::pagemap_; + ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock( +@@ -116,6 +117,13 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { + pagemap_.MapRootWithSmallPages(); + guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128); + inited_.store(true, std::memory_order_release); ++ ++ pageheap_lock.Unlock(); ++ pthread_atfork( ++ TCMallocPreFork, ++ TCMallocPostFork, ++ TCMallocPostFork); ++ pageheap_lock.Lock(); + } + } + +--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree) +@@ -50,6 +50,9 @@ class CPUCache; + class PageMap; + class ThreadCache; + ++void TCMallocPreFork(); ++void TCMallocPostFork(); ++ + class Static { + public: + // True if InitIfNecessary() has run to completion. 
+@@ -124,6 +127,9 @@ class Static { + static void ActivateCPUCache() { cpu_cache_active_ = true; } + static void DeactivateCPUCache() { cpu_cache_active_ = false; } + ++ static bool ForkSupportEnabled() { return fork_support_enabled_; } ++ static void EnableForkSupport() { fork_support_enabled_ = true; } ++ + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { + return + #ifndef TCMALLOC_DEPRECATED_PERTHREAD +@@ -169,6 +175,7 @@ class Static { + static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_; + ABSL_CONST_INIT static std::atomic<bool> inited_; + static bool cpu_cache_active_; ++ static bool fork_support_enabled_; + ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; + ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses> + numa_topology_; +--- contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (working tree) +@@ -354,6 +354,14 @@ ABSL_CONST_INIT std::atomic<int> system_release_errors = ATOMIC_VAR_INIT(0); + + } // namespace + ++void AcquireSystemAllocLock() { ++ spinlock.Lock(); ++} ++ ++void ReleaseSystemAllocLock() { ++ spinlock.Unlock(); ++} ++ + void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment, + const MemoryTag tag) { + // If default alignment is set request the minimum alignment provided by +--- contrib/libs/tcmalloc/tcmalloc/system-alloc.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/system-alloc.h (working tree) +@@ -50,6 +50,9 @@ void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment, + // call to SystemRelease. + int SystemReleaseErrors(); + ++void AcquireSystemAllocLock(); ++void ReleaseSystemAllocLock(); ++ + // This call is a hint to the operating system that the pages + // contained in the specified range of memory will not be used for a + // while, and can be released for use by other processes or the OS. 
+--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree) +@@ -1117,6 +1117,40 @@ extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem( + } + } + ++extern "C" void MallocExtension_EnableForkSupport() { ++ Static::EnableForkSupport(); ++} ++ ++void TCMallocPreFork() { ++ if (!Static::ForkSupportEnabled()) { ++ return; ++ } ++ ++ if (Static::CPUCacheActive()) { ++ Static::cpu_cache().AcquireInternalLocks(); ++ } ++ Static::transfer_cache().AcquireInternalLocks(); ++ guarded_page_lock.Lock(); ++ release_lock.Lock(); ++ pageheap_lock.Lock(); ++ AcquireSystemAllocLock(); ++} ++ ++void TCMallocPostFork() { ++ if (!Static::ForkSupportEnabled()) { ++ return; ++ } ++ ++ ReleaseSystemAllocLock(); ++ pageheap_lock.Unlock(); ++ guarded_page_lock.Unlock(); ++ release_lock.Unlock(); ++ Static::transfer_cache().ReleaseInternalLocks(); ++ if (Static::CPUCacheActive()) { ++ Static::cpu_cache().ReleaseInternalLocks(); ++ } ++} ++ + // nallocx slow path. 
+ // Moved to a separate function because size_class_with_alignment is not inlined + // which would cause nallocx to become non-leaf function with stack frame and +--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (working tree) +@@ -120,4 +120,7 @@ void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW + } + #endif + ++void TCMallocInternalAcquireLocks(); ++void TCMallocInternalReleaseLocks(); ++ + #endif // TCMALLOC_TCMALLOC_H_ +--- contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (working tree) +@@ -176,6 +176,26 @@ class TransferCacheManager : public StaticForwarder { + } + } + ++ void AcquireInternalLocks() { ++ for (int i = 0; i < kNumClasses; ++i) { ++ if (implementation_ == TransferCacheImplementation::Ring) { ++ cache_[i].rbtc.AcquireInternalLocks(); ++ } else { ++ cache_[i].tc.AcquireInternalLocks(); ++ } ++ } ++ } ++ ++ void ReleaseInternalLocks() { ++ for (int i = 0; i < kNumClasses; ++i) { ++ if (implementation_ == TransferCacheImplementation::Ring) { ++ cache_[i].rbtc.ReleaseInternalLocks(); ++ } else { ++ cache_[i].tc.ReleaseInternalLocks(); ++ } ++ } ++ } ++ + void InsertRange(int size_class, absl::Span<void *> batch) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[size_class].rbtc.InsertRange(size_class, batch); +@@ -295,6 +315,9 @@ class TransferCacheManager { + return TransferCacheImplementation::None; + } + ++ void AcquireInternalLocks() {} ++ void ReleaseInternalLocks() {} ++ + private: + CentralFreeList freelist_[kNumClasses]; + } ABSL_CACHELINE_ALIGNED; +--- contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (working tree) +@@ -366,6 +366,18 @@ class TransferCache { + return freelist_do_not_access_directly_; + } + ++ void AcquireInternalLocks() ++ { ++ freelist().AcquireInternalLocks(); ++ lock_.Lock(); ++ } 
++ ++ void ReleaseInternalLocks() ++ { ++ lock_.Unlock(); ++ freelist().ReleaseInternalLocks(); ++ } ++ + private: + // Returns first object of the i-th slot. + void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { +@@ -468,6 +480,18 @@ class RingBufferTransferCache { + + // These methods all do internal locking. + ++ void AcquireInternalLocks() ++ { ++ freelist().AcquireInternalLocks(); ++ lock_.Lock(); ++ } ++ ++ void ReleaseInternalLocks() ++ { ++ lock_.Unlock(); ++ freelist().ReleaseInternalLocks(); ++ } ++ + // Insert the specified batch into the transfer cache. N is the number of + // elements in the range. RemoveRange() is the opposite operation. + void InsertRange(int size_class, absl::Span<void *> batch) diff --git a/contrib/libs/tcmalloc/patches/yandex.patch b/contrib/libs/tcmalloc/patches/yandex.patch index 12d11f2dad..98eaf2f4d8 100644 --- a/contrib/libs/tcmalloc/patches/yandex.patch +++ b/contrib/libs/tcmalloc/patches/yandex.patch @@ -1,91 +1,91 @@ -commit ab4069ebdd376db4d32c29e1a2414565ec849249 -author: prime -date: 2021-10-07T14:52:42+03:00 - - Apply yandex patches - ---- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -1112,6 +1112,11 @@ extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() { - return tcmalloc::tcmalloc_internal::Static::CPUCacheActive(); - } +commit ab4069ebdd376db4d32c29e1a2414565ec849249 +author: prime +date: 2021-10-07T14:52:42+03:00 -+extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() { -+ tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false); -+ tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache(); -+} -+ - extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() { - return tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size(); - } ---- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h 
(5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -75,6 +75,7 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit( - ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty( - const char* name_data, size_t name_size, size_t* value); - ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive(); -+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches(); - ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize(); - ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval( - absl::Duration* ret); ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -287,6 +287,16 @@ bool MallocExtension::PerCpuCachesActive() { - #endif - } - -+void MallocExtension::DeactivatePerCpuCaches() { -+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS -+ if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) { -+ return; -+ } -+ -+ MallocExtension_Internal_DeactivatePerCpuCaches(); -+#endif -+} -+ - int32_t MallocExtension::GetMaxPerCpuCacheSize() { - #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) { ---- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -329,6 +329,11 @@ class MallocExtension final { - // Gets whether TCMalloc is using per-CPU caches. - static bool PerCpuCachesActive(); - -+ // Extension for unified agent. -+ // -+ // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321 -+ static void DeactivatePerCpuCaches(); -+ - // Gets the current maximum cache size per CPU cache. 
- static int32_t GetMaxPerCpuCacheSize(); - // Sets the maximum cache size per CPU cache. This is a per-core limit. ---- contrib/libs/tcmalloc/tcmalloc/static_vars.h (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -122,6 +122,7 @@ class Static { - return cpu_cache_active_; - } - static void ActivateCPUCache() { cpu_cache_active_ = true; } -+ static void DeactivateCPUCache() { cpu_cache_active_ = false; } - - static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { - return ---- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (5096009d22199137186c9a972bc88409d8ebd513) -+++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) -@@ -2210,14 +2210,7 @@ extern "C" void* TCMallocInternalNewArray(size_t size) - TCMALLOC_ALIAS(TCMallocInternalNew); - #else - { -- void* p = fast_alloc(CppPolicy().WithoutHooks(), size); -- // We keep this next instruction out of fast_alloc for a reason: when -- // it's in, and new just calls fast_alloc, the optimizer may fold the -- // new call into fast_alloc, which messes up our whole section-based -- // stacktracing (see ABSL_ATTRIBUTE_SECTION, above). This ensures fast_alloc -- // isn't the last thing this fn calls, and prevents the folding. 
-- MallocHook::InvokeNewHook(p, size); -- return p; -+ return fast_alloc(CppPolicy().WithoutHooks(), size); - } - #endif // TCMALLOC_ALIAS + Apply yandex patches +--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -1112,6 +1112,11 @@ extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() { + return tcmalloc::tcmalloc_internal::Static::CPUCacheActive(); + } + ++extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() { ++ tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false); ++ tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache(); ++} ++ + extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() { + return tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size(); + } +--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -75,6 +75,7 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit( + ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty( + const char* name_data, size_t name_size, size_t* value); + ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive(); ++ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches(); + ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize(); + ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval( + absl::Duration* ret); +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -287,6 +287,16 @@ bool MallocExtension::PerCpuCachesActive() { + #endif + } + ++void MallocExtension::DeactivatePerCpuCaches() { ++#if 
ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS ++ if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) { ++ return; ++ } ++ ++ MallocExtension_Internal_DeactivatePerCpuCaches(); ++#endif ++} ++ + int32_t MallocExtension::GetMaxPerCpuCacheSize() { + #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) { +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -329,6 +329,11 @@ class MallocExtension final { + // Gets whether TCMalloc is using per-CPU caches. + static bool PerCpuCachesActive(); + ++ // Extension for unified agent. ++ // ++ // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321 ++ static void DeactivatePerCpuCaches(); ++ + // Gets the current maximum cache size per CPU cache. + static int32_t GetMaxPerCpuCacheSize(); + // Sets the maximum cache size per CPU cache. This is a per-core limit. 
+--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -122,6 +122,7 @@ class Static { + return cpu_cache_active_; + } + static void ActivateCPUCache() { cpu_cache_active_ = true; } ++ static void DeactivateCPUCache() { cpu_cache_active_ = false; } + + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { + return +--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -2210,14 +2210,7 @@ extern "C" void* TCMallocInternalNewArray(size_t size) + TCMALLOC_ALIAS(TCMallocInternalNew); + #else + { +- void* p = fast_alloc(CppPolicy().WithoutHooks(), size); +- // We keep this next instruction out of fast_alloc for a reason: when +- // it's in, and new just calls fast_alloc, the optimizer may fold the +- // new call into fast_alloc, which messes up our whole section-based +- // stacktracing (see ABSL_ATTRIBUTE_SECTION, above). This ensures fast_alloc +- // isn't the last thing this fn calls, and prevents the folding. 
+- MallocHook::InvokeNewHook(p, size); +- return p; ++ return fast_alloc(CppPolicy().WithoutHooks(), size); + } + #endif // TCMALLOC_ALIAS + diff --git a/contrib/libs/tcmalloc/slow_but_small/ya.make b/contrib/libs/tcmalloc/slow_but_small/ya.make index ddcb157d30..0509efd720 100644 --- a/contrib/libs/tcmalloc/slow_but_small/ya.make +++ b/contrib/libs/tcmalloc/slow_but_small/ya.make @@ -1,21 +1,21 @@ -LIBRARY() - +LIBRARY() + WITHOUT_LICENSE_TEXTS() -LICENSE(Apache-2.0) - -OWNER( - ayles - prime +LICENSE(Apache-2.0) + +OWNER( + ayles + prime g:cpp-contrib -) - -SRCDIR(contrib/libs/tcmalloc) - -INCLUDE(../common.inc) - +) + +SRCDIR(contrib/libs/tcmalloc) + +INCLUDE(../common.inc) + CFLAGS( -DTCMALLOC_SMALL_BUT_SLOW ) - -END() + +END() diff --git a/contrib/libs/tcmalloc/tcmalloc/BUILD b/contrib/libs/tcmalloc/tcmalloc/BUILD index e618b85eec..65b85ad2cf 100644 --- a/contrib/libs/tcmalloc/tcmalloc/BUILD +++ b/contrib/libs/tcmalloc/tcmalloc/BUILD @@ -1,1316 +1,1316 @@ -# Copyright 2019 The TCMalloc Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Description: -# -# tcmalloc is a fast malloc implementation. See -# https://github.com/google/tcmalloc/tree/master/docs/design.md for a high-level description of -# how this malloc works. 
- -load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test") -load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS") -load("//tcmalloc:variants.bzl", "create_tcmalloc_benchmark", "create_tcmalloc_testsuite") - -package(default_visibility = ["//visibility:private"]) - -licenses(["notice"]) - -exports_files(["LICENSE"]) - -config_setting( - name = "llvm", - flag_values = { - "@bazel_tools//tools/cpp:compiler": "clang", - }, - visibility = [ - "//tcmalloc/internal:__subpackages__", - "//tcmalloc/testing:__subpackages__", - ], -) - -cc_library( - name = "experiment", - srcs = ["experiment.cc"], - hdrs = [ - "experiment.h", - "experiment_config.h", - ], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":malloc_extension", - "//tcmalloc/internal:environment", - "//tcmalloc/internal:logging", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", - ], -) - -# Dependencies required by :tcmalloc and its variants. Since :common is built -# several different ways, it should not be included on this list. 
-tcmalloc_deps = [ - ":experiment", - ":malloc_extension", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:config", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/base:dynamic_annotations", - "@com_google_absl//absl/debugging:leak_check", - "@com_google_absl//absl/debugging:stacktrace", - "@com_google_absl//absl/debugging:symbolize", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/numeric:bits", - "//tcmalloc/internal:config", - "//tcmalloc/internal:declarations", - "//tcmalloc/internal:linked_list", - "//tcmalloc/internal:logging", - "//tcmalloc/internal:memory_stats", - "//tcmalloc/internal:optimization", - "//tcmalloc/internal:percpu", -] - -# This library provides tcmalloc always -cc_library( - name = "tcmalloc", - srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", - "tcmalloc.cc", - "tcmalloc.h", - ], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//visibility:public"], - deps = tcmalloc_deps + [ - ":common", - ], - alwayslink = 1, -) - -# Provides tcmalloc always; use per-thread mode. -cc_library( - name = "tcmalloc_deprecated_perthread", - srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", - "tcmalloc.cc", - "tcmalloc.h", - ], - copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = [ - "//tcmalloc/internal:__pkg__", - "//tcmalloc/testing:__pkg__", - ], - deps = tcmalloc_deps + [ - ":common_deprecated_perthread", - ], - alwayslink = 1, -) - -# An opt tcmalloc build with ASSERTs forced on (by turning off -# NDEBUG). Useful for tracking down crashes in production binaries. -# To use add malloc = "//tcmalloc:opt_with_assertions" in your -# target's build rule. 
-cc_library( - name = "opt_with_assertions", - srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", - "tcmalloc.cc", - "tcmalloc.h", - ], - copts = [ - "-O2", - "-UNDEBUG", - ] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//visibility:public"], - deps = tcmalloc_deps + [ - ":common", - ], - alwayslink = 1, -) - -cc_library( - name = "size_class_info", - hdrs = ["size_class_info.h"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - "//tcmalloc/internal:logging", - ], -) - -# List of common source files used by the various tcmalloc libraries. -common_srcs = [ - "arena.cc", - "arena.h", - "background.cc", - "central_freelist.cc", - "central_freelist.h", - "common.cc", - "common.h", - "cpu_cache.cc", - "cpu_cache.h", - "experimental_pow2_below64_size_class.cc", - "experimental_pow2_size_class.cc", - "legacy_size_classes.cc", - "guarded_page_allocator.h", - "guarded_page_allocator.cc", - "huge_address_map.cc", - "huge_allocator.cc", - "huge_allocator.h", - "huge_cache.cc", - "huge_cache.h", - "huge_region.h", - "huge_page_aware_allocator.cc", - "huge_page_aware_allocator.h", - "huge_page_filler.h", - "huge_pages.h", - "page_allocator.cc", - "page_allocator.h", - "page_allocator_interface.cc", - "page_allocator_interface.h", - "page_heap.cc", - "page_heap.h", - "page_heap_allocator.h", - "pagemap.cc", - "pagemap.h", - "parameters.cc", - "peak_heap_tracker.cc", - "sampler.cc", - "sampler.h", - "size_classes.cc", - "span.cc", - "span.h", - "span_stats.h", - "stack_trace_table.cc", - "stack_trace_table.h", - "static_vars.cc", - "static_vars.h", - "stats.cc", - "system-alloc.cc", - "system-alloc.h", - "thread_cache.cc", - "thread_cache.h", - "tracking.h", - "transfer_cache_stats.h", - "transfer_cache.cc", - "transfer_cache.h", - "transfer_cache_internals.h", -] - -common_hdrs = [ - "arena.h", - "central_freelist.h", - "common.h", - "cpu_cache.h", - "guarded_page_allocator.h", - 
"huge_address_map.h", - "huge_allocator.h", - "tcmalloc_policy.h", - "huge_cache.h", - "huge_page_filler.h", - "huge_pages.h", - "huge_region.h", - "huge_page_aware_allocator.h", - "page_allocator.h", - "page_allocator_interface.h", - "page_heap.h", - "page_heap_allocator.h", - "pages.h", - "pagemap.h", - "parameters.h", - "peak_heap_tracker.h", - "sampler.h", - "span.h", - "span_stats.h", - "stack_trace_table.h", - "stats.h", - "static_vars.h", - "system-alloc.h", - "thread_cache.h", - "tracking.h", - "transfer_cache_stats.h", - "transfer_cache.h", - "transfer_cache_internals.h", -] - -common_deps = [ - ":experiment", - ":malloc_extension", - ":noruntime_size_classes", - ":size_class_info", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:config", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/base:dynamic_annotations", - "@com_google_absl//absl/container:fixed_array", - "@com_google_absl//absl/debugging:debugging_internal", - "@com_google_absl//absl/debugging:stacktrace", - "@com_google_absl//absl/debugging:symbolize", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/hash:hash", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", - "//tcmalloc/internal:atomic_stats_counter", - "@com_google_absl//absl/numeric:bits", - "//tcmalloc/internal:config", - "//tcmalloc/internal:declarations", - "//tcmalloc/internal:environment", - "//tcmalloc/internal:linked_list", - "//tcmalloc/internal:logging", - "//tcmalloc/internal:mincore", - "//tcmalloc/internal:numa", - "//tcmalloc/internal:cache_topology", - "//tcmalloc/internal:optimization", - "//tcmalloc/internal:parameter_accessors", - "//tcmalloc/internal:percpu", - "//tcmalloc/internal:percpu_tcmalloc", - 
"//tcmalloc/internal:range_tracker", - "//tcmalloc/internal:timeseries_tracker", - "//tcmalloc/internal:util", -] - -cc_library( - name = "common", - srcs = common_srcs, - hdrs = common_hdrs, - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - -cc_library( - name = "common_deprecated_perthread", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - deps = common_deps, - alwayslink = 1, -) - -# TEMPORARY. WILL BE REMOVED. -# Add a dep to this if you want your binary to use hugepage-aware -# allocator. -cc_library( - name = "want_hpaa", - srcs = ["want_hpaa.cc"], - copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, - visibility = ["//visibility:public"], - deps = [ - "//tcmalloc/internal:config", - "@com_google_absl//absl/base:core_headers", - ], - alwayslink = 1, -) - -# TEMPORARY. WILL BE REMOVED. -# Add a dep to this if you want your binary to use hugepage-aware -# allocator with hpaa_subrelease=true. -cc_library( - name = "want_hpaa_subrelease", - srcs = ["want_hpaa_subrelease.cc"], - copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, - visibility = ["//visibility:public"], - deps = [ - "//tcmalloc/internal:config", - "@com_google_absl//absl/base:core_headers", - ], - alwayslink = 1, -) - -# TEMPORARY. WILL BE REMOVED. -# Add a dep to this if you want your binary to not use hugepage-aware -# allocator. -cc_library( - name = "want_no_hpaa", - srcs = ["want_no_hpaa.cc"], - copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, - visibility = ["//tcmalloc/testing:__pkg__"], - deps = [ - "//tcmalloc/internal:config", - "@com_google_absl//absl/base:core_headers", - ], - alwayslink = 1, -) - -# TEMPORARY. WILL BE REMOVED. -# Add a dep to this if you want your binary to use old span sizes. 
-cc_library( - name = "want_legacy_spans", - srcs = ["want_legacy_spans.cc"], - copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, - visibility = ["//tcmalloc/testing:__pkg__"], - deps = [ - "//tcmalloc/internal:config", - "@com_google_absl//absl/base:core_headers", - ], - alwayslink = 1, -) - -# Add a dep to this if you want your binary to enable NUMA awareness by -# default. -cc_library( - name = "want_numa_aware", - srcs = ["want_numa_aware.cc"], - copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, - visibility = [ - "//tcmalloc:__pkg__", - "//tcmalloc/internal:__pkg__", - "//tcmalloc/testing:__pkg__", - ], - deps = [ - "//tcmalloc/internal:config", - "@com_google_absl//absl/base:core_headers", - ], - alwayslink = 1, -) - -cc_library( - name = "runtime_size_classes", - srcs = ["runtime_size_classes.cc"], - hdrs = ["runtime_size_classes.h"], - copts = TCMALLOC_DEFAULT_COPTS, - visibility = ["//visibility:private"], - deps = [ - ":size_class_info", - "//tcmalloc/internal:environment", - "//tcmalloc/internal:logging", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - ], - alwayslink = 1, -) - -cc_library( - name = "noruntime_size_classes", - srcs = ["noruntime_size_classes.cc"], - hdrs = ["runtime_size_classes.h"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":size_class_info", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - ], - alwayslink = 1, -) - -# TCMalloc with large pages is usually faster but fragmentation is higher. See -# https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. 
-cc_library( - name = "tcmalloc_large_pages", - srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", - "tcmalloc.cc", - "tcmalloc.h", - ], - copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//visibility:public"], - deps = tcmalloc_deps + [ - ":common_large_pages", - ], - alwayslink = 1, -) - -cc_library( - name = "common_large_pages", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - -# TCMalloc with 256k pages is usually faster but fragmentation is higher. See -# https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. -cc_library( - name = "tcmalloc_256k_pages", - srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", - "tcmalloc.cc", - "tcmalloc.h", - ], - copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//visibility:public"], - deps = tcmalloc_deps + [ - ":common_256k_pages", - ], - alwayslink = 1, -) - -cc_library( - name = "common_256k_pages", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - -cc_library( - name = "tcmalloc_256k_pages_and_numa", - srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", - "tcmalloc.cc", - "tcmalloc.h", - ], - copts = [ - "-DTCMALLOC_256K_PAGES", - "-DTCMALLOC_NUMA_AWARE", - ] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc/testing:__pkg__"], - deps = tcmalloc_deps + [ - ":common_256k_pages_and_numa", - ], - alwayslink = 1, -) - -cc_library( - name = 
"common_256k_pages_and_numa", - srcs = common_srcs, - hdrs = common_hdrs, - copts = [ - "-DTCMALLOC_256K_PAGES", - "-DTCMALLOC_NUMA_AWARE", - ] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - -# TCMalloc small-but-slow is a a version of TCMalloc that chooses to minimize -# fragmentation at a *severe* cost to performance. It should be used by -# applications that have significant memory constraints, but don't need to -# frequently allocate/free objects. -# -# See https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. -cc_library( - name = "tcmalloc_small_but_slow", - srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", - "tcmalloc.cc", - "tcmalloc.h", - ], - copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//visibility:public"], - deps = tcmalloc_deps + [ - ":common_small_but_slow", - ], - alwayslink = 1, -) - -cc_library( - name = "common_small_but_slow", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - -# TCMalloc with NUMA awareness compiled in. Note that by default NUMA awareness -# will still be disabled at runtime - this default can be changed by adding a -# dependency upon want_numa_aware, or overridden by setting the -# TCMALLOC_NUMA_AWARE environment variable. 
-cc_library( - name = "tcmalloc_numa_aware", - srcs = [ - "libc_override.h", - "libc_override_gcc_and_weak.h", - "libc_override_glibc.h", - "libc_override_redefine.h", - "tcmalloc.cc", - "tcmalloc.h", - ], - copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc/testing:__pkg__"], - deps = tcmalloc_deps + [ - ":common_numa_aware", - ], - alwayslink = 1, -) - -cc_library( - name = "common_numa_aware", - srcs = common_srcs, - hdrs = common_hdrs, - copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, - alwayslink = 1, -) - -# Export some header files to //tcmalloc/testing/... -package_group( - name = "tcmalloc_tests", - packages = [ - "//tcmalloc/...", - ], -) - -cc_library( - name = "headers_for_tests", - srcs = [ - "arena.h", - "central_freelist.h", - "guarded_page_allocator.h", - "huge_address_map.h", - "huge_allocator.h", - "huge_cache.h", - "huge_page_aware_allocator.h", - "huge_page_filler.h", - "huge_pages.h", - "huge_region.h", - "page_allocator.h", - "page_allocator_interface.h", - "page_heap.h", - "page_heap_allocator.h", - "pagemap.h", - "parameters.h", - "peak_heap_tracker.h", - "span_stats.h", - "stack_trace_table.h", - "tracking.h", - "transfer_cache.h", - "transfer_cache_internals.h", - "transfer_cache_stats.h", - ], - hdrs = [ - "common.h", - "pages.h", - "sampler.h", - "size_class_info.h", - "span.h", - "static_vars.h", - "stats.h", - "system-alloc.h", - ], - copts = TCMALLOC_DEFAULT_COPTS, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = common_deps, -) - -cc_library( - name = "mock_central_freelist", - testonly = 1, - srcs = ["mock_central_freelist.cc"], - hdrs = ["mock_central_freelist.h"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_google_absl//absl/base", - "@com_google_absl//absl/types:span", - "@com_google_googletest//:gtest", - ], -) - 
-cc_library( - name = "page_allocator_test_util", - testonly = 1, - srcs = [ - "page_allocator_test_util.h", - ], - hdrs = ["page_allocator_test_util.h"], - copts = TCMALLOC_DEFAULT_COPTS, - visibility = ["//tcmalloc:tcmalloc_tests"], - deps = [ - ":common", - ":malloc_extension", - ], -) - -cc_test( - name = "page_heap_test", - srcs = ["page_heap_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/memory", - "@com_google_googletest//:gtest_main", - ], -) - -cc_library( - name = "mock_transfer_cache", - testonly = 1, - srcs = ["mock_transfer_cache.cc"], - hdrs = ["mock_transfer_cache.h"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - ":mock_central_freelist", - "@com_google_absl//absl/random", - "@com_google_absl//absl/random:distributions", - "@com_google_googletest//:gtest", - ], -) - -cc_fuzz_test( - name = "transfer_cache_fuzz", - testonly = 1, - srcs = ["transfer_cache_fuzz.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - tags = [ - "noasan", - "nomsan", - "notsan", - ], - deps = [ - ":common", - ":mock_central_freelist", - ":mock_transfer_cache", - ], -) - -cc_test( - name = "arena_test", - timeout = "moderate", - srcs = ["arena_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "transfer_cache_test", - timeout = "moderate", - srcs = ["transfer_cache_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - shard_count = 3, - deps = [ - ":common", - ":mock_central_freelist", - ":mock_transfer_cache", - "//tcmalloc/testing:thread_manager", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/random", - "@com_google_absl//absl/random:distributions", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:span", - "@com_google_googletest//:gtest_main", - 
], -) - -create_tcmalloc_benchmark( - name = "transfer_cache_benchmark", - srcs = ["transfer_cache_benchmark.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - deps = [ - ":common", - ":mock_central_freelist", - ":mock_transfer_cache", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/types:optional", - ], -) - -cc_test( - name = "huge_cache_test", - srcs = ["huge_cache_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/random", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "huge_allocator_test", - srcs = ["huge_allocator_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/random", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "huge_page_filler_test", - timeout = "long", - srcs = ["huge_page_filler_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/random", - "@com_google_absl//absl/random:distributions", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest_main", - 
], -) - -cc_test( - name = "huge_page_aware_allocator_test", - srcs = ["huge_page_aware_allocator_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - malloc = "//tcmalloc", - tags = [ - ], - deps = [ - ":common", - ":malloc_extension", - ":page_allocator_test_util", - "//tcmalloc/internal:logging", - "//tcmalloc/testing:thread_manager", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/random", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "huge_region_test", - srcs = ["huge_region_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/random", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest_main", - ], -) - -create_tcmalloc_benchmark( - name = "guarded_page_allocator_benchmark", - srcs = ["guarded_page_allocator_benchmark.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - ], -) - -cc_test( - name = "guarded_page_allocator_test", - srcs = ["guarded_page_allocator_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/numeric:bits", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - 
"@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "pagemap_test", - srcs = ["pagemap_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/random", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "realloc_test", - srcs = ["realloc_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - deps = [ - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/random", - "@com_google_absl//absl/random:distributions", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "stack_trace_table_test", - srcs = ["stack_trace_table_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/debugging:stacktrace", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "system-alloc_test", - srcs = ["system-alloc_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - tags = ["nosan"], - deps = [ - ":common", - ":malloc_extension", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", - ], -) - -# This test has been named "large" since before tests were s/m/l. -# The "large" refers to large allocation sizes. 
-cc_test( - name = "tcmalloc_large_test", - size = "small", - timeout = "moderate", - srcs = ["tcmalloc_large_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - tags = [ - "noasan", - "noubsan", - ], - deps = [ - ":common", - ":malloc_extension", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/container:node_hash_set", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "malloc_extension_system_malloc_test", - srcs = ["malloc_extension_system_malloc_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc/internal:system_malloc", - deps = [ - ":malloc_extension", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/random", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "malloc_extension_test", - srcs = ["malloc_extension_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - tags = [ - "nosan", - ], - deps = [ - ":malloc_extension", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest_main", - ], -) - -cc_fuzz_test( - name = "malloc_extension_fuzz", - testonly = 1, - srcs = ["malloc_extension_fuzz.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - tags = [ - "noasan", - "nomsan", - "notsan", - ], - deps = [ - ":malloc_extension", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", - ], -) - -cc_test( - name = "page_allocator_test", - srcs = ["page_allocator_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - deps = [ - ":common", - ":malloc_extension", - ":page_allocator_test_util", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - 
"@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "profile_test", - size = "medium", - timeout = "long", - srcs = ["profile_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - flaky = 1, # TODO(b/134690164) - linkstatic = 1, - malloc = "//tcmalloc", - shard_count = 2, - tags = [ - "noasan", - "nomsan", - "notsan", - ], - deps = [ - ":malloc_extension", - "//tcmalloc/internal:declarations", - "//tcmalloc/internal:linked_list", - "//tcmalloc/testing:testutil", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/synchronization", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "thread_cache_test", - size = "medium", - srcs = ["thread_cache_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - malloc = "//tcmalloc:tcmalloc_deprecated_perthread", - tags = [ - "nosan", - ], - deps = [ - ":malloc_extension", - "//tcmalloc/internal:logging", - "//tcmalloc/internal:memory_stats", - "//tcmalloc/internal:parameter_accessors", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - ], -) - -create_tcmalloc_testsuite( - name = "size_classes_test", - srcs = ["size_classes_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":size_class_info", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/random", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "size_classes_test_with_runtime_size_classes", - srcs = ["size_classes_with_runtime_size_classes_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - malloc = "//tcmalloc", - deps = [ - ":common", - ":runtime_size_classes", - ":size_class_info", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = 
"heap_profiling_test", - srcs = ["heap_profiling_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - tags = [ - "nosan", - ], - deps = [ - ":common", - ":malloc_extension", - "//tcmalloc/internal:logging", - "//tcmalloc/internal:parameter_accessors", - "@com_github_google_benchmark//:benchmark", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "runtime_size_classes_test", - srcs = ["runtime_size_classes_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - linkstatic = 1, - malloc = "//tcmalloc", - deps = [ - ":runtime_size_classes", - "@com_github_google_benchmark//:benchmark", - "@com_google_googletest//:gtest_main", - ], -) - -create_tcmalloc_testsuite( - name = "span_test", - srcs = ["span_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/random", - "@com_google_googletest//:gtest_main", - ], -) - -create_tcmalloc_benchmark( - name = "span_benchmark", - srcs = ["span_benchmark.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = ":tcmalloc", - deps = [ - ":common", - "//tcmalloc/internal:logging", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/random", - ], -) - -cc_test( - name = "stats_test", - srcs = ["stats_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/base", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "huge_address_map_test", - srcs = ["huge_address_map_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", - "@com_google_googletest//:gtest_main", - ], -) - -cc_library( - name = "malloc_extension", - srcs = ["malloc_extension.cc"], - hdrs = 
[ - "internal_malloc_extension.h", - "malloc_extension.h", - ], - copts = TCMALLOC_DEFAULT_COPTS, - visibility = [ - "//visibility:public", - ], - deps = [ - "//tcmalloc/internal:parameter_accessors", - "@com_google_absl//absl/base:config", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/base:malloc_internal", - "@com_google_absl//absl/functional:function_ref", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", - ], -) - -cc_test( - name = "experiment_config_test", - srcs = ["experiment_config_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":experiment", - "@com_github_google_benchmark//:benchmark", - "@com_google_googletest//:gtest_main", - ], -) - -cc_fuzz_test( - name = "experiment_fuzz", - testonly = 1, - srcs = ["experiment_fuzz.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":experiment", - "@com_google_absl//absl/strings", - ], -) - -cc_fuzz_test( - name = "runtime_size_classes_fuzz", - testonly = 1, - srcs = ["runtime_size_classes_fuzz.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - deps = [ - ":common", - ":runtime_size_classes", - ":size_class_info", - "@com_google_absl//absl/strings", - ], -) - -cc_test( - name = "cpu_cache_test", - srcs = ["cpu_cache_test.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = ":tcmalloc_deprecated_perthread", - tags = [ - # TODO(b/193887621): Add TSan annotations to CPUCache and/or add - # atomics to PageMap - "notsan", - ], - deps = [ - ":common_deprecated_perthread", - "//tcmalloc/internal:optimization", - "//tcmalloc/internal:util", - "//tcmalloc/testing:testutil", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/random", - "@com_google_absl//absl/random:seed_sequences", - "@com_google_googletest//:gtest_main", - ], -) - -create_tcmalloc_testsuite( - name = "central_freelist_test", - srcs = ["central_freelist_test.cc"], - copts = 
TCMALLOC_DEFAULT_COPTS, - deps = [ - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/random", - "@com_google_googletest//:gtest_main", - ], -) - -create_tcmalloc_benchmark( - name = "central_freelist_benchmark", - srcs = ["central_freelist_benchmark.cc"], - copts = TCMALLOC_DEFAULT_COPTS, - malloc = "//tcmalloc", - deps = [ - ":common", - "@com_github_google_benchmark//:benchmark", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/random", - "@com_google_absl//absl/types:optional", - ], -) +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Description: +# +# tcmalloc is a fast malloc implementation. See +# https://github.com/google/tcmalloc/tree/master/docs/design.md for a high-level description of +# how this malloc works. 
+ +load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test") +load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS") +load("//tcmalloc:variants.bzl", "create_tcmalloc_benchmark", "create_tcmalloc_testsuite") + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +config_setting( + name = "llvm", + flag_values = { + "@bazel_tools//tools/cpp:compiler": "clang", + }, + visibility = [ + "//tcmalloc/internal:__subpackages__", + "//tcmalloc/testing:__subpackages__", + ], +) + +cc_library( + name = "experiment", + srcs = ["experiment.cc"], + hdrs = [ + "experiment.h", + "experiment_config.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":malloc_extension", + "//tcmalloc/internal:environment", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) + +# Dependencies required by :tcmalloc and its variants. Since :common is built +# several different ways, it should not be included on this list. 
+tcmalloc_deps = [ + ":experiment", + ":malloc_extension", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/debugging:leak_check", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/debugging:symbolize", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/numeric:bits", + "//tcmalloc/internal:config", + "//tcmalloc/internal:declarations", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:memory_stats", + "//tcmalloc/internal:optimization", + "//tcmalloc/internal:percpu", +] + +# This library provides tcmalloc always +cc_library( + name = "tcmalloc", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common", + ], + alwayslink = 1, +) + +# Provides tcmalloc always; use per-thread mode. +cc_library( + name = "tcmalloc_deprecated_perthread", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc/internal:__pkg__", + "//tcmalloc/testing:__pkg__", + ], + deps = tcmalloc_deps + [ + ":common_deprecated_perthread", + ], + alwayslink = 1, +) + +# An opt tcmalloc build with ASSERTs forced on (by turning off +# NDEBUG). Useful for tracking down crashes in production binaries. +# To use add malloc = "//tcmalloc:opt_with_assertions" in your +# target's build rule. 
+cc_library( + name = "opt_with_assertions", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = [ + "-O2", + "-UNDEBUG", + ] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common", + ], + alwayslink = 1, +) + +cc_library( + name = "size_class_info", + hdrs = ["size_class_info.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + "//tcmalloc/internal:logging", + ], +) + +# List of common source files used by the various tcmalloc libraries. +common_srcs = [ + "arena.cc", + "arena.h", + "background.cc", + "central_freelist.cc", + "central_freelist.h", + "common.cc", + "common.h", + "cpu_cache.cc", + "cpu_cache.h", + "experimental_pow2_below64_size_class.cc", + "experimental_pow2_size_class.cc", + "legacy_size_classes.cc", + "guarded_page_allocator.h", + "guarded_page_allocator.cc", + "huge_address_map.cc", + "huge_allocator.cc", + "huge_allocator.h", + "huge_cache.cc", + "huge_cache.h", + "huge_region.h", + "huge_page_aware_allocator.cc", + "huge_page_aware_allocator.h", + "huge_page_filler.h", + "huge_pages.h", + "page_allocator.cc", + "page_allocator.h", + "page_allocator_interface.cc", + "page_allocator_interface.h", + "page_heap.cc", + "page_heap.h", + "page_heap_allocator.h", + "pagemap.cc", + "pagemap.h", + "parameters.cc", + "peak_heap_tracker.cc", + "sampler.cc", + "sampler.h", + "size_classes.cc", + "span.cc", + "span.h", + "span_stats.h", + "stack_trace_table.cc", + "stack_trace_table.h", + "static_vars.cc", + "static_vars.h", + "stats.cc", + "system-alloc.cc", + "system-alloc.h", + "thread_cache.cc", + "thread_cache.h", + "tracking.h", + "transfer_cache_stats.h", + "transfer_cache.cc", + "transfer_cache.h", + "transfer_cache_internals.h", +] + +common_hdrs = [ + "arena.h", + "central_freelist.h", + "common.h", + "cpu_cache.h", + "guarded_page_allocator.h", + 
"huge_address_map.h", + "huge_allocator.h", + "tcmalloc_policy.h", + "huge_cache.h", + "huge_page_filler.h", + "huge_pages.h", + "huge_region.h", + "huge_page_aware_allocator.h", + "page_allocator.h", + "page_allocator_interface.h", + "page_heap.h", + "page_heap_allocator.h", + "pages.h", + "pagemap.h", + "parameters.h", + "peak_heap_tracker.h", + "sampler.h", + "span.h", + "span_stats.h", + "stack_trace_table.h", + "stats.h", + "static_vars.h", + "system-alloc.h", + "thread_cache.h", + "tracking.h", + "transfer_cache_stats.h", + "transfer_cache.h", + "transfer_cache_internals.h", +] + +common_deps = [ + ":experiment", + ":malloc_extension", + ":noruntime_size_classes", + ":size_class_info", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/container:fixed_array", + "@com_google_absl//absl/debugging:debugging_internal", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/debugging:symbolize", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/hash:hash", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + "//tcmalloc/internal:atomic_stats_counter", + "@com_google_absl//absl/numeric:bits", + "//tcmalloc/internal:config", + "//tcmalloc/internal:declarations", + "//tcmalloc/internal:environment", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:mincore", + "//tcmalloc/internal:numa", + "//tcmalloc/internal:cache_topology", + "//tcmalloc/internal:optimization", + "//tcmalloc/internal:parameter_accessors", + "//tcmalloc/internal:percpu", + "//tcmalloc/internal:percpu_tcmalloc", + 
"//tcmalloc/internal:range_tracker", + "//tcmalloc/internal:timeseries_tracker", + "//tcmalloc/internal:util", +] + +cc_library( + name = "common", + srcs = common_srcs, + hdrs = common_hdrs, + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +cc_library( + name = "common_deprecated_perthread", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = common_deps, + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use hugepage-aware +# allocator. +cc_library( + name = "want_hpaa", + srcs = ["want_hpaa.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use hugepage-aware +# allocator with hpaa_subrelease=true. +cc_library( + name = "want_hpaa_subrelease", + srcs = ["want_hpaa_subrelease.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to not use hugepage-aware +# allocator. +cc_library( + name = "want_no_hpaa", + srcs = ["want_no_hpaa.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc/testing:__pkg__"], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use old span sizes. 
+cc_library( + name = "want_legacy_spans", + srcs = ["want_legacy_spans.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc/testing:__pkg__"], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# Add a dep to this if you want your binary to enable NUMA awareness by +# default. +cc_library( + name = "want_numa_aware", + srcs = ["want_numa_aware.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__pkg__", + "//tcmalloc/internal:__pkg__", + "//tcmalloc/testing:__pkg__", + ], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +cc_library( + name = "runtime_size_classes", + srcs = ["runtime_size_classes.cc"], + hdrs = ["runtime_size_classes.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":size_class_info", + "//tcmalloc/internal:environment", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "noruntime_size_classes", + srcs = ["noruntime_size_classes.cc"], + hdrs = ["runtime_size_classes.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":size_class_info", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +# TCMalloc with large pages is usually faster but fragmentation is higher. See +# https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. 
+cc_library( + name = "tcmalloc_large_pages", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common_large_pages", + ], + alwayslink = 1, +) + +cc_library( + name = "common_large_pages", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +# TCMalloc with 256k pages is usually faster but fragmentation is higher. See +# https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. +cc_library( + name = "tcmalloc_256k_pages", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common_256k_pages", + ], + alwayslink = 1, +) + +cc_library( + name = "common_256k_pages", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +cc_library( + name = "tcmalloc_256k_pages_and_numa", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = [ + "-DTCMALLOC_256K_PAGES", + "-DTCMALLOC_NUMA_AWARE", + ] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc/testing:__pkg__"], + deps = tcmalloc_deps + [ + ":common_256k_pages_and_numa", + ], + alwayslink = 1, +) + +cc_library( + name = 
"common_256k_pages_and_numa", + srcs = common_srcs, + hdrs = common_hdrs, + copts = [ + "-DTCMALLOC_256K_PAGES", + "-DTCMALLOC_NUMA_AWARE", + ] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +# TCMalloc small-but-slow is a a version of TCMalloc that chooses to minimize +# fragmentation at a *severe* cost to performance. It should be used by +# applications that have significant memory constraints, but don't need to +# frequently allocate/free objects. +# +# See https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. +cc_library( + name = "tcmalloc_small_but_slow", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common_small_but_slow", + ], + alwayslink = 1, +) + +cc_library( + name = "common_small_but_slow", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +# TCMalloc with NUMA awareness compiled in. Note that by default NUMA awareness +# will still be disabled at runtime - this default can be changed by adding a +# dependency upon want_numa_aware, or overridden by setting the +# TCMALLOC_NUMA_AWARE environment variable. 
+cc_library( + name = "tcmalloc_numa_aware", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc/testing:__pkg__"], + deps = tcmalloc_deps + [ + ":common_numa_aware", + ], + alwayslink = 1, +) + +cc_library( + name = "common_numa_aware", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +# Export some header files to //tcmalloc/testing/... +package_group( + name = "tcmalloc_tests", + packages = [ + "//tcmalloc/...", + ], +) + +cc_library( + name = "headers_for_tests", + srcs = [ + "arena.h", + "central_freelist.h", + "guarded_page_allocator.h", + "huge_address_map.h", + "huge_allocator.h", + "huge_cache.h", + "huge_page_aware_allocator.h", + "huge_page_filler.h", + "huge_pages.h", + "huge_region.h", + "page_allocator.h", + "page_allocator_interface.h", + "page_heap.h", + "page_heap_allocator.h", + "pagemap.h", + "parameters.h", + "peak_heap_tracker.h", + "span_stats.h", + "stack_trace_table.h", + "tracking.h", + "transfer_cache.h", + "transfer_cache_internals.h", + "transfer_cache_stats.h", + ], + hdrs = [ + "common.h", + "pages.h", + "sampler.h", + "size_class_info.h", + "span.h", + "static_vars.h", + "stats.h", + "system-alloc.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, +) + +cc_library( + name = "mock_central_freelist", + testonly = 1, + srcs = ["mock_central_freelist.cc"], + hdrs = ["mock_central_freelist.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest", + ], +) + 
+cc_library( + name = "page_allocator_test_util", + testonly = 1, + srcs = [ + "page_allocator_test_util.h", + ], + hdrs = ["page_allocator_test_util.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = [ + ":common", + ":malloc_extension", + ], +) + +cc_test( + name = "page_heap_test", + srcs = ["page_heap_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "mock_transfer_cache", + testonly = 1, + srcs = ["mock_transfer_cache.cc"], + hdrs = ["mock_transfer_cache.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + ":mock_central_freelist", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_googletest//:gtest", + ], +) + +cc_fuzz_test( + name = "transfer_cache_fuzz", + testonly = 1, + srcs = ["transfer_cache_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + tags = [ + "noasan", + "nomsan", + "notsan", + ], + deps = [ + ":common", + ":mock_central_freelist", + ":mock_transfer_cache", + ], +) + +cc_test( + name = "arena_test", + timeout = "moderate", + srcs = ["arena_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "transfer_cache_test", + timeout = "moderate", + srcs = ["transfer_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + shard_count = 3, + deps = [ + ":common", + ":mock_central_freelist", + ":mock_transfer_cache", + "//tcmalloc/testing:thread_manager", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + 
], +) + +create_tcmalloc_benchmark( + name = "transfer_cache_benchmark", + srcs = ["transfer_cache_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + ":mock_central_freelist", + ":mock_transfer_cache", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/types:optional", + ], +) + +cc_test( + name = "huge_cache_test", + srcs = ["huge_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/random", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_allocator_test", + srcs = ["huge_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_page_filler_test", + timeout = "long", + srcs = ["huge_page_filler_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + 
], +) + +cc_test( + name = "huge_page_aware_allocator_test", + srcs = ["huge_page_aware_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + tags = [ + ], + deps = [ + ":common", + ":malloc_extension", + ":page_allocator_test_util", + "//tcmalloc/internal:logging", + "//tcmalloc/testing:thread_manager", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/random", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_region_test", + srcs = ["huge_region_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_benchmark( + name = "guarded_page_allocator_benchmark", + srcs = ["guarded_page_allocator_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + ], +) + +cc_test( + name = "guarded_page_allocator_test", + srcs = ["guarded_page_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/numeric:bits", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + 
"@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "pagemap_test", + srcs = ["pagemap_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "realloc_test", + srcs = ["realloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "stack_trace_table_test", + srcs = ["stack_trace_table_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "system-alloc_test", + srcs = ["system-alloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = ["nosan"], + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +# This test has been named "large" since before tests were s/m/l. +# The "large" refers to large allocation sizes. 
+cc_test( + name = "tcmalloc_large_test", + size = "small", + timeout = "moderate", + srcs = ["tcmalloc_large_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = [ + "noasan", + "noubsan", + ], + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:node_hash_set", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "malloc_extension_system_malloc_test", + srcs = ["malloc_extension_system_malloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc/internal:system_malloc", + deps = [ + ":malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "malloc_extension_test", + srcs = ["malloc_extension_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = [ + "nosan", + ], + deps = [ + ":malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_fuzz_test( + name = "malloc_extension_fuzz", + testonly = 1, + srcs = ["malloc_extension_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + tags = [ + "noasan", + "nomsan", + "notsan", + ], + deps = [ + ":malloc_extension", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) + +cc_test( + name = "page_allocator_test", + srcs = ["page_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":common", + ":malloc_extension", + ":page_allocator_test_util", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + 
"@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "profile_test", + size = "medium", + timeout = "long", + srcs = ["profile_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + flaky = 1, # TODO(b/134690164) + linkstatic = 1, + malloc = "//tcmalloc", + shard_count = 2, + tags = [ + "noasan", + "nomsan", + "notsan", + ], + deps = [ + ":malloc_extension", + "//tcmalloc/internal:declarations", + "//tcmalloc/internal:linked_list", + "//tcmalloc/testing:testutil", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/synchronization", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "thread_cache_test", + size = "medium", + srcs = ["thread_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc:tcmalloc_deprecated_perthread", + tags = [ + "nosan", + ], + deps = [ + ":malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:memory_stats", + "//tcmalloc/internal:parameter_accessors", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "size_classes_test", + srcs = ["size_classes_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "size_classes_test_with_runtime_size_classes", + srcs = ["size_classes_with_runtime_size_classes_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":common", + ":runtime_size_classes", + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = 
"heap_profiling_test", + srcs = ["heap_profiling_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = [ + "nosan", + ], + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:parameter_accessors", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "runtime_size_classes_test", + srcs = ["runtime_size_classes_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":runtime_size_classes", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "span_test", + srcs = ["span_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_benchmark( + name = "span_benchmark", + srcs = ["span_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = ":tcmalloc", + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + ], +) + +cc_test( + name = "stats_test", + srcs = ["stats_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_address_map_test", + srcs = ["huge_address_map_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "malloc_extension", + srcs = ["malloc_extension.cc"], + hdrs = 
[ + "internal_malloc_extension.h", + "malloc_extension.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//visibility:public", + ], + deps = [ + "//tcmalloc/internal:parameter_accessors", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:malloc_internal", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) + +cc_test( + name = "experiment_config_test", + srcs = ["experiment_config_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":experiment", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_fuzz_test( + name = "experiment_fuzz", + testonly = 1, + srcs = ["experiment_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":experiment", + "@com_google_absl//absl/strings", + ], +) + +cc_fuzz_test( + name = "runtime_size_classes_fuzz", + testonly = 1, + srcs = ["runtime_size_classes_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + ":runtime_size_classes", + ":size_class_info", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "cpu_cache_test", + srcs = ["cpu_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = ":tcmalloc_deprecated_perthread", + tags = [ + # TODO(b/193887621): Add TSan annotations to CPUCache and/or add + # atomics to PageMap + "notsan", + ], + deps = [ + ":common_deprecated_perthread", + "//tcmalloc/internal:optimization", + "//tcmalloc/internal:util", + "//tcmalloc/testing:testutil", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:seed_sequences", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "central_freelist_test", + srcs = ["central_freelist_test.cc"], + copts = 
TCMALLOC_DEFAULT_COPTS, + deps = [ + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_benchmark( + name = "central_freelist_benchmark", + srcs = ["central_freelist_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/random", + "@com_google_absl//absl/types:optional", + ], +) diff --git a/contrib/libs/tcmalloc/tcmalloc/arena.cc b/contrib/libs/tcmalloc/tcmalloc/arena.cc index 5ba1a65bf3..d71241e617 100644 --- a/contrib/libs/tcmalloc/tcmalloc/arena.cc +++ b/contrib/libs/tcmalloc/tcmalloc/arena.cc @@ -15,24 +15,24 @@ #include "tcmalloc/arena.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/static_vars.h" +#include "tcmalloc/static_vars.h" #include "tcmalloc/system-alloc.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { -void* Arena::Alloc(size_t bytes, int alignment) { - ASSERT(alignment > 0); - { // First we need to move up to the correct alignment. - const int misalignment = - reinterpret_cast<uintptr_t>(free_area_) % alignment; - const int alignment_bytes = - misalignment != 0 ? alignment - misalignment : 0; - free_area_ += alignment_bytes; - free_avail_ -= alignment_bytes; - bytes_allocated_ += alignment_bytes; - } +void* Arena::Alloc(size_t bytes, int alignment) { + ASSERT(alignment > 0); + { // First we need to move up to the correct alignment. + const int misalignment = + reinterpret_cast<uintptr_t>(free_area_) % alignment; + const int alignment_bytes = + misalignment != 0 ? alignment - misalignment : 0; + free_area_ += alignment_bytes; + free_avail_ -= alignment_bytes; + bytes_allocated_ += alignment_bytes; + } char* result; if (free_avail_ < bytes) { size_t ask = bytes > kAllocIncrement ? 
bytes : kAllocIncrement; @@ -40,32 +40,32 @@ void* Arena::Alloc(size_t bytes, int alignment) { // TODO(b/171081864): Arena allocations should be made relatively // infrequently. Consider tagging this memory with sampled objects which // are also infrequently allocated. - // - // In the meantime it is important that we use the current NUMA partition - // rather than always using a particular one because it's possible that any - // single partition we choose might only contain nodes that the process is - // unable to allocate from due to cgroup restrictions. - MemoryTag tag; - const auto& numa_topology = Static::numa_topology(); - if (numa_topology.numa_aware()) { - tag = NumaNormalTag(numa_topology.GetCurrentPartition()); - } else { - tag = MemoryTag::kNormal; - } - free_area_ = - reinterpret_cast<char*>(SystemAlloc(ask, &actual_size, kPageSize, tag)); + // + // In the meantime it is important that we use the current NUMA partition + // rather than always using a particular one because it's possible that any + // single partition we choose might only contain nodes that the process is + // unable to allocate from due to cgroup restrictions. 
+ MemoryTag tag; + const auto& numa_topology = Static::numa_topology(); + if (numa_topology.numa_aware()) { + tag = NumaNormalTag(numa_topology.GetCurrentPartition()); + } else { + tag = MemoryTag::kNormal; + } + free_area_ = + reinterpret_cast<char*>(SystemAlloc(ask, &actual_size, kPageSize, tag)); if (ABSL_PREDICT_FALSE(free_area_ == nullptr)) { Crash(kCrash, __FILE__, __LINE__, "FATAL ERROR: Out of memory trying to allocate internal tcmalloc " - "data (bytes, object-size); is something preventing mmap from " - "succeeding (sandbox, VSS limitations)?", + "data (bytes, object-size); is something preventing mmap from " + "succeeding (sandbox, VSS limitations)?", kAllocIncrement, bytes); } SystemBack(free_area_, actual_size); free_avail_ = actual_size; } - ASSERT(reinterpret_cast<uintptr_t>(free_area_) % alignment == 0); + ASSERT(reinterpret_cast<uintptr_t>(free_area_) % alignment == 0); result = free_area_; free_area_ += bytes; free_avail_ -= bytes; @@ -73,6 +73,6 @@ void* Arena::Alloc(size_t bytes, int alignment) { return reinterpret_cast<void*>(result); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/arena.h b/contrib/libs/tcmalloc/tcmalloc/arena.h index 0655253540..5ff9a31e96 100644 --- a/contrib/libs/tcmalloc/tcmalloc/arena.h +++ b/contrib/libs/tcmalloc/tcmalloc/arena.h @@ -18,13 +18,13 @@ #include <stddef.h> #include <stdint.h> -#include "absl/base/attributes.h" +#include "absl/base/attributes.h" #include "absl/base/thread_annotations.h" #include "tcmalloc/common.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Arena allocation; designed for use by tcmalloc internal data structures like // spans, profiles, etc. Always expands. @@ -35,9 +35,9 @@ class Arena { // Return a properly aligned byte array of length "bytes". 
Crashes if // allocation fails. Requires pageheap_lock is held. - ABSL_ATTRIBUTE_RETURNS_NONNULL void* Alloc(size_t bytes, - int alignment = kAlignment) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + ABSL_ATTRIBUTE_RETURNS_NONNULL void* Alloc(size_t bytes, + int alignment = kAlignment) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Returns the total number of bytes allocated from this arena. Requires // pageheap_lock is held. @@ -61,8 +61,8 @@ class Arena { Arena& operator=(const Arena&) = delete; }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_ARENA_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/arena_test.cc b/contrib/libs/tcmalloc/tcmalloc/arena_test.cc index 2fb728cac9..65996d2d9d 100644 --- a/contrib/libs/tcmalloc/tcmalloc/arena_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/arena_test.cc @@ -1,38 +1,38 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "tcmalloc/arena.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -TEST(Arena, AlignedAlloc) { - Arena arena; - absl::base_internal::SpinLockHolder h(&pageheap_lock); - EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(64, 64)) % 64, 0); - EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(7)) % 8, 0); - EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(128, 64)) % 64, 0); - for (int alignment = 1; alignment < 100; ++alignment) { - EXPECT_EQ( - reinterpret_cast<uintptr_t>(arena.Alloc(7, alignment)) % alignment, 0); - } -} - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/arena.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(Arena, AlignedAlloc) { + Arena arena; + absl::base_internal::SpinLockHolder h(&pageheap_lock); + EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(64, 64)) % 64, 0); + EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(7)) % 8, 0); + EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(128, 64)) % 64, 0); + for (int alignment = 1; alignment < 100; ++alignment) { + EXPECT_EQ( + reinterpret_cast<uintptr_t>(arena.Alloc(7, alignment)) % alignment, 0); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/background.cc b/contrib/libs/tcmalloc/tcmalloc/background.cc index ec57c03901..bc62a5bf53 100644 --- a/contrib/libs/tcmalloc/tcmalloc/background.cc +++ b/contrib/libs/tcmalloc/tcmalloc/background.cc @@ -17,17 +17,17 @@ #include "absl/base/internal/sysinfo.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/cpu_cache.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/percpu.h" #include "tcmalloc/internal_malloc_extension.h" #include "tcmalloc/malloc_extension.h" #include "tcmalloc/parameters.h" -#include "tcmalloc/static_vars.h" +#include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { // Called by MallocExtension_Internal_ProcessBackgroundActions. @@ -84,98 +84,98 @@ void ReleasePerCpuMemoryToOS() { memcpy(&prev_allowed_cpus, &allowed_cpus, sizeof(cpu_set_t)); } -void ShuffleCpuCaches() { - if (!MallocExtension::PerCpuCachesActive()) { - return; - } - - // Shuffle per-cpu caches - Static::cpu_cache().ShuffleCpuCaches(); -} - -// Reclaims per-cpu caches. 
The CPU mask used in ReleasePerCpuMemoryToOS does -// not provide useful information about virtual CPU state and hence, does not -// reclaim memory when virtual CPUs are enabled. -// -// Here, we use heuristics that are based on cache usage and misses, to -// determine if the caches have been recently inactive and if they may be -// reclaimed. -void ReclaimIdleCpuCaches() { - // Attempts reclaim only when per-CPU caches are in use. - if (!MallocExtension::PerCpuCachesActive()) { - return; - } - - Static::cpu_cache().TryReclaimingCaches(); -} - +void ShuffleCpuCaches() { + if (!MallocExtension::PerCpuCachesActive()) { + return; + } + + // Shuffle per-cpu caches + Static::cpu_cache().ShuffleCpuCaches(); +} + +// Reclaims per-cpu caches. The CPU mask used in ReleasePerCpuMemoryToOS does +// not provide useful information about virtual CPU state and hence, does not +// reclaim memory when virtual CPUs are enabled. +// +// Here, we use heuristics that are based on cache usage and misses, to +// determine if the caches have been recently inactive and if they may be +// reclaimed. +void ReclaimIdleCpuCaches() { + // Attempts reclaim only when per-CPU caches are in use. + if (!MallocExtension::PerCpuCachesActive()) { + return; + } + + Static::cpu_cache().TryReclaimingCaches(); +} + } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END // Release memory to the system at a constant rate. void MallocExtension_Internal_ProcessBackgroundActions() { tcmalloc::MallocExtension::MarkThreadIdle(); // Initialize storage for ReleasePerCpuMemoryToOS(). - CPU_ZERO(&tcmalloc::tcmalloc_internal::prev_allowed_cpus); + CPU_ZERO(&tcmalloc::tcmalloc_internal::prev_allowed_cpus); absl::Time prev_time = absl::Now(); constexpr absl::Duration kSleepTime = absl::Seconds(1); - - // Reclaim inactive per-cpu caches once per kCpuCacheReclaimPeriod. 
- // - // We use a longer 30 sec reclaim period to make sure that caches are indeed - // idle. Reclaim drains entire cache, as opposed to cache shuffle for instance - // that only shrinks a cache by a few objects at a time. So, we might have - // larger performance degradation if we use a shorter reclaim interval and - // drain caches that weren't supposed to. - constexpr absl::Duration kCpuCacheReclaimPeriod = absl::Seconds(30); - absl::Time last_reclaim = absl::Now(); - - // Shuffle per-cpu caches once per kCpuCacheShufflePeriod secs. - constexpr absl::Duration kCpuCacheShufflePeriod = absl::Seconds(5); - absl::Time last_shuffle = absl::Now(); - + + // Reclaim inactive per-cpu caches once per kCpuCacheReclaimPeriod. + // + // We use a longer 30 sec reclaim period to make sure that caches are indeed + // idle. Reclaim drains entire cache, as opposed to cache shuffle for instance + // that only shrinks a cache by a few objects at a time. So, we might have + // larger performance degradation if we use a shorter reclaim interval and + // drain caches that weren't supposed to. + constexpr absl::Duration kCpuCacheReclaimPeriod = absl::Seconds(30); + absl::Time last_reclaim = absl::Now(); + + // Shuffle per-cpu caches once per kCpuCacheShufflePeriod secs. 
+ constexpr absl::Duration kCpuCacheShufflePeriod = absl::Seconds(5); + absl::Time last_shuffle = absl::Now(); + while (true) { absl::Time now = absl::Now(); const ssize_t bytes_to_release = - static_cast<size_t>(tcmalloc::tcmalloc_internal::Parameters:: - background_release_rate()) * + static_cast<size_t>(tcmalloc::tcmalloc_internal::Parameters:: + background_release_rate()) * absl::ToDoubleSeconds(now - prev_time); if (bytes_to_release > 0) { // may be negative if time goes backwards tcmalloc::MallocExtension::ReleaseMemoryToSystem(bytes_to_release); } - const bool reclaim_idle_per_cpu_caches = - tcmalloc::tcmalloc_internal::Parameters::reclaim_idle_per_cpu_caches(); - - // If enabled, we use heuristics to determine if the per-cpu caches are - // inactive. If disabled, we use a more conservative approach, that uses - // allowed cpu masks, to reclaim cpu caches. - if (reclaim_idle_per_cpu_caches) { - // Try to reclaim per-cpu caches once every kCpuCacheReclaimPeriod - // when enabled. - if (now - last_reclaim >= kCpuCacheReclaimPeriod) { - tcmalloc::tcmalloc_internal::ReclaimIdleCpuCaches(); - last_reclaim = now; - } - } else { - tcmalloc::tcmalloc_internal::ReleasePerCpuMemoryToOS(); - } - - const bool shuffle_per_cpu_caches = - tcmalloc::tcmalloc_internal::Parameters::shuffle_per_cpu_caches(); - - if (shuffle_per_cpu_caches) { - if (now - last_shuffle >= kCpuCacheShufflePeriod) { - tcmalloc::tcmalloc_internal::ShuffleCpuCaches(); - last_shuffle = now; - } - } - - tcmalloc::tcmalloc_internal::Static().sharded_transfer_cache().Plunder(); + const bool reclaim_idle_per_cpu_caches = + tcmalloc::tcmalloc_internal::Parameters::reclaim_idle_per_cpu_caches(); + + // If enabled, we use heuristics to determine if the per-cpu caches are + // inactive. If disabled, we use a more conservative approach, that uses + // allowed cpu masks, to reclaim cpu caches. 
+ if (reclaim_idle_per_cpu_caches) { + // Try to reclaim per-cpu caches once every kCpuCacheReclaimPeriod + // when enabled. + if (now - last_reclaim >= kCpuCacheReclaimPeriod) { + tcmalloc::tcmalloc_internal::ReclaimIdleCpuCaches(); + last_reclaim = now; + } + } else { + tcmalloc::tcmalloc_internal::ReleasePerCpuMemoryToOS(); + } + + const bool shuffle_per_cpu_caches = + tcmalloc::tcmalloc_internal::Parameters::shuffle_per_cpu_caches(); + + if (shuffle_per_cpu_caches) { + if (now - last_shuffle >= kCpuCacheShufflePeriod) { + tcmalloc::tcmalloc_internal::ShuffleCpuCaches(); + last_shuffle = now; + } + } + + tcmalloc::tcmalloc_internal::Static().sharded_transfer_cache().Plunder(); prev_time = now; absl::SleepFor(kSleepTime); } diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc index 8620e228a1..09d6798839 100644 --- a/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc @@ -24,15 +24,15 @@ #include "tcmalloc/pages.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { static MemoryTag MemoryTagFromSizeClass(size_t cl) { - if (!Static::numa_topology().numa_aware()) { - return MemoryTag::kNormal; - } - return NumaNormalTag(cl / kNumBaseClasses); + if (!Static::numa_topology().numa_aware()) { + return MemoryTag::kNormal; + } + return NumaNormalTag(cl / kNumBaseClasses); } // Like a constructor and hence we disable thread safety analysis. 
@@ -50,28 +50,28 @@ static Span* MapObjectToSpan(void* object) { return span; } -Span* CentralFreeList::ReleaseToSpans(void* object, Span* span, - size_t object_size) { - if (ABSL_PREDICT_FALSE(span->FreelistEmpty(object_size))) { +Span* CentralFreeList::ReleaseToSpans(void* object, Span* span, + size_t object_size) { + if (ABSL_PREDICT_FALSE(span->FreelistEmpty(object_size))) { nonempty_.prepend(span); } - if (ABSL_PREDICT_TRUE(span->FreelistPush(object, object_size))) { + if (ABSL_PREDICT_TRUE(span->FreelistPush(object, object_size))) { return nullptr; } span->RemoveFromList(); // from nonempty_ return span; } -void CentralFreeList::InsertRange(absl::Span<void*> batch) { - CHECK_CONDITION(!batch.empty() && batch.size() <= kMaxObjectsToMove); +void CentralFreeList::InsertRange(absl::Span<void*> batch) { + CHECK_CONDITION(!batch.empty() && batch.size() <= kMaxObjectsToMove); Span* spans[kMaxObjectsToMove]; // Safe to store free spans into freed up space in span array. Span** free_spans = spans; int free_count = 0; // Prefetch Span objects to reduce cache misses. - for (int i = 0; i < batch.size(); ++i) { + for (int i = 0; i < batch.size(); ++i) { Span* span = MapObjectToSpan(batch[i]); ASSERT(span != nullptr); span->Prefetch(); @@ -81,55 +81,55 @@ void CentralFreeList::InsertRange(absl::Span<void*> batch) { // First, release all individual objects into spans under our mutex // and collect spans that become completely free. { - // Use local copy of variable to ensure that it is not reloaded. - size_t object_size = object_size_; + // Use local copy of variable to ensure that it is not reloaded. 
+ size_t object_size = object_size_; absl::base_internal::SpinLockHolder h(&lock_); - for (int i = 0; i < batch.size(); ++i) { - Span* span = ReleaseToSpans(batch[i], spans[i], object_size); - if (ABSL_PREDICT_FALSE(span)) { + for (int i = 0; i < batch.size(); ++i) { + Span* span = ReleaseToSpans(batch[i], spans[i], object_size); + if (ABSL_PREDICT_FALSE(span)) { free_spans[free_count] = span; free_count++; } } - + RecordMultiSpansDeallocated(free_count); - UpdateObjectCounts(batch.size()); + UpdateObjectCounts(batch.size()); } // Then, release all free spans into page heap under its mutex. - if (ABSL_PREDICT_FALSE(free_count)) { - // Unregister size class doesn't require holding any locks. + if (ABSL_PREDICT_FALSE(free_count)) { + // Unregister size class doesn't require holding any locks. for (int i = 0; i < free_count; ++i) { Span* const free_span = free_spans[i]; ASSERT(IsNormalMemory(free_span->start_address()) ); Static::pagemap().UnregisterSizeClass(free_span); - - // Before taking pageheap_lock, prefetch the PageTrackers these spans are - // on. - // - // Small-but-slow does not use the HugePageAwareAllocator (by default), so - // do not prefetch on this config. -#ifndef TCMALLOC_SMALL_BUT_SLOW - const PageId p = free_span->first_page(); - - // In huge_page_filler.h, we static_assert that PageTracker's key elements - // for deallocation are within the first two cachelines. - void* pt = Static::pagemap().GetHugepage(p); - // Prefetch for writing, as we will issue stores to the PageTracker - // instance. 
- __builtin_prefetch(pt, 1, 3); - __builtin_prefetch( - reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pt) + - ABSL_CACHELINE_SIZE), - 1, 3); -#endif // TCMALLOC_SMALL_BUT_SLOW - } - - const MemoryTag tag = MemoryTagFromSizeClass(size_class_); - absl::base_internal::SpinLockHolder h(&pageheap_lock); - for (int i = 0; i < free_count; ++i) { - Span* const free_span = free_spans[i]; + + // Before taking pageheap_lock, prefetch the PageTrackers these spans are + // on. + // + // Small-but-slow does not use the HugePageAwareAllocator (by default), so + // do not prefetch on this config. +#ifndef TCMALLOC_SMALL_BUT_SLOW + const PageId p = free_span->first_page(); + + // In huge_page_filler.h, we static_assert that PageTracker's key elements + // for deallocation are within the first two cachelines. + void* pt = Static::pagemap().GetHugepage(p); + // Prefetch for writing, as we will issue stores to the PageTracker + // instance. + __builtin_prefetch(pt, 1, 3); + __builtin_prefetch( + reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pt) + + ABSL_CACHELINE_SIZE), + 1, 3); +#endif // TCMALLOC_SMALL_BUT_SLOW + } + + const MemoryTag tag = MemoryTagFromSizeClass(size_class_); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (int i = 0; i < free_count; ++i) { + Span* const free_span = free_spans[i]; ASSERT(tag == GetMemoryTag(free_span->start_address())); Static::page_allocator().Delete(free_span, tag); } @@ -138,64 +138,64 @@ void CentralFreeList::InsertRange(absl::Span<void*> batch) { int CentralFreeList::RemoveRange(void** batch, int N) { ASSUME(N > 0); - // Use local copy of variable to ensure that it is not reloaded. - size_t object_size = object_size_; - int result = 0; + // Use local copy of variable to ensure that it is not reloaded. 
+ size_t object_size = object_size_; + int result = 0; absl::base_internal::SpinLockHolder h(&lock_); - if (ABSL_PREDICT_FALSE(nonempty_.empty())) { - result = Populate(batch, N); - } else { - do { - Span* span = nonempty_.first(); - int here = - span->FreelistPopBatch(batch + result, N - result, object_size); - ASSERT(here > 0); - if (span->FreelistEmpty(object_size)) { - span->RemoveFromList(); // from nonempty_ - } - result += here; - } while (result < N && !nonempty_.empty()); + if (ABSL_PREDICT_FALSE(nonempty_.empty())) { + result = Populate(batch, N); + } else { + do { + Span* span = nonempty_.first(); + int here = + span->FreelistPopBatch(batch + result, N - result, object_size); + ASSERT(here > 0); + if (span->FreelistEmpty(object_size)) { + span->RemoveFromList(); // from nonempty_ + } + result += here; + } while (result < N && !nonempty_.empty()); } UpdateObjectCounts(-result); return result; } // Fetch memory from the system and add to the central cache freelist. -int CentralFreeList::Populate(void** batch, - int N) ABSL_NO_THREAD_SAFETY_ANALYSIS { +int CentralFreeList::Populate(void** batch, + int N) ABSL_NO_THREAD_SAFETY_ANALYSIS { // Release central list lock while operating on pageheap - // Note, this could result in multiple calls to populate each allocating - // a new span and the pushing those partially full spans onto nonempty. + // Note, this could result in multiple calls to populate each allocating + // a new span and the pushing those partially full spans onto nonempty. 
lock_.Unlock(); const MemoryTag tag = MemoryTagFromSizeClass(size_class_); Span* span = Static::page_allocator().New(pages_per_span_, tag); - if (ABSL_PREDICT_FALSE(span == nullptr)) { + if (ABSL_PREDICT_FALSE(span == nullptr)) { Log(kLog, __FILE__, __LINE__, "tcmalloc: allocation failed", pages_per_span_.in_bytes()); lock_.Lock(); - return 0; + return 0; } - ASSERT(tag == GetMemoryTag(span->start_address())); + ASSERT(tag == GetMemoryTag(span->start_address())); ASSERT(span->num_pages() == pages_per_span_); Static::pagemap().RegisterSizeClass(span, size_class_); - size_t objects_per_span = objects_per_span_; - int result = span->BuildFreelist(object_size_, objects_per_span, batch, N); - ASSERT(result > 0); - // This is a cheaper check than using FreelistEmpty(). - bool span_empty = result == objects_per_span; + size_t objects_per_span = objects_per_span_; + int result = span->BuildFreelist(object_size_, objects_per_span, batch, N); + ASSERT(result > 0); + // This is a cheaper check than using FreelistEmpty(). 
+ bool span_empty = result == objects_per_span; lock_.Lock(); - if (!span_empty) { - nonempty_.prepend(span); - } + if (!span_empty) { + nonempty_.prepend(span); + } RecordSpanAllocated(); - return result; + return result; } -size_t CentralFreeList::OverheadBytes() const { - if (ABSL_PREDICT_FALSE(object_size_ == 0)) { +size_t CentralFreeList::OverheadBytes() const { + if (ABSL_PREDICT_FALSE(object_size_ == 0)) { return 0; } const size_t overhead_per_span = pages_per_span_.in_bytes() % object_size_; @@ -204,7 +204,7 @@ size_t CentralFreeList::OverheadBytes() const { SpanStats CentralFreeList::GetSpanStats() const { SpanStats stats; - if (ABSL_PREDICT_FALSE(objects_per_span_ == 0)) { + if (ABSL_PREDICT_FALSE(objects_per_span_ == 0)) { return stats; } stats.num_spans_requested = static_cast<size_t>(num_spans_requested_.value()); @@ -213,6 +213,6 @@ SpanStats CentralFreeList::GetSpanStats() const { return stats; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist.h b/contrib/libs/tcmalloc/tcmalloc/central_freelist.h index 266f184d6b..3d766af8c3 100644 --- a/contrib/libs/tcmalloc/tcmalloc/central_freelist.h +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist.h @@ -28,9 +28,9 @@ #include "tcmalloc/span.h" #include "tcmalloc/span_stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Data kept per size-class in central cache. class CentralFreeList { @@ -50,9 +50,9 @@ class CentralFreeList { // These methods all do internal locking. - // Insert batch into the central freelist. - // REQUIRES: batch.size() > 0 && batch.size() <= kMaxObjectsToMove. - void InsertRange(absl::Span<void*> batch) ABSL_LOCKS_EXCLUDED(lock_); + // Insert batch into the central freelist. 
+ // REQUIRES: batch.size() > 0 && batch.size() <= kMaxObjectsToMove. + void InsertRange(absl::Span<void*> batch) ABSL_LOCKS_EXCLUDED(lock_); // Fill a prefix of batch[0..N-1] with up to N elements removed from central // freelist. Return the number of elements removed. @@ -60,35 +60,35 @@ class CentralFreeList { ABSL_LOCKS_EXCLUDED(lock_); // Returns the number of free objects in cache. - size_t length() const { return static_cast<size_t>(counter_.value()); } + size_t length() const { return static_cast<size_t>(counter_.value()); } // Returns the memory overhead (internal fragmentation) attributable // to the freelist. This is memory lost when the size of elements // in a freelist doesn't exactly divide the page-size (an 8192-byte // page full of 5-byte objects would have 2 bytes memory overhead). - size_t OverheadBytes() const; + size_t OverheadBytes() const; SpanStats GetSpanStats() const; - void AcquireInternalLocks() { - lock_.Lock(); - } - - void ReleaseInternalLocks() { - lock_.Unlock(); - } - + void AcquireInternalLocks() { + lock_.Lock(); + } + + void ReleaseInternalLocks() { + lock_.Unlock(); + } + private: // Release an object to spans. // Returns object's span if it become completely free. - Span* ReleaseToSpans(void* object, Span* span, size_t object_size) + Span* ReleaseToSpans(void* object, Span* span, size_t object_size) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); // Populate cache by fetching from the page heap. // May temporarily release lock_. - // Fill a prefix of batch[0..N-1] with up to N elements removed from central - // freelist. Returns the number of elements removed. - int Populate(void** batch, int N) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); + // Fill a prefix of batch[0..N-1] with up to N elements removed from central + // freelist. Returns the number of elements removed. + int Populate(void** batch, int N) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); // This lock protects all the mutable data members. 
absl::base_internal::SpinLock lock_; @@ -126,17 +126,17 @@ class CentralFreeList { // guarantees accuracy. // Num free objects in cache entry - StatsCounter counter_; + StatsCounter counter_; - StatsCounter num_spans_requested_; - StatsCounter num_spans_returned_; + StatsCounter num_spans_requested_; + StatsCounter num_spans_returned_; // Dummy header for non-empty spans SpanList nonempty_ ABSL_GUARDED_BY(lock_); }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_CENTRAL_FREELIST_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc index a80d580753..ac0768a3d7 100644 --- a/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc @@ -1,198 +1,198 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include <algorithm> -#include <cstddef> -#include <cstdint> -#include <vector> - -#include "absl/algorithm/container.h" -#include "absl/random/random.h" -#include "benchmark/benchmark.h" -#include "tcmalloc/central_freelist.h" -#include "tcmalloc/common.h" -#include "tcmalloc/static_vars.h" -#include "tcmalloc/tcmalloc_policy.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -// This benchmark measures how long it takes to populate multiple -// spans. The spans are freed in the same order as they were populated -// to minimize the time it takes to free them. -void BM_Populate(benchmark::State& state) { - size_t object_size = state.range(0); - size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); - int batch_size = Static::sizemap().num_objects_to_move(cl); - int num_objects = 64 * 1024 * 1024 / object_size; - CentralFreeList cfl; - // Initialize the span to contain the appropriate size of object. - cfl.Init(cl); - - // Allocate an array large enough to hold 64 MiB of objects. - std::vector<void*> buffer(num_objects); - int64_t items_processed = 0; - absl::BitGen rnd; - - for (auto s : state) { - int index = 0; - // The cost of fetching objects will include the cost of fetching and - // populating the span. - while (index < num_objects) { - int count = std::min(batch_size, num_objects - index); - int got = cfl.RemoveRange(&buffer[index], count); - index += got; - } - - // Don't include the cost of returning the objects to the span, and the - // span to the pageheap. 
- state.PauseTiming(); - index = 0; - while (index < num_objects) { - uint64_t count = std::min(batch_size, num_objects - index); - cfl.InsertRange({&buffer[index], count}); - index += count; - } - items_processed += index; - state.ResumeTiming(); - } - state.SetItemsProcessed(items_processed); -} -BENCHMARK(BM_Populate) - ->DenseRange(8, 64, 16) - ->DenseRange(64, 1024, 64) - ->DenseRange(4096, 28 * 1024, 4096) - ->DenseRange(32 * 1024, 256 * 1024, 32 * 1024); - -// This benchmark fills a large array with objects, shuffles the objects -// and then returns them. -// This should be relatively representative of what happens at runtime. -// Fetching objects from the CFL is usually done in batches, but returning -// them is usually done spread over many active spans. -void BM_MixAndReturn(benchmark::State& state) { - size_t object_size = state.range(0); - size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); - int batch_size = Static::sizemap().num_objects_to_move(cl); - int num_objects = 64 * 1024 * 1024 / object_size; - CentralFreeList cfl; - // Initialize the span to contain the appropriate size of object. - cfl.Init(cl); - - // Allocate an array large enough to hold 64 MiB of objects. - std::vector<void*> buffer(num_objects); - int64_t items_processed = 0; - absl::BitGen rnd; - - for (auto s : state) { - int index = 0; - while (index < num_objects) { - int count = std::min(batch_size, num_objects - index); - int got = cfl.RemoveRange(&buffer[index], count); - index += got; - } - - state.PauseTiming(); - // Shuffle the vector so that we don't return the objects in the same - // order as they were allocated. 
- absl::c_shuffle(buffer, rnd); - state.ResumeTiming(); - - index = 0; - while (index < num_objects) { - unsigned int count = std::min(batch_size, num_objects - index); - cfl.InsertRange({&buffer[index], count}); - index += count; - } - items_processed += index; - } - state.SetItemsProcessed(items_processed); -} -BENCHMARK(BM_MixAndReturn) - ->DenseRange(8, 64, 16) - ->DenseRange(64, 1024, 64) - ->DenseRange(4096, 28 * 1024, 4096) - ->DenseRange(32 * 1024, 256 * 1024, 32 * 1024); - -// This benchmark holds onto half the allocated objects so that (except for -// single object spans) spans are never allocated or freed during the -// benchmark run. This evaluates the performance of just the span handling -// code, and avoids timing the pageheap code. -void BM_SpanReuse(benchmark::State& state) { - size_t object_size = state.range(0); - size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); - int batch_size = Static::sizemap().num_objects_to_move(cl); - int num_objects = 64 * 1024 * 1024 / object_size; - CentralFreeList cfl; - // Initialize the span to contain the appropriate size of object. - cfl.Init(cl); - - // Array used to hold onto half of the objects - std::vector<void*> held_objects(2 * num_objects); - // Request twice the objects we need - for (int index = 0; index < 2 * num_objects;) { - int count = std::min(batch_size, 2 * num_objects - index); - int got = cfl.RemoveRange(&held_objects[index], count); - index += got; - } - - // Return half of the objects. This will stop the spans from being - // returned to the pageheap. So future operations will not touch the - // pageheap. - for (int index = 0; index < 2 * num_objects; index += 2) { - cfl.InsertRange({&held_objects[index], 1}); - } - // Allocate an array large enough to hold 64 MiB of objects. 
- std::vector<void*> buffer(num_objects); - int64_t items_processed = 0; - absl::BitGen rnd; - - for (auto s : state) { - int index = 0; - while (index < num_objects) { - int count = std::min(batch_size, num_objects - index); - int got = cfl.RemoveRange(&buffer[index], count); - index += got; - } - - state.PauseTiming(); - // Shuffle the vector so that we don't return the objects in the same - // order as they were allocated. - absl::c_shuffle(buffer, rnd); - state.ResumeTiming(); - - index = 0; - while (index < num_objects) { - uint64_t count = std::min(batch_size, num_objects - index); - cfl.InsertRange({&buffer[index], count}); - index += count; - } - items_processed += index; - } - state.SetItemsProcessed(items_processed); - - // Return the other half of the objects. - for (int index = 1; index < 2 * num_objects; index += 2) { - cfl.InsertRange({&held_objects[index], 1}); - } -} -// Want to avoid benchmarking spans where there is a single object per span. -BENCHMARK(BM_SpanReuse) - ->DenseRange(8, 64, 16) - ->DenseRange(64, 1024, 64) - ->DenseRange(1024, 4096, 512); - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <vector> + +#include "absl/algorithm/container.h" +#include "absl/random/random.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/tcmalloc_policy.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// This benchmark measures how long it takes to populate multiple +// spans. The spans are freed in the same order as they were populated +// to minimize the time it takes to free them. +void BM_Populate(benchmark::State& state) { + size_t object_size = state.range(0); + size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = Static::sizemap().num_objects_to_move(cl); + int num_objects = 64 * 1024 * 1024 / object_size; + CentralFreeList cfl; + // Initialize the span to contain the appropriate size of object. + cfl.Init(cl); + + // Allocate an array large enough to hold 64 MiB of objects. + std::vector<void*> buffer(num_objects); + int64_t items_processed = 0; + absl::BitGen rnd; + + for (auto s : state) { + int index = 0; + // The cost of fetching objects will include the cost of fetching and + // populating the span. + while (index < num_objects) { + int count = std::min(batch_size, num_objects - index); + int got = cfl.RemoveRange(&buffer[index], count); + index += got; + } + + // Don't include the cost of returning the objects to the span, and the + // span to the pageheap. 
+ state.PauseTiming(); + index = 0; + while (index < num_objects) { + uint64_t count = std::min(batch_size, num_objects - index); + cfl.InsertRange({&buffer[index], count}); + index += count; + } + items_processed += index; + state.ResumeTiming(); + } + state.SetItemsProcessed(items_processed); +} +BENCHMARK(BM_Populate) + ->DenseRange(8, 64, 16) + ->DenseRange(64, 1024, 64) + ->DenseRange(4096, 28 * 1024, 4096) + ->DenseRange(32 * 1024, 256 * 1024, 32 * 1024); + +// This benchmark fills a large array with objects, shuffles the objects +// and then returns them. +// This should be relatively representative of what happens at runtime. +// Fetching objects from the CFL is usually done in batches, but returning +// them is usually done spread over many active spans. +void BM_MixAndReturn(benchmark::State& state) { + size_t object_size = state.range(0); + size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = Static::sizemap().num_objects_to_move(cl); + int num_objects = 64 * 1024 * 1024 / object_size; + CentralFreeList cfl; + // Initialize the span to contain the appropriate size of object. + cfl.Init(cl); + + // Allocate an array large enough to hold 64 MiB of objects. + std::vector<void*> buffer(num_objects); + int64_t items_processed = 0; + absl::BitGen rnd; + + for (auto s : state) { + int index = 0; + while (index < num_objects) { + int count = std::min(batch_size, num_objects - index); + int got = cfl.RemoveRange(&buffer[index], count); + index += got; + } + + state.PauseTiming(); + // Shuffle the vector so that we don't return the objects in the same + // order as they were allocated. 
+ absl::c_shuffle(buffer, rnd); + state.ResumeTiming(); + + index = 0; + while (index < num_objects) { + unsigned int count = std::min(batch_size, num_objects - index); + cfl.InsertRange({&buffer[index], count}); + index += count; + } + items_processed += index; + } + state.SetItemsProcessed(items_processed); +} +BENCHMARK(BM_MixAndReturn) + ->DenseRange(8, 64, 16) + ->DenseRange(64, 1024, 64) + ->DenseRange(4096, 28 * 1024, 4096) + ->DenseRange(32 * 1024, 256 * 1024, 32 * 1024); + +// This benchmark holds onto half the allocated objects so that (except for +// single object spans) spans are never allocated or freed during the +// benchmark run. This evaluates the performance of just the span handling +// code, and avoids timing the pageheap code. +void BM_SpanReuse(benchmark::State& state) { + size_t object_size = state.range(0); + size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = Static::sizemap().num_objects_to_move(cl); + int num_objects = 64 * 1024 * 1024 / object_size; + CentralFreeList cfl; + // Initialize the span to contain the appropriate size of object. + cfl.Init(cl); + + // Array used to hold onto half of the objects + std::vector<void*> held_objects(2 * num_objects); + // Request twice the objects we need + for (int index = 0; index < 2 * num_objects;) { + int count = std::min(batch_size, 2 * num_objects - index); + int got = cfl.RemoveRange(&held_objects[index], count); + index += got; + } + + // Return half of the objects. This will stop the spans from being + // returned to the pageheap. So future operations will not touch the + // pageheap. + for (int index = 0; index < 2 * num_objects; index += 2) { + cfl.InsertRange({&held_objects[index], 1}); + } + // Allocate an array large enough to hold 64 MiB of objects. 
+ std::vector<void*> buffer(num_objects); + int64_t items_processed = 0; + absl::BitGen rnd; + + for (auto s : state) { + int index = 0; + while (index < num_objects) { + int count = std::min(batch_size, num_objects - index); + int got = cfl.RemoveRange(&buffer[index], count); + index += got; + } + + state.PauseTiming(); + // Shuffle the vector so that we don't return the objects in the same + // order as they were allocated. + absl::c_shuffle(buffer, rnd); + state.ResumeTiming(); + + index = 0; + while (index < num_objects) { + uint64_t count = std::min(batch_size, num_objects - index); + cfl.InsertRange({&buffer[index], count}); + index += count; + } + items_processed += index; + } + state.SetItemsProcessed(items_processed); + + // Return the other half of the objects. + for (int index = 1; index < 2 * num_objects; index += 2) { + cfl.InsertRange({&held_objects[index], 1}); + } +} +// Want to avoid benchmarking spans where there is a single object per span. +BENCHMARK(BM_SpanReuse) + ->DenseRange(8, 64, 16) + ->DenseRange(64, 1024, 64) + ->DenseRange(1024, 4096, 512); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc index de5960120d..f951303af0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc @@ -23,7 +23,7 @@ #include "tcmalloc/static_vars.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { // TODO(b/162552708) Mock out the page heap to interact with CFL instead @@ -53,9 +53,9 @@ class CFLTest : public testing::TestWithParam<size_t> { TEST_P(CFLTest, SingleBatch) { void* batch[kMaxObjectsToMove]; - uint64_t got = cfl_.RemoveRange(batch, batch_size_); - ASSERT_GT(got, 0); - cfl_.InsertRange({batch, got}); + uint64_t got = cfl_.RemoveRange(batch, batch_size_); + ASSERT_GT(got, 0); + 
cfl_.InsertRange({batch, got}); SpanStats stats = cfl_.GetSpanStats(); EXPECT_EQ(stats.num_spans_requested, 1); EXPECT_EQ(stats.num_spans_returned, 1); @@ -72,8 +72,8 @@ TEST_P(CFLTest, MultipleSpans) { const int num_objects_to_fetch = num_spans * objects_per_span_; int total_fetched = 0; while (total_fetched < num_objects_to_fetch) { - size_t n = num_objects_to_fetch - total_fetched; - int got = cfl_.RemoveRange(batch, std::min(n, batch_size_)); + size_t n = num_objects_to_fetch - total_fetched; + int got = cfl_.RemoveRange(batch, std::min(n, batch_size_)); for (int i = 0; i < got; ++i) { all_objects.push_back(batch[i]); } @@ -94,13 +94,13 @@ TEST_P(CFLTest, MultipleSpans) { int total_returned = 0; bool checked_half = false; while (total_returned < num_objects_to_fetch) { - uint64_t size_to_pop = + uint64_t size_to_pop = std::min(all_objects.size() - total_returned, batch_size_); for (int i = 0; i < size_to_pop; ++i) { batch[i] = all_objects[i + total_returned]; } total_returned += size_to_pop; - cfl_.InsertRange({batch, size_to_pop}); + cfl_.InsertRange({batch, size_to_pop}); // sanity check if (!checked_half && total_returned >= (num_objects_to_fetch / 2)) { stats = cfl_.GetSpanStats(); @@ -117,5 +117,5 @@ TEST_P(CFLTest, MultipleSpans) { INSTANTIATE_TEST_SUITE_P(All, CFLTest, testing::Range(size_t(1), kNumClasses)); } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/common.cc b/contrib/libs/tcmalloc/tcmalloc/common.cc index 38443040ca..85b30fee12 100644 --- a/contrib/libs/tcmalloc/tcmalloc/common.cc +++ b/contrib/libs/tcmalloc/tcmalloc/common.cc @@ -15,22 +15,22 @@ #include "tcmalloc/common.h" #include "tcmalloc/experiment.h" -#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/optimization.h" -#include "tcmalloc/pages.h" +#include "tcmalloc/pages.h" #include "tcmalloc/runtime_size_classes.h" #include 
"tcmalloc/sampler.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { absl::string_view MemoryTagToLabel(MemoryTag tag) { switch (tag) { case MemoryTag::kNormal: return "NORMAL"; - case MemoryTag::kNormalP1: - return "NORMAL_P1"; + case MemoryTag::kNormalP1: + return "NORMAL_P1"; case MemoryTag::kSampled: return "SAMPLED"; default: @@ -73,21 +73,21 @@ void SizeMap::SetSizeClasses(int num_classes, const SizeClassInfo* parsed) { } // Fill any unspecified size classes with 0. - for (int x = num_classes; x < kNumBaseClasses; x++) { + for (int x = num_classes; x < kNumBaseClasses; x++) { class_to_size_[x] = 0; class_to_pages_[x] = 0; num_objects_to_move_[x] = 0; } - - // Copy selected size classes into the upper registers. - for (int i = 1; i < (kNumClasses / kNumBaseClasses); i++) { - std::copy(&class_to_size_[0], &class_to_size_[kNumBaseClasses], - &class_to_size_[kNumBaseClasses * i]); - std::copy(&class_to_pages_[0], &class_to_pages_[kNumBaseClasses], - &class_to_pages_[kNumBaseClasses * i]); - std::copy(&num_objects_to_move_[0], &num_objects_to_move_[kNumBaseClasses], - &num_objects_to_move_[kNumBaseClasses * i]); - } + + // Copy selected size classes into the upper registers. 
+ for (int i = 1; i < (kNumClasses / kNumBaseClasses); i++) { + std::copy(&class_to_size_[0], &class_to_size_[kNumBaseClasses], + &class_to_size_[kNumBaseClasses * i]); + std::copy(&class_to_pages_[0], &class_to_pages_[kNumBaseClasses], + &class_to_pages_[kNumBaseClasses * i]); + std::copy(&num_objects_to_move_[0], &num_objects_to_move_[kNumBaseClasses], + &num_objects_to_move_[kNumBaseClasses * i]); + } } // Return true if all size classes meet the requirements for alignment @@ -96,8 +96,8 @@ bool SizeMap::ValidSizeClasses(int num_classes, const SizeClassInfo* parsed) { if (num_classes <= 0) { return false; } - if (kHasExpandedClasses && num_classes > kNumBaseClasses) { - num_classes = kNumBaseClasses; + if (kHasExpandedClasses && num_classes > kNumBaseClasses) { + num_classes = kNumBaseClasses; } for (int c = 1; c < num_classes; c++) { @@ -167,17 +167,17 @@ void SizeMap::Init() { static_assert(kAlignment <= 16, "kAlignment is too large"); - if (IsExperimentActive(Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS)) { - SetSizeClasses(kExperimentalPow2SizeClassesCount, - kExperimentalPow2SizeClasses); - } else if (IsExperimentActive( - Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS)) { - SetSizeClasses(kExperimentalPow2Below64SizeClassesCount, - kExperimentalPow2Below64SizeClasses); + if (IsExperimentActive(Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS)) { + SetSizeClasses(kExperimentalPow2SizeClassesCount, + kExperimentalPow2SizeClasses); + } else if (IsExperimentActive( + Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS)) { + SetSizeClasses(kExperimentalPow2Below64SizeClassesCount, + kExperimentalPow2Below64SizeClasses); } else { if (default_want_legacy_spans != nullptr && - default_want_legacy_spans() > 0 - ) { + default_want_legacy_spans() > 0 + ) { SetSizeClasses(kLegacySizeClassesCount, kLegacySizeClasses); } else { SetSizeClasses(kSizeClassesCount, kSizeClasses); @@ -199,6 +199,6 @@ void SizeMap::Init() { } } -} // namespace tcmalloc_internal +} // 
namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/common.h b/contrib/libs/tcmalloc/tcmalloc/common.h index d44811c726..43e2aa5f7b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/common.h +++ b/contrib/libs/tcmalloc/tcmalloc/common.h @@ -20,27 +20,27 @@ #include <stddef.h> #include <stdint.h> -#include <limits> -#include <type_traits> - +#include <limits> +#include <type_traits> + #include "absl/base/attributes.h" #include "absl/base/dynamic_annotations.h" #include "absl/base/internal/spinlock.h" -#include "absl/base/macros.h" +#include "absl/base/macros.h" #include "absl/base/optimization.h" -#include "absl/numeric/bits.h" +#include "absl/numeric/bits.h" #include "absl/strings/string_view.h" -#include "absl/types/span.h" -#include "tcmalloc/experiment.h" +#include "absl/types/span.h" +#include "tcmalloc/experiment.h" #include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/optimization.h" #include "tcmalloc/size_class_info.h" -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + //------------------------------------------------------------------- // Configuration //------------------------------------------------------------------- @@ -79,7 +79,7 @@ namespace tcmalloc_internal { // The constants that vary between models are: // // kPageShift - Shift amount used to compute the page size. -// kNumBaseClasses - Number of size classes serviced by bucket allocators +// kNumBaseClasses - Number of size classes serviced by bucket allocators // kMaxSize - Maximum size serviced by bucket allocators (thread/cpu/central) // kMinThreadCacheSize - The minimum size in bytes of each ThreadCache. // kMaxThreadCacheSize - The maximum size in bytes of each ThreadCache. 
@@ -109,7 +109,7 @@ namespace tcmalloc_internal { #if TCMALLOC_PAGE_SHIFT == 12 inline constexpr size_t kPageShift = 12; -inline constexpr size_t kNumBaseClasses = 46; +inline constexpr size_t kNumBaseClasses = 46; inline constexpr bool kHasExpandedClasses = false; inline constexpr size_t kMaxSize = 8 << 10; inline constexpr size_t kMinThreadCacheSize = 4 * 1024; @@ -121,7 +121,7 @@ inline constexpr size_t kDefaultProfileSamplingRate = 1 << 19; inline constexpr size_t kMinPages = 2; #elif TCMALLOC_PAGE_SHIFT == 15 inline constexpr size_t kPageShift = 15; -inline constexpr size_t kNumBaseClasses = 78; +inline constexpr size_t kNumBaseClasses = 78; inline constexpr bool kHasExpandedClasses = true; inline constexpr size_t kMaxSize = 256 * 1024; inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; @@ -134,7 +134,7 @@ inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21; inline constexpr size_t kMinPages = 8; #elif TCMALLOC_PAGE_SHIFT == 18 inline constexpr size_t kPageShift = 18; -inline constexpr size_t kNumBaseClasses = 89; +inline constexpr size_t kNumBaseClasses = 89; inline constexpr bool kHasExpandedClasses = true; inline constexpr size_t kMaxSize = 256 * 1024; inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; @@ -147,7 +147,7 @@ inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21; inline constexpr size_t kMinPages = 8; #elif TCMALLOC_PAGE_SHIFT == 13 inline constexpr size_t kPageShift = 13; -inline constexpr size_t kNumBaseClasses = 86; +inline constexpr size_t kNumBaseClasses = 86; inline constexpr bool kHasExpandedClasses = true; inline constexpr size_t kMaxSize = 256 * 1024; inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; @@ -162,36 +162,36 @@ inline constexpr size_t kMinPages = 8; #error "Unsupported TCMALLOC_PAGE_SHIFT value!" #endif -// Sanitizers constrain the memory layout which causes problems with the -// enlarged tags required to represent NUMA partitions. 
Disable NUMA awareness -// to avoid failing to mmap memory. -#if defined(TCMALLOC_NUMA_AWARE) && !defined(MEMORY_SANITIZER) && \ - !defined(THREAD_SANITIZER) -inline constexpr size_t kNumaPartitions = 2; -#else -inline constexpr size_t kNumaPartitions = 1; -#endif - -// We have copies of kNumBaseClasses size classes for each NUMA node, followed -// by any expanded classes. -inline constexpr size_t kExpandedClassesStart = - kNumBaseClasses * kNumaPartitions; -inline constexpr size_t kNumClasses = - kExpandedClassesStart + (kHasExpandedClasses ? kNumBaseClasses : 0); - -// Size classes are often stored as uint32_t values, but there are some -// situations where we need to store a size class with as compact a -// representation as possible (e.g. in PageMap). Here we determine the integer -// type to use in these situations - i.e. the smallest integer type large -// enough to store values in the range [0,kNumClasses). -constexpr size_t kMaxClass = kNumClasses - 1; -using CompactSizeClass = - std::conditional_t<kMaxClass <= std::numeric_limits<uint8_t>::max(), - uint8_t, uint16_t>; - -// ~64K classes ought to be enough for anybody, but let's be sure. -static_assert(kMaxClass <= std::numeric_limits<CompactSizeClass>::max()); - +// Sanitizers constrain the memory layout which causes problems with the +// enlarged tags required to represent NUMA partitions. Disable NUMA awareness +// to avoid failing to mmap memory. +#if defined(TCMALLOC_NUMA_AWARE) && !defined(MEMORY_SANITIZER) && \ + !defined(THREAD_SANITIZER) +inline constexpr size_t kNumaPartitions = 2; +#else +inline constexpr size_t kNumaPartitions = 1; +#endif + +// We have copies of kNumBaseClasses size classes for each NUMA node, followed +// by any expanded classes. +inline constexpr size_t kExpandedClassesStart = + kNumBaseClasses * kNumaPartitions; +inline constexpr size_t kNumClasses = + kExpandedClassesStart + (kHasExpandedClasses ? 
kNumBaseClasses : 0); + +// Size classes are often stored as uint32_t values, but there are some +// situations where we need to store a size class with as compact a +// representation as possible (e.g. in PageMap). Here we determine the integer +// type to use in these situations - i.e. the smallest integer type large +// enough to store values in the range [0,kNumClasses). +constexpr size_t kMaxClass = kNumClasses - 1; +using CompactSizeClass = + std::conditional_t<kMaxClass <= std::numeric_limits<uint8_t>::max(), + uint8_t, uint16_t>; + +// ~64K classes ought to be enough for anybody, but let's be sure. +static_assert(kMaxClass <= std::numeric_limits<CompactSizeClass>::max()); + // Minimum/maximum number of batches in TransferCache per size class. // Actual numbers depends on a number of factors, see TransferCache::Init // for details. @@ -205,7 +205,7 @@ inline constexpr size_t kPageSize = 1 << kPageShift; // of increasing kMaxSize to be multiple of kPageSize is unclear. Object size // profile data indicates that the number of simultaneously live objects (of // size >= 256k) tends to be very small. Keeping those objects as 'large' -// objects won't cause too much memory waste, while heap memory reuse can be +// objects won't cause too much memory waste, while heap memory reuse can be // improved. Increasing kMaxSize to be too large has another bad side effect -- // the thread cache pressure is increased, which will in turn increase traffic // between central cache and thread cache, leading to performance degradation. @@ -214,7 +214,7 @@ static_assert((kMaxSize / kPageSize) >= kMinPages || kPageShift >= 18, inline constexpr size_t kAlignment = 8; // log2 (kAlignment) -inline constexpr size_t kAlignmentShift = absl::bit_width(kAlignment - 1u); +inline constexpr size_t kAlignmentShift = absl::bit_width(kAlignment - 1u); // The number of times that a deallocation can cause a freelist to // go over its max_length() before shrinking max_length(). 
@@ -228,18 +228,18 @@ inline constexpr int kMaxOverages = 3; inline constexpr int kMaxDynamicFreeListLength = 8192; enum class MemoryTag : uint8_t { - // Sampled, infrequently allocated - kSampled = 0x0, - // Not sampled, NUMA partition 0 - kNormalP0 = 0x1, - // Not sampled, NUMA partition 1 - kNormalP1 = (kNumaPartitions > 1) ? 0x2 : 0xff, - // Not sampled - kNormal = kNormalP0, + // Sampled, infrequently allocated + kSampled = 0x0, + // Not sampled, NUMA partition 0 + kNormalP0 = 0x1, + // Not sampled, NUMA partition 1 + kNormalP1 = (kNumaPartitions > 1) ? 0x2 : 0xff, + // Not sampled + kNormal = kNormalP0, }; inline constexpr uintptr_t kTagShift = std::min(kAddressBits - 4, 42); -inline constexpr uintptr_t kTagMask = uintptr_t{0x3} << kTagShift; +inline constexpr uintptr_t kTagMask = uintptr_t{0x3} << kTagShift; // Returns true if ptr is tagged. ABSL_DEPRECATED("Replace with specific tests") @@ -248,21 +248,21 @@ inline bool IsTaggedMemory(const void* ptr) { } inline bool IsSampledMemory(const void* ptr) { - constexpr uintptr_t kSampledNormalMask = kNumaPartitions > 1 ? 0x3 : 0x1; - - static_assert(static_cast<uintptr_t>(MemoryTag::kNormalP0) & - kSampledNormalMask); - static_assert(static_cast<uintptr_t>(MemoryTag::kNormalP1) & - kSampledNormalMask); - - const uintptr_t tag = - (reinterpret_cast<uintptr_t>(ptr) & kTagMask) >> kTagShift; - return (tag & kSampledNormalMask) == - static_cast<uintptr_t>(MemoryTag::kSampled); + constexpr uintptr_t kSampledNormalMask = kNumaPartitions > 1 ? 
0x3 : 0x1; + + static_assert(static_cast<uintptr_t>(MemoryTag::kNormalP0) & + kSampledNormalMask); + static_assert(static_cast<uintptr_t>(MemoryTag::kNormalP1) & + kSampledNormalMask); + + const uintptr_t tag = + (reinterpret_cast<uintptr_t>(ptr) & kTagMask) >> kTagShift; + return (tag & kSampledNormalMask) == + static_cast<uintptr_t>(MemoryTag::kSampled); } -inline bool IsNormalMemory(const void* ptr) { return !IsSampledMemory(ptr); } - +inline bool IsNormalMemory(const void* ptr) { return !IsSampledMemory(ptr); } + inline MemoryTag GetMemoryTag(const void* ptr) { return static_cast<MemoryTag>((reinterpret_cast<uintptr_t>(ptr) & kTagMask) >> kTagShift); @@ -271,10 +271,10 @@ inline MemoryTag GetMemoryTag(const void* ptr) { absl::string_view MemoryTagToLabel(MemoryTag tag); inline constexpr bool IsExpandedSizeClass(unsigned cl) { - return kHasExpandedClasses && (cl >= kExpandedClassesStart); + return kHasExpandedClasses && (cl >= kExpandedClassesStart); } -#if !defined(TCMALLOC_SMALL_BUT_SLOW) && __SIZEOF_POINTER__ != 4 +#if !defined(TCMALLOC_SMALL_BUT_SLOW) && __SIZEOF_POINTER__ != 4 // Always allocate at least a huge page inline constexpr size_t kMinSystemAlloc = kHugePageSize; inline constexpr size_t kMinMmapAlloc = 1 << 30; // mmap() in 1GiB ranges. 
@@ -291,31 +291,31 @@ static_assert(kMinMmapAlloc % kMinSystemAlloc == 0, "Minimum mmap allocation size is not a multiple of" " minimum system allocation size"); -inline MemoryTag NumaNormalTag(size_t numa_partition) { - switch (numa_partition) { - case 0: - return MemoryTag::kNormalP0; - case 1: - return MemoryTag::kNormalP1; - default: - ASSUME(false); - __builtin_unreachable(); - } -} - -inline size_t NumaPartitionFromPointer(void* ptr) { - if constexpr (kNumaPartitions == 1) { - return 0; - } - - switch (GetMemoryTag(ptr)) { - case MemoryTag::kNormalP1: - return 1; - default: - return 0; - } -} - +inline MemoryTag NumaNormalTag(size_t numa_partition) { + switch (numa_partition) { + case 0: + return MemoryTag::kNormalP0; + case 1: + return MemoryTag::kNormalP1; + default: + ASSUME(false); + __builtin_unreachable(); + } +} + +inline size_t NumaPartitionFromPointer(void* ptr) { + if constexpr (kNumaPartitions == 1) { + return 0; + } + + switch (GetMemoryTag(ptr)) { + case MemoryTag::kNormalP1: + return 1; + default: + return 0; + } +} + // Size-class information + mapping class SizeMap { public: @@ -325,7 +325,7 @@ class SizeMap { static constexpr size_t kMultiPageAlignment = 64; // log2 (kMultiPageAlignment) static constexpr size_t kMultiPageAlignmentShift = - absl::bit_width(kMultiPageAlignment - 1u); + absl::bit_width(kMultiPageAlignment - 1u); private: //------------------------------------------------------------------- @@ -361,8 +361,8 @@ class SizeMap { // first member so that it inherits the overall alignment of a SizeMap // instance. In particular, if we create a SizeMap instance that's cache-line // aligned, this member is also aligned to the width of a cache line. - CompactSizeClass - class_array_[kClassArraySize * (kHasExpandedClasses ? 2 : 1)] = {0}; + CompactSizeClass + class_array_[kClassArraySize * (kHasExpandedClasses ? 2 : 1)] = {0}; // Number of objects to move between a per-thread list and a central // list in one shot. 
We want this to be not too small so we can @@ -413,11 +413,11 @@ class SizeMap { static const SizeClassInfo kSizeClasses[]; static const int kSizeClassesCount; - static const SizeClassInfo kExperimentalPow2Below64SizeClasses[]; - static const int kExperimentalPow2Below64SizeClassesCount; - // kExperimentalPowBelow64SizeClassesCount - static const SizeClassInfo kExperimentalPow2SizeClasses[]; - static const int kExperimentalPow2SizeClassesCount; + static const SizeClassInfo kExperimentalPow2Below64SizeClasses[]; + static const int kExperimentalPow2Below64SizeClassesCount; + // kExperimentalPowBelow64SizeClassesCount + static const SizeClassInfo kExperimentalPow2SizeClasses[]; + static const int kExperimentalPow2SizeClassesCount; // Definition of size class that is set in size_classes.cc static const SizeClassInfo kLegacySizeClasses[]; @@ -431,41 +431,41 @@ class SizeMap { // Initialize the mapping arrays void Init(); - // Returns the size class for size `size` respecting the alignment - // requirements of `policy`. + // Returns the size class for size `size` respecting the alignment + // requirements of `policy`. // // Returns true on success. Returns false if either: // - the size exceeds the maximum size class size. // - the align size is greater or equal to the default page size // - no matching properly aligned size class is available // - // Requires that policy.align() returns a non-zero power of 2. + // Requires that policy.align() returns a non-zero power of 2. // - // When policy.align() = 1 the default alignment of the size table will be - // used. If policy.align() is constexpr 1 (e.g. when using - // DefaultAlignPolicy) then alignment-related code will optimize away. - // - // TODO(b/171978365): Replace the output parameter with returning - // absl::optional<uint32_t>. 
- template <typename Policy> - inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE GetSizeClass(Policy policy, - size_t size, + // When policy.align() = 1 the default alignment of the size table will be + // used. If policy.align() is constexpr 1 (e.g. when using + // DefaultAlignPolicy) then alignment-related code will optimize away. + // + // TODO(b/171978365): Replace the output parameter with returning + // absl::optional<uint32_t>. + template <typename Policy> + inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE GetSizeClass(Policy policy, + size_t size, uint32_t* cl) { - const size_t align = policy.align(); - ASSERT(absl::has_single_bit(align)); + const size_t align = policy.align(); + ASSERT(absl::has_single_bit(align)); if (ABSL_PREDICT_FALSE(align >= kPageSize)) { // TODO(b/172060547): Consider changing this to align > kPageSize. ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); return false; } - - uint32_t idx; - if (ABSL_PREDICT_FALSE(!ClassIndexMaybe(size, &idx))) { + + uint32_t idx; + if (ABSL_PREDICT_FALSE(!ClassIndexMaybe(size, &idx))) { ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); return false; } - *cl = class_array_[idx] + policy.scaled_numa_partition(); + *cl = class_array_[idx] + policy.scaled_numa_partition(); // Predict that size aligned allocs most often directly map to a proper // size class, i.e., multiples of 32, 64, etc, matching our class sizes. @@ -474,7 +474,7 @@ class SizeMap { if (ABSL_PREDICT_TRUE((class_to_size(*cl) & mask) == 0)) { return true; } - } while ((++*cl % kNumBaseClasses) != 0); + } while ((++*cl % kNumBaseClasses) != 0); ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); return false; @@ -482,12 +482,12 @@ class SizeMap { // Returns size class for given size, or 0 if this instance has not been // initialized yet. REQUIRES: size <= kMaxSize. 
- template <typename Policy> - inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE SizeClass(Policy policy, - size_t size) { + template <typename Policy> + inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE SizeClass(Policy policy, + size_t size) { ASSERT(size <= kMaxSize); uint32_t ret = 0; - GetSizeClass(policy, size, &ret); + GetSizeClass(policy, size, &ret); return ret; } @@ -517,8 +517,8 @@ class SizeMap { // Linker initialized, so this lock can be accessed at any time. extern absl::base_internal::SpinLock pageheap_lock; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_COMMON_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc index 8ae02b38e9..6bd70745d4 100644 --- a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc @@ -25,7 +25,7 @@ #include "absl/base/internal/sysinfo.h" #include "absl/base/macros.h" #include "absl/base/thread_annotations.h" -#include "absl/container/fixed_array.h" +#include "absl/container/fixed_array.h" #include "tcmalloc/arena.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" @@ -34,9 +34,9 @@ #include "tcmalloc/static_vars.h" #include "tcmalloc/transfer_cache.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { static cpu_set_t FillActiveCpuMask() { cpu_set_t allowed_cpus; @@ -45,7 +45,7 @@ static cpu_set_t FillActiveCpuMask() { } #ifdef PERCPU_USE_RSEQ - const bool real_cpus = !subtle::percpu::UsingFlatVirtualCpus(); + const bool real_cpus = !subtle::percpu::UsingFlatVirtualCpus(); #else const bool real_cpus = true; #endif @@ -70,7 +70,7 @@ static size_t MaxCapacity(size_t cl) { static constexpr size_t kNumSmall = 10; // The memory used for each per-CPU slab is the sum of: - // sizeof(std::atomic<int64_t>) * kNumClasses + // 
sizeof(std::atomic<int64_t>) * kNumClasses // sizeof(void*) * (kSmallObjectDepth + 1) * kNumSmall // sizeof(void*) * (kLargeObjectDepth + 1) * kNumLarge // @@ -84,36 +84,36 @@ static size_t MaxCapacity(size_t cl) { // With SMALL_BUT_SLOW we have 4KiB of per-cpu slab and 46 class sizes we // allocate: // == 8 * 46 + 8 * ((16 + 1) * 10 + (6 + 1) * 35) = 4038 bytes of 4096 - static const uint16_t kSmallObjectDepth = 16; - static const uint16_t kLargeObjectDepth = 6; + static const uint16_t kSmallObjectDepth = 16; + static const uint16_t kLargeObjectDepth = 6; #else // We allocate 256KiB per-cpu for pointers to cached per-cpu memory. // Each 256KiB is a subtle::percpu::TcmallocSlab::Slabs // Max(kNumClasses) is 89, so the maximum footprint per CPU is: // 89 * 8 + 8 * ((2048 + 1) * 10 + (152 + 1) * 78 + 88) = 254 KiB - static const uint16_t kSmallObjectDepth = 2048; - static const uint16_t kLargeObjectDepth = 152; + static const uint16_t kSmallObjectDepth = 2048; + static const uint16_t kLargeObjectDepth = 152; #endif if (cl == 0 || cl >= kNumClasses) return 0; - if (Static::sharded_transfer_cache().should_use(cl)) { - return 0; - } - + if (Static::sharded_transfer_cache().should_use(cl)) { + return 0; + } + if (Static::sizemap().class_to_size(cl) == 0) { return 0; } - if (!IsExpandedSizeClass(cl) && (cl % kNumBaseClasses) <= kNumSmall) { + if (!IsExpandedSizeClass(cl) && (cl % kNumBaseClasses) <= kNumSmall) { // Small object sizes are very heavily used and need very deep caches for // good performance (well over 90% of malloc calls are for cl <= 10.) 
return kSmallObjectDepth; } - if (IsExpandedSizeClass(cl)) { - return 0; - } - + if (IsExpandedSizeClass(cl)) { + return 0; + } + return kLargeObjectDepth; } @@ -126,38 +126,38 @@ void CPUCache::Activate(ActivationMode mode) { ASSERT(Static::IsInited()); int num_cpus = absl::base_internal::NumCPUs(); - size_t per_cpu_shift = kPerCpuShift; - const auto &topology = Static::numa_topology(); - if (topology.numa_aware()) { - per_cpu_shift += absl::bit_ceil(topology.active_partitions() - 1); - } - - const size_t kBytesAvailable = (1 << per_cpu_shift); - size_t bytes_required = sizeof(std::atomic<int64_t>) * kNumClasses; - - // Deal with size classes that correspond only to NUMA partitions that are in - // use. If NUMA awareness is disabled then we may have a smaller shift than - // would suffice for all of the unused size classes. - for (int cl = 0; - cl < Static::numa_topology().active_partitions() * kNumBaseClasses; - ++cl) { - const uint16_t mc = MaxCapacity(cl); - max_capacity_[cl] = mc; - bytes_required += sizeof(void *) * mc; - } - - // Deal with expanded size classes. - for (int cl = kExpandedClassesStart; cl < kNumClasses; ++cl) { - const uint16_t mc = MaxCapacity(cl); - max_capacity_[cl] = mc; - bytes_required += sizeof(void *) * mc; + size_t per_cpu_shift = kPerCpuShift; + const auto &topology = Static::numa_topology(); + if (topology.numa_aware()) { + per_cpu_shift += absl::bit_ceil(topology.active_partitions() - 1); + } + + const size_t kBytesAvailable = (1 << per_cpu_shift); + size_t bytes_required = sizeof(std::atomic<int64_t>) * kNumClasses; + + // Deal with size classes that correspond only to NUMA partitions that are in + // use. If NUMA awareness is disabled then we may have a smaller shift than + // would suffice for all of the unused size classes. 
+ for (int cl = 0; + cl < Static::numa_topology().active_partitions() * kNumBaseClasses; + ++cl) { + const uint16_t mc = MaxCapacity(cl); + max_capacity_[cl] = mc; + bytes_required += sizeof(void *) * mc; } + // Deal with expanded size classes. + for (int cl = kExpandedClassesStart; cl < kNumClasses; ++cl) { + const uint16_t mc = MaxCapacity(cl); + max_capacity_[cl] = mc; + bytes_required += sizeof(void *) * mc; + } + // As we may make certain size classes no-ops by selecting "0" at runtime, // using a compile-time calculation overestimates the worst-case memory usage. - if (ABSL_PREDICT_FALSE(bytes_required > kBytesAvailable)) { + if (ABSL_PREDICT_FALSE(bytes_required > kBytesAvailable)) { Crash(kCrash, __FILE__, __LINE__, "per-CPU memory exceeded, have ", - kBytesAvailable, " need ", bytes_required); + kBytesAvailable, " need ", bytes_required); } absl::base_internal::SpinLockHolder h(&pageheap_lock); @@ -173,11 +173,11 @@ void CPUCache::Activate(ActivationMode mode) { resize_[cpu].per_class[cl].Init(); } resize_[cpu].available.store(max_cache_size, std::memory_order_relaxed); - resize_[cpu].capacity.store(max_cache_size, std::memory_order_relaxed); + resize_[cpu].capacity.store(max_cache_size, std::memory_order_relaxed); resize_[cpu].last_steal.store(1, std::memory_order_relaxed); } - freelist_.Init(SlabAlloc, MaxCapacityHelper, lazy_slabs_, per_cpu_shift); + freelist_.Init(SlabAlloc, MaxCapacityHelper, lazy_slabs_, per_cpu_shift); if (mode == ActivationMode::FastPathOn) { Static::ActivateCPUCache(); } @@ -228,15 +228,15 @@ void *CPUCache::Refill(int cpu, size_t cl) { if (i != 0) { static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, "not enough space in batch"); - Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch, i)); + Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch, i)); } } - } while (got == batch_length && i == 0 && total < target && - cpu == freelist_.GetCurrentVirtualCpuUnsafe()); + } while (got == batch_length && i 
== 0 && total < target && + cpu == freelist_.GetCurrentVirtualCpuUnsafe()); for (int i = to_return.count; i < kMaxToReturn; ++i) { Static::transfer_cache().InsertRange( - to_return.cl[i], absl::Span<void *>(&(to_return.obj[i]), 1)); + to_return.cl[i], absl::Span<void *>(&(to_return.obj[i]), 1)); } return result; @@ -263,7 +263,7 @@ size_t CPUCache::UpdateCapacity(int cpu, size_t cl, size_t batch_length, // it again. Also we will shrink it by 1, but grow by a batch. So we should // have lots of time until we need to grow it again. - const size_t max_capacity = max_capacity_[cl]; + const size_t max_capacity = max_capacity_[cl]; size_t capacity = freelist_.Capacity(cpu, cl); // We assert that the return value, target, is non-zero, so starting from an // initial capacity of zero means we may be populating this core for the @@ -273,7 +273,7 @@ size_t CPUCache::UpdateCapacity(int cpu, size_t cl, size_t batch_length, [](CPUCache *cache, int cpu) { if (cache->lazy_slabs_) { absl::base_internal::SpinLockHolder h(&cache->resize_[cpu].lock); - cache->freelist_.InitCPU(cpu, MaxCapacityHelper); + cache->freelist_.InitCPU(cpu, MaxCapacityHelper); } // While we could unconditionally store, a lazy slab population @@ -352,7 +352,7 @@ void CPUCache::Grow(int cpu, size_t cl, size_t desired_increase, size_t actual_increase = acquired_bytes / size; actual_increase = std::min(actual_increase, desired_increase); // Remember, Grow may not give us all we ask for. - size_t increase = freelist_.Grow(cpu, cl, actual_increase, max_capacity_[cl]); + size_t increase = freelist_.Grow(cpu, cl, actual_increase, max_capacity_[cl]); size_t increased_bytes = increase * size; if (increased_bytes < acquired_bytes) { // return whatever we didn't use to the slack. 
@@ -361,285 +361,285 @@ void CPUCache::Grow(int cpu, size_t cl, size_t desired_increase, } } -void CPUCache::TryReclaimingCaches() { - const int num_cpus = absl::base_internal::NumCPUs(); - - for (int cpu = 0; cpu < num_cpus; ++cpu) { - // Nothing to reclaim if the cpu is not populated. - if (!HasPopulated(cpu)) { - continue; - } - - uint64_t used_bytes = UsedBytes(cpu); - uint64_t prev_used_bytes = - resize_[cpu].reclaim_used_bytes.load(std::memory_order_relaxed); - - // Get reclaim miss and used bytes stats that were captured at the end of - // the previous interval. - const CpuCacheMissStats miss_stats = GetReclaimCacheMissStats(cpu); - uint64_t misses = - uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}; - - // Reclaim the cache if the number of used bytes and total number of misses - // stayed constant since the last interval. - if (used_bytes != 0 && used_bytes == prev_used_bytes && misses == 0) { - Reclaim(cpu); - } - - // Takes a snapshot of used bytes in the cache at the end of this interval - // so that we can calculate if cache usage changed in the next interval. - // - // Reclaim occurs on a single thread. So, the relaxed store to used_bytes - // is safe. - resize_[cpu].reclaim_used_bytes.store(used_bytes, - std::memory_order_relaxed); - } -} - -void CPUCache::ShuffleCpuCaches() { - // Knobs that we can potentially tune depending on the workloads. - constexpr double kBytesToStealPercent = 5.0; - constexpr int kMaxNumStealCpus = 5; - - const int num_cpus = absl::base_internal::NumCPUs(); - absl::FixedArray<std::pair<int, uint64_t>> misses(num_cpus); - - // Record the cumulative misses for the caches so that we can select the - // caches with the highest misses as the candidates to steal the cache for. 
- int max_populated_cpu = -1; - int num_populated_cpus = 0; - for (int cpu = 0; cpu < num_cpus; ++cpu) { - if (!HasPopulated(cpu)) { - continue; - } - const CpuCacheMissStats miss_stats = GetIntervalCacheMissStats(cpu); - misses[num_populated_cpus] = { - cpu, uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}}; - max_populated_cpu = cpu; - ++num_populated_cpus; - } - if (max_populated_cpu == -1) { - return; - } - - // Sorts misses to identify cpus with highest misses. - // - // TODO(vgogte): We can potentially sort the entire misses array and use that - // in StealFromOtherCache to determine cpus to steal from. That is, [0, - // num_dest_cpus) may be the destination cpus and [num_dest_cpus, num_cpus) - // may be cpus we may steal from. We can iterate through the array in a - // descending order to steal from them. The upside of this mechanism is that - // we would be able to do a more fair stealing, starting with cpus with lowest - // misses. The downside of this mechanism is that we would have to sort the - // entire misses array. This might be compute intensive on servers with high - // number of cpus (eg. Rome, Milan). We need to investigate the compute - // required to implement this. - const int num_dest_cpus = std::min(num_populated_cpus, kMaxNumStealCpus); - std::partial_sort(misses.begin(), misses.begin() + num_dest_cpus, - misses.end(), - [](std::pair<int, uint64_t> a, std::pair<int, uint64_t> b) { - if (a.second == b.second) { - return a.first < b.first; - } - return a.second > b.second; - }); - - // Try to steal kBytesToStealPercent percentage of max_per_cpu_cache_size for - // each destination cpu cache. 
- size_t to_steal = - kBytesToStealPercent / 100.0 * Parameters::max_per_cpu_cache_size(); - for (int i = 0; i < num_dest_cpus; ++i) { - StealFromOtherCache(misses[i].first, max_populated_cpu, to_steal); - } - - // Takes a snapshot of underflows and overflows at the end of this interval - // so that we can calculate the misses that occurred in the next interval. - for (int cpu = 0; cpu < num_cpus; ++cpu) { - size_t underflows = - resize_[cpu].total_underflows.load(std::memory_order_relaxed); - size_t overflows = - resize_[cpu].total_overflows.load(std::memory_order_relaxed); - - // Shuffle occurs on a single thread. So, the relaxed stores to - // prev_underflow and pre_overflow counters are safe. - resize_[cpu].shuffle_underflows.store(underflows, - std::memory_order_relaxed); - resize_[cpu].shuffle_overflows.store(overflows, std::memory_order_relaxed); - } -} - -static void ShrinkHandler(void *arg, size_t cl, void **batch, size_t count) { - const size_t batch_length = Static::sizemap().num_objects_to_move(cl); - for (size_t i = 0; i < count; i += batch_length) { - size_t n = std::min(batch_length, count - i); - Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch + i, n)); - } -} - -void CPUCache::StealFromOtherCache(int cpu, int max_populated_cpu, - size_t bytes) { - constexpr double kCacheMissThreshold = 0.80; - - const CpuCacheMissStats dest_misses = GetIntervalCacheMissStats(cpu); - - // If both underflows and overflows are 0, we should not need to steal. - if (dest_misses.underflows == 0 && dest_misses.overflows == 0) return; - - size_t acquired = 0; - - // We use last_cpu_cache_steal_ as a hint to start our search for cpu ids to - // steal from so that we can iterate through the cpus in a nice round-robin - // fashion. - int src_cpu = std::min(last_cpu_cache_steal_.load(std::memory_order_relaxed), - max_populated_cpu); - - // We iterate through max_populate_cpus number of cpus to steal from. 
- // max_populate_cpus records the max cpu id that has been populated. Note - // that, any intermediate changes since the max_populated_cpus was measured - // may have populated higher cpu ids, but we do not include those in the - // search. The approximation prevents us from doing another pass through the - // cpus to just find the latest populated cpu id. - // - // We break from the loop once we iterate through all the cpus once, or if the - // total number of acquired bytes is higher than or equal to the desired bytes - // we want to steal. - for (int cpu_offset = 1; cpu_offset <= max_populated_cpu && acquired < bytes; - ++cpu_offset) { - if (--src_cpu < 0) { - src_cpu = max_populated_cpu; - } - ASSERT(0 <= src_cpu); - ASSERT(src_cpu <= max_populated_cpu); - - // We do not steal from the same CPU. Maybe we can explore combining this - // with stealing from the same CPU later. - if (src_cpu == cpu) continue; - - // We do not steal from the cache that hasn't been populated yet. - if (!HasPopulated(src_cpu)) continue; - - // We do not steal from cache that has capacity less than our lower - // capacity threshold. - if (Capacity(src_cpu) < - kCacheCapacityThreshold * Parameters::max_per_cpu_cache_size()) - continue; - - const CpuCacheMissStats src_misses = GetIntervalCacheMissStats(src_cpu); - - // If underflows and overflows from the source cpu are higher, we do not - // steal from that cache. We consider the cache as a candidate to steal from - // only when its misses are lower than 0.8x that of the dest cache. 
- if (src_misses.underflows > kCacheMissThreshold * dest_misses.underflows || - src_misses.overflows > kCacheMissThreshold * dest_misses.overflows) - continue; - - size_t start_cl = - resize_[src_cpu].last_steal.load(std::memory_order_relaxed); - - ASSERT(start_cl < kNumClasses); - ASSERT(0 < start_cl); - size_t source_cl = start_cl; - for (size_t offset = 1; offset < kNumClasses; ++offset) { - source_cl = start_cl + offset; - if (source_cl >= kNumClasses) { - source_cl -= kNumClasses - 1; - } - ASSERT(0 < source_cl); - ASSERT(source_cl < kNumClasses); - - const size_t capacity = freelist_.Capacity(src_cpu, source_cl); - if (capacity == 0) { - // Nothing to steal. - continue; - } - const size_t length = freelist_.Length(src_cpu, source_cl); - - // TODO(vgogte): Currently, scoring is similar to stealing from the - // same cpu in CpuCache::Steal(). Revisit this later to tune the - // knobs. - const size_t batch_length = - Static::sizemap().num_objects_to_move(source_cl); - size_t size = Static::sizemap().class_to_size(source_cl); - - // Clock-like algorithm to prioritize size classes for shrinking. - // - // Each size class has quiescent ticks counter which is incremented as we - // pass it, the counter is reset to 0 in UpdateCapacity on grow. - // If the counter value is 0, then we've just tried to grow the size - // class, so it makes little sense to shrink it back. The higher counter - // value the longer ago we grew the list and the more probable it is that - // the full capacity is unused. - // - // Then, we calculate "shrinking score", the higher the score the less we - // we want to shrink this size class. The score is considerably skewed - // towards larger size classes: smaller classes are usually used more - // actively and we also benefit less from shrinking smaller classes (steal - // less capacity). Then, we also avoid shrinking full freelists as we will - // need to evict an object and then go to the central freelist to return - // it. 
Then, we also avoid shrinking freelists that are just above batch - // size, because shrinking them will disable transfer cache. - // - // Finally, we shrink if the ticks counter is >= the score. - uint32_t qticks = resize_[src_cpu].per_class[source_cl].Tick(); - uint32_t score = 0; - // Note: the following numbers are based solely on intuition, common sense - // and benchmarking results. - if (size <= 144) { - score = 2 + (length >= capacity) + - (length >= batch_length && length < 2 * batch_length); - } else if (size <= 1024) { - score = 1 + (length >= capacity) + - (length >= batch_length && length < 2 * batch_length); - } else if (size <= (64 << 10)) { - score = (length >= capacity); - } - if (score > qticks) { - continue; - } - - // Finally, try to shrink (can fail if we were migrated). - // We always shrink by 1 object. The idea is that inactive lists will be - // shrunk to zero eventually anyway (or they just would not grow in the - // first place), but for active lists it does not make sense to - // aggressively shuffle capacity all the time. - // - // If the list is full, ShrinkOtherCache first tries to pop enough items - // to make space and then shrinks the capacity. - // TODO(vgogte): Maybe we can steal more from a single list to avoid - // frequent locking overhead. - { - absl::base_internal::SpinLockHolder h(&resize_[src_cpu].lock); - if (freelist_.ShrinkOtherCache(src_cpu, source_cl, 1, nullptr, - ShrinkHandler) == 1) { - acquired += size; - resize_[src_cpu].capacity.fetch_sub(size, std::memory_order_relaxed); - } - } - - if (acquired >= bytes) { - break; - } - } - resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed); - } - // Record the last cpu id we stole from, which would provide a hint to the - // next time we iterate through the cpus for stealing. - last_cpu_cache_steal_.store(src_cpu, std::memory_order_relaxed); - - // Increment the capacity of the destination cpu cache by the amount of bytes - // acquired from source caches. 
- if (acquired) { - size_t before = resize_[cpu].available.load(std::memory_order_relaxed); - size_t bytes_with_stolen; - do { - bytes_with_stolen = before + acquired; - } while (!resize_[cpu].available.compare_exchange_weak( - before, bytes_with_stolen, std::memory_order_relaxed, - std::memory_order_relaxed)); - resize_[cpu].capacity.fetch_add(acquired, std::memory_order_relaxed); - } -} - +void CPUCache::TryReclaimingCaches() { + const int num_cpus = absl::base_internal::NumCPUs(); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + // Nothing to reclaim if the cpu is not populated. + if (!HasPopulated(cpu)) { + continue; + } + + uint64_t used_bytes = UsedBytes(cpu); + uint64_t prev_used_bytes = + resize_[cpu].reclaim_used_bytes.load(std::memory_order_relaxed); + + // Get reclaim miss and used bytes stats that were captured at the end of + // the previous interval. + const CpuCacheMissStats miss_stats = GetReclaimCacheMissStats(cpu); + uint64_t misses = + uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}; + + // Reclaim the cache if the number of used bytes and total number of misses + // stayed constant since the last interval. + if (used_bytes != 0 && used_bytes == prev_used_bytes && misses == 0) { + Reclaim(cpu); + } + + // Takes a snapshot of used bytes in the cache at the end of this interval + // so that we can calculate if cache usage changed in the next interval. + // + // Reclaim occurs on a single thread. So, the relaxed store to used_bytes + // is safe. + resize_[cpu].reclaim_used_bytes.store(used_bytes, + std::memory_order_relaxed); + } +} + +void CPUCache::ShuffleCpuCaches() { + // Knobs that we can potentially tune depending on the workloads. 
+ constexpr double kBytesToStealPercent = 5.0; + constexpr int kMaxNumStealCpus = 5; + + const int num_cpus = absl::base_internal::NumCPUs(); + absl::FixedArray<std::pair<int, uint64_t>> misses(num_cpus); + + // Record the cumulative misses for the caches so that we can select the + // caches with the highest misses as the candidates to steal the cache for. + int max_populated_cpu = -1; + int num_populated_cpus = 0; + for (int cpu = 0; cpu < num_cpus; ++cpu) { + if (!HasPopulated(cpu)) { + continue; + } + const CpuCacheMissStats miss_stats = GetIntervalCacheMissStats(cpu); + misses[num_populated_cpus] = { + cpu, uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}}; + max_populated_cpu = cpu; + ++num_populated_cpus; + } + if (max_populated_cpu == -1) { + return; + } + + // Sorts misses to identify cpus with highest misses. + // + // TODO(vgogte): We can potentially sort the entire misses array and use that + // in StealFromOtherCache to determine cpus to steal from. That is, [0, + // num_dest_cpus) may be the destination cpus and [num_dest_cpus, num_cpus) + // may be cpus we may steal from. We can iterate through the array in a + // descending order to steal from them. The upside of this mechanism is that + // we would be able to do a more fair stealing, starting with cpus with lowest + // misses. The downside of this mechanism is that we would have to sort the + // entire misses array. This might be compute intensive on servers with high + // number of cpus (eg. Rome, Milan). We need to investigate the compute + // required to implement this. 
+ const int num_dest_cpus = std::min(num_populated_cpus, kMaxNumStealCpus); + std::partial_sort(misses.begin(), misses.begin() + num_dest_cpus, + misses.end(), + [](std::pair<int, uint64_t> a, std::pair<int, uint64_t> b) { + if (a.second == b.second) { + return a.first < b.first; + } + return a.second > b.second; + }); + + // Try to steal kBytesToStealPercent percentage of max_per_cpu_cache_size for + // each destination cpu cache. + size_t to_steal = + kBytesToStealPercent / 100.0 * Parameters::max_per_cpu_cache_size(); + for (int i = 0; i < num_dest_cpus; ++i) { + StealFromOtherCache(misses[i].first, max_populated_cpu, to_steal); + } + + // Takes a snapshot of underflows and overflows at the end of this interval + // so that we can calculate the misses that occurred in the next interval. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + size_t underflows = + resize_[cpu].total_underflows.load(std::memory_order_relaxed); + size_t overflows = + resize_[cpu].total_overflows.load(std::memory_order_relaxed); + + // Shuffle occurs on a single thread. So, the relaxed stores to + // prev_underflow and pre_overflow counters are safe. + resize_[cpu].shuffle_underflows.store(underflows, + std::memory_order_relaxed); + resize_[cpu].shuffle_overflows.store(overflows, std::memory_order_relaxed); + } +} + +static void ShrinkHandler(void *arg, size_t cl, void **batch, size_t count) { + const size_t batch_length = Static::sizemap().num_objects_to_move(cl); + for (size_t i = 0; i < count; i += batch_length) { + size_t n = std::min(batch_length, count - i); + Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch + i, n)); + } +} + +void CPUCache::StealFromOtherCache(int cpu, int max_populated_cpu, + size_t bytes) { + constexpr double kCacheMissThreshold = 0.80; + + const CpuCacheMissStats dest_misses = GetIntervalCacheMissStats(cpu); + + // If both underflows and overflows are 0, we should not need to steal. 
+ if (dest_misses.underflows == 0 && dest_misses.overflows == 0) return; + + size_t acquired = 0; + + // We use last_cpu_cache_steal_ as a hint to start our search for cpu ids to + // steal from so that we can iterate through the cpus in a nice round-robin + // fashion. + int src_cpu = std::min(last_cpu_cache_steal_.load(std::memory_order_relaxed), + max_populated_cpu); + + // We iterate through max_populate_cpus number of cpus to steal from. + // max_populate_cpus records the max cpu id that has been populated. Note + // that, any intermediate changes since the max_populated_cpus was measured + // may have populated higher cpu ids, but we do not include those in the + // search. The approximation prevents us from doing another pass through the + // cpus to just find the latest populated cpu id. + // + // We break from the loop once we iterate through all the cpus once, or if the + // total number of acquired bytes is higher than or equal to the desired bytes + // we want to steal. + for (int cpu_offset = 1; cpu_offset <= max_populated_cpu && acquired < bytes; + ++cpu_offset) { + if (--src_cpu < 0) { + src_cpu = max_populated_cpu; + } + ASSERT(0 <= src_cpu); + ASSERT(src_cpu <= max_populated_cpu); + + // We do not steal from the same CPU. Maybe we can explore combining this + // with stealing from the same CPU later. + if (src_cpu == cpu) continue; + + // We do not steal from the cache that hasn't been populated yet. + if (!HasPopulated(src_cpu)) continue; + + // We do not steal from cache that has capacity less than our lower + // capacity threshold. + if (Capacity(src_cpu) < + kCacheCapacityThreshold * Parameters::max_per_cpu_cache_size()) + continue; + + const CpuCacheMissStats src_misses = GetIntervalCacheMissStats(src_cpu); + + // If underflows and overflows from the source cpu are higher, we do not + // steal from that cache. We consider the cache as a candidate to steal from + // only when its misses are lower than 0.8x that of the dest cache. 
+ if (src_misses.underflows > kCacheMissThreshold * dest_misses.underflows || + src_misses.overflows > kCacheMissThreshold * dest_misses.overflows) + continue; + + size_t start_cl = + resize_[src_cpu].last_steal.load(std::memory_order_relaxed); + + ASSERT(start_cl < kNumClasses); + ASSERT(0 < start_cl); + size_t source_cl = start_cl; + for (size_t offset = 1; offset < kNumClasses; ++offset) { + source_cl = start_cl + offset; + if (source_cl >= kNumClasses) { + source_cl -= kNumClasses - 1; + } + ASSERT(0 < source_cl); + ASSERT(source_cl < kNumClasses); + + const size_t capacity = freelist_.Capacity(src_cpu, source_cl); + if (capacity == 0) { + // Nothing to steal. + continue; + } + const size_t length = freelist_.Length(src_cpu, source_cl); + + // TODO(vgogte): Currently, scoring is similar to stealing from the + // same cpu in CpuCache::Steal(). Revisit this later to tune the + // knobs. + const size_t batch_length = + Static::sizemap().num_objects_to_move(source_cl); + size_t size = Static::sizemap().class_to_size(source_cl); + + // Clock-like algorithm to prioritize size classes for shrinking. + // + // Each size class has quiescent ticks counter which is incremented as we + // pass it, the counter is reset to 0 in UpdateCapacity on grow. + // If the counter value is 0, then we've just tried to grow the size + // class, so it makes little sense to shrink it back. The higher counter + // value the longer ago we grew the list and the more probable it is that + // the full capacity is unused. + // + // Then, we calculate "shrinking score", the higher the score the less we + // we want to shrink this size class. The score is considerably skewed + // towards larger size classes: smaller classes are usually used more + // actively and we also benefit less from shrinking smaller classes (steal + // less capacity). Then, we also avoid shrinking full freelists as we will + // need to evict an object and then go to the central freelist to return + // it. 
Then, we also avoid shrinking freelists that are just above batch + // size, because shrinking them will disable transfer cache. + // + // Finally, we shrink if the ticks counter is >= the score. + uint32_t qticks = resize_[src_cpu].per_class[source_cl].Tick(); + uint32_t score = 0; + // Note: the following numbers are based solely on intuition, common sense + // and benchmarking results. + if (size <= 144) { + score = 2 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= 1024) { + score = 1 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= (64 << 10)) { + score = (length >= capacity); + } + if (score > qticks) { + continue; + } + + // Finally, try to shrink (can fail if we were migrated). + // We always shrink by 1 object. The idea is that inactive lists will be + // shrunk to zero eventually anyway (or they just would not grow in the + // first place), but for active lists it does not make sense to + // aggressively shuffle capacity all the time. + // + // If the list is full, ShrinkOtherCache first tries to pop enough items + // to make space and then shrinks the capacity. + // TODO(vgogte): Maybe we can steal more from a single list to avoid + // frequent locking overhead. + { + absl::base_internal::SpinLockHolder h(&resize_[src_cpu].lock); + if (freelist_.ShrinkOtherCache(src_cpu, source_cl, 1, nullptr, + ShrinkHandler) == 1) { + acquired += size; + resize_[src_cpu].capacity.fetch_sub(size, std::memory_order_relaxed); + } + } + + if (acquired >= bytes) { + break; + } + } + resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed); + } + // Record the last cpu id we stole from, which would provide a hint to the + // next time we iterate through the cpus for stealing. + last_cpu_cache_steal_.store(src_cpu, std::memory_order_relaxed); + + // Increment the capacity of the destination cpu cache by the amount of bytes + // acquired from source caches. 
+ if (acquired) { + size_t before = resize_[cpu].available.load(std::memory_order_relaxed); + size_t bytes_with_stolen; + do { + bytes_with_stolen = before + acquired; + } while (!resize_[cpu].available.compare_exchange_weak( + before, bytes_with_stolen, std::memory_order_relaxed, + std::memory_order_relaxed)); + resize_[cpu].capacity.fetch_add(acquired, std::memory_order_relaxed); + } +} + // There are rather a lot of policy knobs we could tweak here. size_t CPUCache::Steal(int cpu, size_t dest_cl, size_t bytes, ObjectsToReturn *to_return) { @@ -734,7 +734,7 @@ size_t CPUCache::Steal(int cpu, size_t dest_cl, size_t bytes, acquired += size; } - if (cpu != freelist_.GetCurrentVirtualCpuUnsafe() || acquired >= bytes) { + if (cpu != freelist_.GetCurrentVirtualCpuUnsafe() || acquired >= bytes) { // can't steal any more or don't need to break; } @@ -762,28 +762,28 @@ int CPUCache::Overflow(void *ptr, size_t cl, int cpu) { total += count; static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, "not enough space in batch"); - Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch, count)); + Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch, count)); if (count != batch_length) break; count = 0; - } while (total < target && cpu == freelist_.GetCurrentVirtualCpuUnsafe()); + } while (total < target && cpu == freelist_.GetCurrentVirtualCpuUnsafe()); tracking::Report(kFreeTruncations, cl, 1); return 1; } -uint64_t CPUCache::Allocated(int target_cpu) const { - ASSERT(target_cpu >= 0); - if (!HasPopulated(target_cpu)) { - return 0; - } - - uint64_t total = 0; - for (int cl = 1; cl < kNumClasses; cl++) { - int size = Static::sizemap().class_to_size(cl); - total += size * freelist_.Capacity(target_cpu, cl); - } - return total; -} - +uint64_t CPUCache::Allocated(int target_cpu) const { + ASSERT(target_cpu >= 0); + if (!HasPopulated(target_cpu)) { + return 0; + } + + uint64_t total = 0; + for (int cl = 1; cl < kNumClasses; cl++) { + int size = 
Static::sizemap().class_to_size(cl); + total += size * freelist_.Capacity(target_cpu, cl); + } + return total; +} + uint64_t CPUCache::UsedBytes(int target_cpu) const { ASSERT(target_cpu >= 0); if (!HasPopulated(target_cpu)) { @@ -834,10 +834,10 @@ uint64_t CPUCache::Unallocated(int cpu) const { return resize_[cpu].available.load(std::memory_order_relaxed); } -uint64_t CPUCache::Capacity(int cpu) const { - return resize_[cpu].capacity.load(std::memory_order_relaxed); -} - +uint64_t CPUCache::Capacity(int cpu) const { + return resize_[cpu].capacity.load(std::memory_order_relaxed); +} + uint64_t CPUCache::CacheLimit() const { return Parameters::max_per_cpu_cache_size(); } @@ -858,7 +858,7 @@ static void DrainHandler(void *arg, size_t cl, void **batch, size_t count, ctx->available->fetch_add(cap * size, std::memory_order_relaxed); for (size_t i = 0; i < count; i += batch_length) { size_t n = std::min(batch_length, count - i); - Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch + i, n)); + Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch + i, n)); } } @@ -874,101 +874,101 @@ uint64_t CPUCache::Reclaim(int cpu) { DrainContext ctx{&resize_[cpu].available, 0}; freelist_.Drain(cpu, &ctx, DrainHandler); - - // Record that the reclaim occurred for this CPU. - resize_[cpu].num_reclaims.store( - resize_[cpu].num_reclaims.load(std::memory_order_relaxed) + 1, - std::memory_order_relaxed); + + // Record that the reclaim occurred for this CPU. 
+ resize_[cpu].num_reclaims.store( + resize_[cpu].num_reclaims.load(std::memory_order_relaxed) + 1, + std::memory_order_relaxed); return ctx.bytes; } -uint64_t CPUCache::GetNumReclaims(int cpu) const { - return resize_[cpu].num_reclaims.load(std::memory_order_relaxed); -} - -void CPUCache::RecordCacheMissStat(const int cpu, const bool is_malloc) { - CPUCache &cpu_cache = Static::cpu_cache(); - if (is_malloc) { - cpu_cache.resize_[cpu].total_underflows.fetch_add( - 1, std::memory_order_relaxed); - } else { - cpu_cache.resize_[cpu].total_overflows.fetch_add(1, - std::memory_order_relaxed); - } -} - -CPUCache::CpuCacheMissStats CPUCache::GetReclaimCacheMissStats(int cpu) const { - CpuCacheMissStats stats; - size_t total_underflows = - resize_[cpu].total_underflows.load(std::memory_order_relaxed); - size_t prev_reclaim_underflows = - resize_[cpu].reclaim_underflows.load(std::memory_order_relaxed); - // Takes a snapshot of underflows at the end of this interval so that we can - // calculate the misses that occurred in the next interval. - // - // Reclaim occurs on a single thread. So, a relaxed store to the reclaim - // underflow stat is safe. - resize_[cpu].reclaim_underflows.store(total_underflows, - std::memory_order_relaxed); - - // In case of a size_t overflow, we wrap around to 0. - stats.underflows = total_underflows > prev_reclaim_underflows - ? total_underflows - prev_reclaim_underflows - : 0; - - size_t total_overflows = - resize_[cpu].total_overflows.load(std::memory_order_relaxed); - size_t prev_reclaim_overflows = - resize_[cpu].reclaim_overflows.load(std::memory_order_relaxed); - // Takes a snapshot of overflows at the end of this interval so that we can - // calculate the misses that occurred in the next interval. - // - // Reclaim occurs on a single thread. So, a relaxed store to the reclaim - // overflow stat is safe. 
- resize_[cpu].reclaim_overflows.store(total_overflows, - std::memory_order_relaxed); - - // In case of a size_t overflow, we wrap around to 0. - stats.overflows = total_overflows > prev_reclaim_overflows - ? total_overflows - prev_reclaim_overflows - : 0; - - return stats; -} - -CPUCache::CpuCacheMissStats CPUCache::GetIntervalCacheMissStats(int cpu) const { - CpuCacheMissStats stats; - size_t total_underflows = - resize_[cpu].total_underflows.load(std::memory_order_relaxed); - size_t shuffle_underflows = - resize_[cpu].shuffle_underflows.load(std::memory_order_relaxed); - // In case of a size_t overflow, we wrap around to 0. - stats.underflows = total_underflows > shuffle_underflows - ? total_underflows - shuffle_underflows - : 0; - - size_t total_overflows = - resize_[cpu].total_overflows.load(std::memory_order_relaxed); - size_t shuffle_overflows = - resize_[cpu].shuffle_overflows.load(std::memory_order_relaxed); - // In case of a size_t overflow, we wrap around to 0. - stats.overflows = total_overflows > shuffle_overflows - ? 
total_overflows - shuffle_overflows - : 0; - - return stats; -} - -CPUCache::CpuCacheMissStats CPUCache::GetTotalCacheMissStats(int cpu) const { - CpuCacheMissStats stats; - stats.underflows = - resize_[cpu].total_underflows.load(std::memory_order_relaxed); - stats.overflows = - resize_[cpu].total_overflows.load(std::memory_order_relaxed); - return stats; -} - -void CPUCache::Print(Printer *out) const { +uint64_t CPUCache::GetNumReclaims(int cpu) const { + return resize_[cpu].num_reclaims.load(std::memory_order_relaxed); +} + +void CPUCache::RecordCacheMissStat(const int cpu, const bool is_malloc) { + CPUCache &cpu_cache = Static::cpu_cache(); + if (is_malloc) { + cpu_cache.resize_[cpu].total_underflows.fetch_add( + 1, std::memory_order_relaxed); + } else { + cpu_cache.resize_[cpu].total_overflows.fetch_add(1, + std::memory_order_relaxed); + } +} + +CPUCache::CpuCacheMissStats CPUCache::GetReclaimCacheMissStats(int cpu) const { + CpuCacheMissStats stats; + size_t total_underflows = + resize_[cpu].total_underflows.load(std::memory_order_relaxed); + size_t prev_reclaim_underflows = + resize_[cpu].reclaim_underflows.load(std::memory_order_relaxed); + // Takes a snapshot of underflows at the end of this interval so that we can + // calculate the misses that occurred in the next interval. + // + // Reclaim occurs on a single thread. So, a relaxed store to the reclaim + // underflow stat is safe. + resize_[cpu].reclaim_underflows.store(total_underflows, + std::memory_order_relaxed); + + // In case of a size_t overflow, we wrap around to 0. + stats.underflows = total_underflows > prev_reclaim_underflows + ? 
total_underflows - prev_reclaim_underflows + : 0; + + size_t total_overflows = + resize_[cpu].total_overflows.load(std::memory_order_relaxed); + size_t prev_reclaim_overflows = + resize_[cpu].reclaim_overflows.load(std::memory_order_relaxed); + // Takes a snapshot of overflows at the end of this interval so that we can + // calculate the misses that occurred in the next interval. + // + // Reclaim occurs on a single thread. So, a relaxed store to the reclaim + // overflow stat is safe. + resize_[cpu].reclaim_overflows.store(total_overflows, + std::memory_order_relaxed); + + // In case of a size_t overflow, we wrap around to 0. + stats.overflows = total_overflows > prev_reclaim_overflows + ? total_overflows - prev_reclaim_overflows + : 0; + + return stats; +} + +CPUCache::CpuCacheMissStats CPUCache::GetIntervalCacheMissStats(int cpu) const { + CpuCacheMissStats stats; + size_t total_underflows = + resize_[cpu].total_underflows.load(std::memory_order_relaxed); + size_t shuffle_underflows = + resize_[cpu].shuffle_underflows.load(std::memory_order_relaxed); + // In case of a size_t overflow, we wrap around to 0. + stats.underflows = total_underflows > shuffle_underflows + ? total_underflows - shuffle_underflows + : 0; + + size_t total_overflows = + resize_[cpu].total_overflows.load(std::memory_order_relaxed); + size_t shuffle_overflows = + resize_[cpu].shuffle_overflows.load(std::memory_order_relaxed); + // In case of a size_t overflow, we wrap around to 0. + stats.overflows = total_overflows > shuffle_overflows + ? 
total_overflows - shuffle_overflows + : 0; + + return stats; +} + +CPUCache::CpuCacheMissStats CPUCache::GetTotalCacheMissStats(int cpu) const { + CpuCacheMissStats stats; + stats.underflows = + resize_[cpu].total_underflows.load(std::memory_order_relaxed); + stats.overflows = + resize_[cpu].total_overflows.load(std::memory_order_relaxed); + return stats; +} + +void CPUCache::Print(Printer *out) const { out->printf("------------------------------------------------\n"); out->printf("Bytes in per-CPU caches (per cpu limit: %" PRIu64 " bytes)\n", Static::cpu_cache().CacheLimit()); @@ -990,23 +990,23 @@ void CPUCache::Print(Printer *out) const { CPU_ISSET(cpu, &allowed_cpus) ? " active" : "", populated ? " populated" : ""); } - - out->printf("------------------------------------------------\n"); - out->printf("Number of per-CPU cache underflows, overflows and reclaims\n"); - out->printf("------------------------------------------------\n"); - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); - uint64_t reclaims = GetNumReclaims(cpu); - out->printf( - "cpu %3d:" - "%12" PRIu64 - " underflows," - "%12" PRIu64 - " overflows," - "%12" PRIu64 " reclaims\n", - cpu, miss_stats.underflows, miss_stats.overflows, reclaims); - } + + out->printf("------------------------------------------------\n"); + out->printf("Number of per-CPU cache underflows, overflows and reclaims\n"); + out->printf("------------------------------------------------\n"); + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); + uint64_t reclaims = GetNumReclaims(cpu); + out->printf( + "cpu %3d:" + "%12" PRIu64 + " underflows," + "%12" PRIu64 + " overflows," + "%12" PRIu64 " reclaims\n", + cpu, miss_stats.underflows, miss_stats.overflows, reclaims); + } } void CPUCache::PrintInPbtxt(PbtxtRegion *region) const { @@ 
-1018,33 +1018,33 @@ void CPUCache::PrintInPbtxt(PbtxtRegion *region) const { uint64_t rbytes = UsedBytes(cpu); bool populated = HasPopulated(cpu); uint64_t unallocated = Unallocated(cpu); - CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); - uint64_t reclaims = GetNumReclaims(cpu); + CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); + uint64_t reclaims = GetNumReclaims(cpu); entry.PrintI64("cpu", uint64_t(cpu)); entry.PrintI64("used", rbytes); entry.PrintI64("unused", unallocated); entry.PrintBool("active", CPU_ISSET(cpu, &allowed_cpus)); entry.PrintBool("populated", populated); - entry.PrintI64("underflows", miss_stats.underflows); - entry.PrintI64("overflows", miss_stats.overflows); - entry.PrintI64("reclaims", reclaims); - } -} - -void CPUCache::AcquireInternalLocks() { - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - resize_[cpu].lock.Lock(); - } -} - -void CPUCache::ReleaseInternalLocks() { - for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; - ++cpu) { - resize_[cpu].lock.Unlock(); + entry.PrintI64("underflows", miss_stats.underflows); + entry.PrintI64("overflows", miss_stats.overflows); + entry.PrintI64("reclaims", reclaims); } } +void CPUCache::AcquireInternalLocks() { + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + resize_[cpu].lock.Lock(); + } +} + +void CPUCache::ReleaseInternalLocks() { + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + resize_[cpu].lock.Unlock(); + } +} + void CPUCache::PerClassResizeInfo::Init() { state_.store(0, std::memory_order_relaxed); } @@ -1077,17 +1077,17 @@ uint32_t CPUCache::PerClassResizeInfo::Tick() { return state.quiescent_ticks - 1; } -#ifdef ABSL_HAVE_THREAD_SANITIZER -extern "C" int RunningOnValgrind(); -#endif - +#ifdef ABSL_HAVE_THREAD_SANITIZER +extern "C" int RunningOnValgrind(); +#endif + static void ActivatePerCPUCaches() { - if 
(tcmalloc::tcmalloc_internal::Static::CPUCacheActive()) { - // Already active. - return; - } - -#ifdef ABSL_HAVE_THREAD_SANITIZER + if (tcmalloc::tcmalloc_internal::Static::CPUCacheActive()) { + // Already active. + return; + } + +#ifdef ABSL_HAVE_THREAD_SANITIZER // RunningOnValgrind is a proxy for "is something intercepting malloc." // // If Valgrind, et. al., are in use, TCMalloc isn't in use and we shouldn't @@ -1095,7 +1095,7 @@ static void ActivatePerCPUCaches() { if (RunningOnValgrind()) { return; } -#endif +#endif if (Parameters::per_cpu_caches() && subtle::percpu::IsFast()) { Static::InitIfNecessary(); Static::cpu_cache().Activate(CPUCache::ActivationMode::FastPathOn); @@ -1114,27 +1114,27 @@ class PerCPUInitializer { }; static PerCPUInitializer module_enter_exit; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -extern "C" void TCMalloc_Internal_ForceCpuCacheActivation() { - tcmalloc::tcmalloc_internal::ActivatePerCPUCaches(); -} +GOOGLE_MALLOC_SECTION_END +extern "C" void TCMalloc_Internal_ForceCpuCacheActivation() { + tcmalloc::tcmalloc_internal::ActivatePerCPUCaches(); +} + extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() { - return tcmalloc::tcmalloc_internal::Static::CPUCacheActive(); -} - -extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() { - tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false); - tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache(); + return tcmalloc::tcmalloc_internal::Static::CPUCacheActive(); } +extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() { + tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false); + tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache(); +} + extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() { - return tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size(); + return 
tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size(); } extern "C" void MallocExtension_Internal_SetMaxPerCpuCacheSize(int32_t value) { - tcmalloc::tcmalloc_internal::Parameters::set_max_per_cpu_cache_size(value); + tcmalloc::tcmalloc_internal::Parameters::set_max_per_cpu_cache_size(value); } diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h index dab7d18910..c66bfa9f9f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h @@ -32,14 +32,14 @@ #include "tcmalloc/thread_cache.h" #include "tcmalloc/tracking.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { class CPUCache { public: - constexpr CPUCache() = default; - + constexpr CPUCache() = default; + enum class ActivationMode { FastPathOn, FastPathOffTestOnly, @@ -51,11 +51,11 @@ class CPUCache { void Activate(ActivationMode mode); // Allocate an object of the given size class. When allocation fails - // (from this cache and after running Refill), OOMHandler(size) is + // (from this cache and after running Refill), OOMHandler(size) is // called and its return value is returned from - // Allocate. OOMHandler is used to parameterize out-of-memory + // Allocate. OOMHandler is used to parameterize out-of-memory // handling (raising exception, returning nullptr, calling - // new_handler or anything else). "Passing" OOMHandler in this way + // new_handler or anything else). "Passing" OOMHandler in this way // allows Allocate to be used in tail-call position in fast-path, // making Allocate use jump (tail-call) to slow path code. 
template <void* OOMHandler(size_t)> @@ -67,9 +67,9 @@ class CPUCache { // Give the number of bytes in <cpu>'s cache uint64_t UsedBytes(int cpu) const; - // Give the allocated number of bytes in <cpu>'s cache - uint64_t Allocated(int cpu) const; - + // Give the allocated number of bytes in <cpu>'s cache + uint64_t Allocated(int cpu) const; + // Whether <cpu>'s cache has ever been populated with objects bool HasPopulated(int cpu) const; @@ -84,48 +84,48 @@ class CPUCache { // Give the number of bytes unallocated to any sizeclass in <cpu>'s cache. uint64_t Unallocated(int cpu) const; - // Gives the total capacity of <cpu>'s cache in bytes. - // - // The total capacity of <cpu>'s cache should be equal to the sum of allocated - // and unallocated bytes for that cache. - uint64_t Capacity(int cpu) const; - + // Gives the total capacity of <cpu>'s cache in bytes. + // + // The total capacity of <cpu>'s cache should be equal to the sum of allocated + // and unallocated bytes for that cache. + uint64_t Capacity(int cpu) const; + // Give the per-cpu limit of cache size. uint64_t CacheLimit() const; - // Shuffles per-cpu caches using the number of underflows and overflows that - // occurred in the prior interval. It selects the top per-cpu caches - // with highest misses as candidates, iterates through the other per-cpu - // caches to steal capacity from them and adds the stolen bytes to the - // available capacity of the per-cpu caches. May be called from any processor. - // - // TODO(vgogte): There are quite a few knobs that we can play around with in - // ShuffleCpuCaches. - void ShuffleCpuCaches(); - - // Sets the lower limit on the capacity that can be stolen from the cpu cache. - static constexpr double kCacheCapacityThreshold = 0.20; - - // Tries to steal <bytes> for the destination <cpu>. It iterates through the - // the set of populated cpu caches and steals the bytes from them. 
A cpu is - // considered a good candidate to steal from if: - // (1) the cache is populated - // (2) the numbers of underflows and overflows are both less than 0.8x those - // of the destination per-cpu cache - // (3) source cpu is not the same as the destination cpu - // (4) capacity of the source cpu/cl is non-zero - // - // For a given source cpu, we iterate through the size classes to steal from - // them. Currently, we use a similar clock-like algorithm from Steal() to - // identify the cl to steal from. - void StealFromOtherCache(int cpu, int max_populated_cpu, size_t bytes); - - // Tries to reclaim inactive per-CPU caches. It iterates through the set of - // populated cpu caches and reclaims the caches that: - // (1) had same number of used bytes since the last interval, - // (2) had no change in the number of misses since the last interval. - void TryReclaimingCaches(); - + // Shuffles per-cpu caches using the number of underflows and overflows that + // occurred in the prior interval. It selects the top per-cpu caches + // with highest misses as candidates, iterates through the other per-cpu + // caches to steal capacity from them and adds the stolen bytes to the + // available capacity of the per-cpu caches. May be called from any processor. + // + // TODO(vgogte): There are quite a few knobs that we can play around with in + // ShuffleCpuCaches. + void ShuffleCpuCaches(); + + // Sets the lower limit on the capacity that can be stolen from the cpu cache. + static constexpr double kCacheCapacityThreshold = 0.20; + + // Tries to steal <bytes> for the destination <cpu>. It iterates through the + // the set of populated cpu caches and steals the bytes from them. 
A cpu is + // considered a good candidate to steal from if: + // (1) the cache is populated + // (2) the numbers of underflows and overflows are both less than 0.8x those + // of the destination per-cpu cache + // (3) source cpu is not the same as the destination cpu + // (4) capacity of the source cpu/cl is non-zero + // + // For a given source cpu, we iterate through the size classes to steal from + // them. Currently, we use a similar clock-like algorithm from Steal() to + // identify the cl to steal from. + void StealFromOtherCache(int cpu, int max_populated_cpu, size_t bytes); + + // Tries to reclaim inactive per-CPU caches. It iterates through the set of + // populated cpu caches and reclaims the caches that: + // (1) had same number of used bytes since the last interval, + // (2) had no change in the number of misses since the last interval. + void TryReclaimingCaches(); + // Empty out the cache on <cpu>; move all objects to the central // cache. (If other threads run concurrently on that cpu, we can't // guarantee it will be fully empty on return, but if the cpu is @@ -133,9 +133,9 @@ class CPUCache { // of bytes we sent back. This function is thread safe. uint64_t Reclaim(int cpu); - // Reports number of times the <cpu> has been reclaimed. - uint64_t GetNumReclaims(int cpu) const; - + // Reports number of times the <cpu> has been reclaimed. + uint64_t GetNumReclaims(int cpu) const; + // Determine number of bits we should use for allocating per-cpu cache // The amount of per-cpu cache is 2 ^ kPerCpuShift #if defined(TCMALLOC_SMALL_BUT_SLOW) @@ -144,29 +144,29 @@ class CPUCache { static constexpr size_t kPerCpuShift = 18; #endif - struct CpuCacheMissStats { - size_t underflows; - size_t overflows; - }; - - // Reports total cache underflows and overflows for <cpu>. - CpuCacheMissStats GetTotalCacheMissStats(int cpu) const; - - // Reports the cache underflows and overflows for <cpu> that were recorded at - // the end of the previous interval. 
It also records current underflows and - // overflows in the reclaim underflow and overflow stats. - CpuCacheMissStats GetReclaimCacheMissStats(int cpu) const; - - // Reports cache underflows and overflows for <cpu> this interval. - CpuCacheMissStats GetIntervalCacheMissStats(int cpu) const; - + struct CpuCacheMissStats { + size_t underflows; + size_t overflows; + }; + + // Reports total cache underflows and overflows for <cpu>. + CpuCacheMissStats GetTotalCacheMissStats(int cpu) const; + + // Reports the cache underflows and overflows for <cpu> that were recorded at + // the end of the previous interval. It also records current underflows and + // overflows in the reclaim underflow and overflow stats. + CpuCacheMissStats GetReclaimCacheMissStats(int cpu) const; + + // Reports cache underflows and overflows for <cpu> this interval. + CpuCacheMissStats GetIntervalCacheMissStats(int cpu) const; + // Report statistics - void Print(Printer* out) const; + void Print(Printer* out) const; void PrintInPbtxt(PbtxtRegion* region) const; - void AcquireInternalLocks(); - void ReleaseInternalLocks(); - + void AcquireInternalLocks(); + void ReleaseInternalLocks(); + private: // Per-size-class freelist resizing info. class PerClassResizeInfo { @@ -195,7 +195,7 @@ class CPUCache { "size mismatch"); }; - subtle::percpu::TcmallocSlab<kNumClasses> freelist_; + subtle::percpu::TcmallocSlab<kNumClasses> freelist_; struct ResizeInfoUnpadded { // cache space on this CPU we're not using. Modify atomically; @@ -210,45 +210,45 @@ class CPUCache { // For cross-cpu operations. absl::base_internal::SpinLock lock; PerClassResizeInfo per_class[kNumClasses]; - // tracks number of underflows on allocate. - std::atomic<size_t> total_underflows; - // tracks number of overflows on deallocate. - std::atomic<size_t> total_overflows; - // tracks number of underflows recorded as of the end of the last shuffle - // interval. 
- std::atomic<size_t> shuffle_underflows; - // tracks number of overflows recorded as of the end of the last shuffle - // interval. - std::atomic<size_t> shuffle_overflows; - // total cache space available on this CPU. This tracks the total - // allocated and unallocated bytes on this CPU cache. - std::atomic<size_t> capacity; - // Number of underflows as of the end of the last resize interval. - std::atomic<size_t> reclaim_underflows; - // Number of overflows as of the end of the last resize interval. - std::atomic<size_t> reclaim_overflows; - // Used bytes in the cache as of the end of the last resize interval. - std::atomic<uint64_t> reclaim_used_bytes; - // Tracks number of times this CPU has been reclaimed. - std::atomic<size_t> num_reclaims; + // tracks number of underflows on allocate. + std::atomic<size_t> total_underflows; + // tracks number of overflows on deallocate. + std::atomic<size_t> total_overflows; + // tracks number of underflows recorded as of the end of the last shuffle + // interval. + std::atomic<size_t> shuffle_underflows; + // tracks number of overflows recorded as of the end of the last shuffle + // interval. + std::atomic<size_t> shuffle_overflows; + // total cache space available on this CPU. This tracks the total + // allocated and unallocated bytes on this CPU cache. + std::atomic<size_t> capacity; + // Number of underflows as of the end of the last resize interval. + std::atomic<size_t> reclaim_underflows; + // Number of overflows as of the end of the last resize interval. + std::atomic<size_t> reclaim_overflows; + // Used bytes in the cache as of the end of the last resize interval. + std::atomic<uint64_t> reclaim_used_bytes; + // Tracks number of times this CPU has been reclaimed. + std::atomic<size_t> num_reclaims; }; struct ResizeInfo : ResizeInfoUnpadded { char pad[ABSL_CACHELINE_SIZE - sizeof(ResizeInfoUnpadded) % ABSL_CACHELINE_SIZE]; }; // Tracking data for each CPU's cache resizing efforts. 
- ResizeInfo* resize_ = nullptr; - + ResizeInfo* resize_ = nullptr; + // Track whether we are lazily initializing slabs. We cannot use the latest // value in Parameters, as it can change after initialization. - bool lazy_slabs_ = false; - // The maximum capacity of each size class within the slab. - uint16_t max_capacity_[kNumClasses] = {0}; - - // Provides a hint to StealFromOtherCache() so that we can steal from the - // caches in a round-robin fashion. - std::atomic<int> last_cpu_cache_steal_ = 0; - + bool lazy_slabs_ = false; + // The maximum capacity of each size class within the slab. + uint16_t max_capacity_[kNumClasses] = {0}; + + // Provides a hint to StealFromOtherCache() so that we can steal from the + // caches in a round-robin fashion. + std::atomic<int> last_cpu_cache_steal_ = 0; + // Return a set of objects to be returned to the Transfer Cache. static constexpr int kMaxToReturn = 16; struct ObjectsToReturn { @@ -256,17 +256,17 @@ class CPUCache { int count = kMaxToReturn; // The size class of the returned object. kNumClasses is the // largest value that needs to be stored in cl. - CompactSizeClass cl[kMaxToReturn]; + CompactSizeClass cl[kMaxToReturn]; void* obj[kMaxToReturn]; }; - static size_t MaxCapacityHelper(size_t cl) { - CPUCache& cpu_cache = Static::cpu_cache(); - // Heuristic that the CPUCache has been activated. - ASSERT(cpu_cache.resize_ != nullptr); - return cpu_cache.max_capacity_[cl]; - } - + static size_t MaxCapacityHelper(size_t cl) { + CPUCache& cpu_cache = Static::cpu_cache(); + // Heuristic that the CPUCache has been activated. + ASSERT(cpu_cache.resize_ != nullptr); + return cpu_cache.max_capacity_[cl]; + } + void* Refill(int cpu, size_t cl); // This is called after finding a full freelist when attempting to push <ptr> @@ -292,12 +292,12 @@ class CPUCache { // be freed. 
size_t Steal(int cpu, size_t cl, size_t bytes, ObjectsToReturn* to_return); - // Records a cache underflow or overflow on <cpu>, increments underflow or - // overflow by 1. - // <is_malloc> determines whether the associated count corresponds to an - // underflow or overflow. - void RecordCacheMissStat(const int cpu, const bool is_malloc); - + // Records a cache underflow or overflow on <cpu>, increments underflow or + // overflow by 1. + // <is_malloc> determines whether the associated count corresponds to an + // underflow or overflow. + void RecordCacheMissStat(const int cpu, const bool is_malloc); + static void* NoopUnderflow(int cpu, size_t cl) { return nullptr; } static int NoopOverflow(int cpu, size_t cl, void* item) { return -1; } }; @@ -312,15 +312,15 @@ inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Allocate(size_t cl) { // we've optimistically reported hit in Allocate, lets undo it and // report miss instead. tracking::Report(kMallocHit, cl, -1); - void* ret = nullptr; - if (Static::sharded_transfer_cache().should_use(cl)) { - ret = Static::sharded_transfer_cache().Pop(cl); - } else { - tracking::Report(kMallocMiss, cl, 1); - CPUCache& cache = Static::cpu_cache(); - cache.RecordCacheMissStat(cpu, true); - ret = cache.Refill(cpu, cl); - } + void* ret = nullptr; + if (Static::sharded_transfer_cache().should_use(cl)) { + ret = Static::sharded_transfer_cache().Pop(cl); + } else { + tracking::Report(kMallocMiss, cl, 1); + CPUCache& cache = Static::cpu_cache(); + cache.RecordCacheMissStat(cpu, true); + ret = cache.Refill(cpu, cl); + } if (ABSL_PREDICT_FALSE(ret == nullptr)) { size_t size = Static::sizemap().class_to_size(cl); return OOMHandler(size); @@ -341,14 +341,14 @@ inline void ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Deallocate(void* ptr, // When we reach here we've already optimistically bumped FreeHits. // Fix that. 
tracking::Report(kFreeHit, cl, -1); - if (Static::sharded_transfer_cache().should_use(cl)) { - Static::sharded_transfer_cache().Push(cl, ptr); - return 1; - } + if (Static::sharded_transfer_cache().should_use(cl)) { + Static::sharded_transfer_cache().Push(cl, ptr); + return 1; + } tracking::Report(kFreeMiss, cl, 1); - CPUCache& cache = Static::cpu_cache(); - cache.RecordCacheMissStat(cpu, false); - return cache.Overflow(ptr, cl, cpu); + CPUCache& cache = Static::cpu_cache(); + cache.RecordCacheMissStat(cpu, false); + return cache.Overflow(ptr, cl, cpu); } }; freelist_.Push(cl, ptr, Helper::Overflow); @@ -361,7 +361,7 @@ inline bool UsePerCpuCache() { return false; } - if (ABSL_PREDICT_TRUE(subtle::percpu::IsFastNoInit())) { + if (ABSL_PREDICT_TRUE(subtle::percpu::IsFastNoInit())) { return true; } @@ -376,7 +376,7 @@ inline bool UsePerCpuCache() { // If the per-CPU cache for a thread is not initialized, we push ourselves // onto the slow path (if !defined(TCMALLOC_DEPRECATED_PERTHREAD)) until this // occurs. See fast_alloc's use of TryRecordAllocationFast. 
- if (ABSL_PREDICT_TRUE(subtle::percpu::IsFast())) { + if (ABSL_PREDICT_TRUE(subtle::percpu::IsFast())) { ThreadCache::BecomeIdle(); return true; } @@ -384,7 +384,7 @@ inline bool UsePerCpuCache() { return false; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_CPU_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc index fd4282b9c3..8cecda36f3 100644 --- a/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc @@ -14,24 +14,24 @@ #include "tcmalloc/cpu_cache.h" -#include <thread> // NOLINT(build/c++11) - +#include <thread> // NOLINT(build/c++11) + #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/random/random.h" -#include "absl/random/seed_sequences.h" +#include "absl/random/random.h" +#include "absl/random/seed_sequences.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/optimization.h" #include "tcmalloc/internal/util.h" #include "tcmalloc/parameters.h" #include "tcmalloc/static_vars.h" -#include "tcmalloc/testing/testutil.h" +#include "tcmalloc/testing/testutil.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { -constexpr size_t kStressSlabs = 4; +constexpr size_t kStressSlabs = 4; void* OOMHandler(size_t) { return nullptr; } TEST(CpuCacheTest, Metadata) { @@ -69,9 +69,9 @@ TEST(CpuCacheTest, Metadata) { int allowed_cpu_id; const size_t kSizeClass = 3; const size_t num_to_move = Static::sizemap().num_objects_to_move(kSizeClass); - const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus() - ? offsetof(kernel_rseq, vcpu_id) - : offsetof(kernel_rseq, cpu_id); + const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus() + ? 
offsetof(kernel_rseq, vcpu_id) + : offsetof(kernel_rseq, cpu_id); void* ptr; { // Restrict this thread to a single core while allocating and processing the @@ -82,14 +82,14 @@ TEST(CpuCacheTest, Metadata) { // pages to be faulted for those cores, leading to test flakiness. tcmalloc_internal::ScopedAffinityMask mask( tcmalloc_internal::AllowedCpus()[0]); - allowed_cpu_id = - subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset); + allowed_cpu_id = + subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset); ptr = cache.Allocate<OOMHandler>(kSizeClass); if (mask.Tampered() || - allowed_cpu_id != - subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) { + allowed_cpu_id != + subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) { return; } } @@ -146,9 +146,9 @@ TEST(CpuCacheTest, Metadata) { } EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size); - EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size); - EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), - cache.Capacity(cpu)); + EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size); + EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), + cache.Capacity(cpu)); } for (int cl = 0; cl < kNumClasses; ++cl) { @@ -180,420 +180,420 @@ TEST(CpuCacheTest, Metadata) { } } -TEST(CpuCacheTest, CacheMissStats) { - if (!subtle::percpu::IsFast()) { - return; - } - - const int num_cpus = absl::base_internal::NumCPUs(); - - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. - cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); - - // The number of underflows and overflows must be zero for all the caches. 
- for (int cpu = 0; cpu < num_cpus; ++cpu) { - CPUCache::CpuCacheMissStats total_misses = - cache.GetTotalCacheMissStats(cpu); - CPUCache::CpuCacheMissStats interval_misses = - cache.GetIntervalCacheMissStats(cpu); - EXPECT_EQ(total_misses.underflows, 0); - EXPECT_EQ(total_misses.overflows, 0); - EXPECT_EQ(interval_misses.underflows, 0); - EXPECT_EQ(interval_misses.overflows, 0); - } - - int allowed_cpu_id; - const size_t kSizeClass = 3; - const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus() - ? offsetof(kernel_rseq, vcpu_id) - : offsetof(kernel_rseq, cpu_id); - void* ptr; - { - // Restrict this thread to a single core while allocating and processing the - // slow path. - // - // TODO(b/151313823): Without this restriction, we may access--for reading - // only--other slabs if we end up being migrated. These may cause huge - // pages to be faulted for those cores, leading to test flakiness. - tcmalloc_internal::ScopedAffinityMask mask( - tcmalloc_internal::AllowedCpus()[0]); - allowed_cpu_id = - subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset); - - ptr = cache.Allocate<OOMHandler>(kSizeClass); - - if (mask.Tampered() || - allowed_cpu_id != - subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) { - return; - } - } - - for (int cpu = 0; cpu < num_cpus; ++cpu) { - CPUCache::CpuCacheMissStats total_misses = - cache.GetTotalCacheMissStats(cpu); - CPUCache::CpuCacheMissStats interval_misses = - cache.GetIntervalCacheMissStats(cpu); - if (cpu == allowed_cpu_id) { - EXPECT_EQ(total_misses.underflows, 1); - EXPECT_EQ(interval_misses.underflows, 1); - } else { - EXPECT_EQ(total_misses.underflows, 0); - EXPECT_EQ(interval_misses.underflows, 0); - } - EXPECT_EQ(total_misses.overflows, 0); - EXPECT_EQ(interval_misses.overflows, 0); - } - - // Tear down. - // - // TODO(ckennelly): We're interacting with the real TransferCache. 
- cache.Deallocate(ptr, kSizeClass); - - for (int i = 0; i < num_cpus; i++) { - cache.Reclaim(i); - } -} - -static void ShuffleThread(const std::atomic<bool>& stop) { - if (!subtle::percpu::IsFast()) { - return; - } - - CPUCache& cache = Static::cpu_cache(); - // Wake up every 10ms to shuffle the caches so that we can allow misses to - // accumulate during that interval - while (!stop) { - cache.ShuffleCpuCaches(); - absl::SleepFor(absl::Milliseconds(10)); - } -} - -static void StressThread(size_t thread_id, const std::atomic<bool>& stop) { - if (!subtle::percpu::IsFast()) { - return; - } - - CPUCache& cache = Static::cpu_cache(); - std::vector<std::pair<size_t, void*>> blocks; - absl::BitGen rnd; - while (!stop) { - const int what = absl::Uniform<int32_t>(rnd, 0, 2); - if (what) { - // Allocate an object for a class - size_t cl = absl::Uniform<int32_t>(rnd, 1, kStressSlabs + 1); - void* ptr = cache.Allocate<OOMHandler>(cl); - blocks.emplace_back(std::make_pair(cl, ptr)); - } else { - // Deallocate an object for a class - if (!blocks.empty()) { - cache.Deallocate(blocks.back().second, blocks.back().first); - blocks.pop_back(); - } - } - } - - // Cleaup. Deallocate rest of the allocated memory. - for (int i = 0; i < blocks.size(); i++) { - cache.Deallocate(blocks[i].second, blocks[i].first); - } -} - -TEST(CpuCacheTest, StealCpuCache) { - if (!subtle::percpu::IsFast()) { - return; - } - - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. 
- cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); - - std::vector<std::thread> threads; - std::thread shuffle_thread; - const int n_threads = absl::base_internal::NumCPUs(); - std::atomic<bool> stop(false); - - for (size_t t = 0; t < n_threads; ++t) { - threads.push_back(std::thread(StressThread, t, std::ref(stop))); - } - shuffle_thread = std::thread(ShuffleThread, std::ref(stop)); - - absl::SleepFor(absl::Seconds(5)); - stop = true; - for (auto& t : threads) { - t.join(); - } - shuffle_thread.join(); - - // Check that the total capacity is preserved after the shuffle. - size_t capacity = 0; - const int num_cpus = absl::base_internal::NumCPUs(); - const size_t kTotalCapacity = num_cpus * Parameters::max_per_cpu_cache_size(); - for (int cpu = 0; cpu < num_cpus; ++cpu) { - EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), - cache.Capacity(cpu)); - capacity += cache.Capacity(cpu); - } - EXPECT_EQ(capacity, kTotalCapacity); - - for (int cpu = 0; cpu < num_cpus; ++cpu) { - cache.Reclaim(cpu); - } -} - -// Runs a single allocate and deallocate operation to warm up the cache. Once a -// few objects are allocated in the cold cache, we can shuffle cpu caches to -// steal that capacity from the cold cache to the hot cache. -static void ColdCacheOperations(int cpu_id, size_t size_class) { - // Temporarily fake being on the given CPU. - ScopedFakeCpuId fake_cpu_id(cpu_id); - - CPUCache& cache = Static::cpu_cache(); -#if TCMALLOC_PERCPU_USE_RSEQ - if (subtle::percpu::UsingFlatVirtualCpus()) { - subtle::percpu::__rseq_abi.vcpu_id = cpu_id; - } -#endif - - void* ptr = cache.Allocate<OOMHandler>(size_class); - cache.Deallocate(ptr, size_class); -} - -// Runs multiple allocate and deallocate operation on the cpu cache to collect -// misses. Once we collect enough misses on this cache, we can shuffle cpu -// caches to steal capacity from colder caches to the hot cache. -static void HotCacheOperations(int cpu_id) { - // Temporarily fake being on the given CPU. 
- ScopedFakeCpuId fake_cpu_id(cpu_id); - - CPUCache& cache = Static::cpu_cache(); -#if TCMALLOC_PERCPU_USE_RSEQ - if (subtle::percpu::UsingFlatVirtualCpus()) { - subtle::percpu::__rseq_abi.vcpu_id = cpu_id; - } -#endif - - // Allocate and deallocate objects to make sure we have enough misses on the - // cache. This will make sure we have sufficient disparity in misses between - // the hotter and colder cache, and that we may be able to steal bytes from - // the colder cache. - for (size_t cl = 1; cl <= kStressSlabs; ++cl) { - void* ptr = cache.Allocate<OOMHandler>(cl); - cache.Deallocate(ptr, cl); - } - - // We reclaim the cache to reset it so that we record underflows/overflows the - // next time we allocate and deallocate objects. Without reclaim, the cache - // would stay warmed up and it would take more time to drain the colder cache. - cache.Reclaim(cpu_id); -} - -TEST(CpuCacheTest, ColdHotCacheShuffleTest) { - if (!subtle::percpu::IsFast()) { - return; - } - - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. - cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); - - constexpr int hot_cpu_id = 0; - constexpr int cold_cpu_id = 1; - - const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size(); - - // Empirical tests suggest that we should be able to steal all the steal-able - // capacity from colder cache in < 100 tries. Keeping enough buffer here to - // make sure we steal from colder cache, while at the same time avoid timeouts - // if something goes bad. - constexpr int kMaxStealTries = 1000; - - // We allocate and deallocate a single highest cl object. - // This makes sure that we have a single large object in the cache that faster - // cache can steal. 
- const size_t size_class = kNumClasses - 1; - - for (int num_tries = 0; - num_tries < kMaxStealTries && - cache.Capacity(cold_cpu_id) > - CPUCache::kCacheCapacityThreshold * max_cpu_cache_size; - ++num_tries) { - ColdCacheOperations(cold_cpu_id, size_class); - HotCacheOperations(hot_cpu_id); - cache.ShuffleCpuCaches(); - - // Check that the capacity is preserved. - EXPECT_EQ(cache.Allocated(cold_cpu_id) + cache.Unallocated(cold_cpu_id), - cache.Capacity(cold_cpu_id)); - EXPECT_EQ(cache.Allocated(hot_cpu_id) + cache.Unallocated(hot_cpu_id), - cache.Capacity(hot_cpu_id)); - } - - size_t cold_cache_capacity = cache.Capacity(cold_cpu_id); - size_t hot_cache_capacity = cache.Capacity(hot_cpu_id); - - // Check that we drained cold cache to the lower capacity limit. - // We also keep some tolerance, up to the largest class size, below the lower - // capacity threshold that we can drain cold cache to. - EXPECT_GT(cold_cache_capacity, - CPUCache::kCacheCapacityThreshold * max_cpu_cache_size - - Static::sizemap().class_to_size(kNumClasses - 1)); - - // Check that we have at least stolen some capacity. - EXPECT_GT(hot_cache_capacity, max_cpu_cache_size); - - // Perform a few more shuffles to make sure that lower cache capacity limit - // has been reached for the cold cache. A few more shuffles should not - // change the capacity of either of the caches. - for (int i = 0; i < 100; ++i) { - ColdCacheOperations(cold_cpu_id, size_class); - HotCacheOperations(hot_cpu_id); - cache.ShuffleCpuCaches(); - - // Check that the capacity is preserved. - EXPECT_EQ(cache.Allocated(cold_cpu_id) + cache.Unallocated(cold_cpu_id), - cache.Capacity(cold_cpu_id)); - EXPECT_EQ(cache.Allocated(hot_cpu_id) + cache.Unallocated(hot_cpu_id), - cache.Capacity(hot_cpu_id)); - } - - // Check that the capacity of cold and hot caches is same as before. 
- EXPECT_EQ(cache.Capacity(cold_cpu_id), cold_cache_capacity); - EXPECT_EQ(cache.Capacity(hot_cpu_id), hot_cache_capacity); - - // Make sure that the total capacity is preserved. - EXPECT_EQ(cache.Capacity(cold_cpu_id) + cache.Capacity(hot_cpu_id), - 2 * max_cpu_cache_size); - - // Reclaim caches. - const int num_cpus = absl::base_internal::NumCPUs(); - for (int cpu = 0; cpu < num_cpus; ++cpu) { - cache.Reclaim(cpu); - } -} - -TEST(CpuCacheTest, ReclaimCpuCache) { - if (!subtle::percpu::IsFast()) { - return; - } - - CPUCache& cache = Static::cpu_cache(); - // Since this test allocates memory, avoid activating the real fast path to - // minimize allocations against the per-CPU cache. - cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); - - // The number of underflows and overflows must be zero for all the caches. - const int num_cpus = absl::base_internal::NumCPUs(); - for (int cpu = 0; cpu < num_cpus; ++cpu) { - SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); - // Check that reclaim miss metrics are reset. - CPUCache::CpuCacheMissStats reclaim_misses = - cache.GetReclaimCacheMissStats(cpu); - EXPECT_EQ(reclaim_misses.underflows, 0); - EXPECT_EQ(reclaim_misses.overflows, 0); - - // None of the caches should have been reclaimed yet. - EXPECT_EQ(cache.GetNumReclaims(cpu), 0); - - // Check that caches are empty. - uint64_t used_bytes = cache.UsedBytes(cpu); - EXPECT_EQ(used_bytes, 0); - } - - const size_t kSizeClass = 3; - - // We chose a different size class here so that we can populate different size - // class slots and change the number of bytes used by the busy cache later in - // our test. - const size_t kBusySizeClass = 4; - - // Perform some operations to warm up caches and make sure they are populated. 
- for (int cpu = 0; cpu < num_cpus; ++cpu) { - SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); - ColdCacheOperations(cpu, kSizeClass); - EXPECT_TRUE(cache.HasPopulated(cpu)); - } - - for (int cpu = 0; cpu < num_cpus; ++cpu) { - SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); - CPUCache::CpuCacheMissStats misses_last_interval = - cache.GetReclaimCacheMissStats(cpu); - CPUCache::CpuCacheMissStats total_misses = - cache.GetTotalCacheMissStats(cpu); - - // Misses since the last reclaim (i.e. since we initialized the caches) - // should match the total miss metrics. - EXPECT_EQ(misses_last_interval.underflows, total_misses.underflows); - EXPECT_EQ(misses_last_interval.overflows, total_misses.overflows); - - // Caches should have non-zero used bytes. - EXPECT_GT(cache.UsedBytes(cpu), 0); - } - - cache.TryReclaimingCaches(); - - // Miss metrics since the last interval were non-zero and the change in used - // bytes was non-zero, so none of the caches should get reclaimed. - for (int cpu = 0; cpu < num_cpus; ++cpu) { - SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); - // As no cache operations were performed since the last reclaim - // operation, the reclaim misses captured during the last interval (i.e. - // since the last reclaim) should be zero. - CPUCache::CpuCacheMissStats reclaim_misses = - cache.GetReclaimCacheMissStats(cpu); - EXPECT_EQ(reclaim_misses.underflows, 0); - EXPECT_EQ(reclaim_misses.overflows, 0); - - // None of the caches should have been reclaimed as the caches were - // accessed in the previous interval. - EXPECT_EQ(cache.GetNumReclaims(cpu), 0); - - // Caches should not have been reclaimed; used bytes should be non-zero. 
- EXPECT_GT(cache.UsedBytes(cpu), 0); - } - - absl::BitGen rnd; - const int busy_cpu = - absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()); - const size_t prev_used = cache.UsedBytes(busy_cpu); - ColdCacheOperations(busy_cpu, kBusySizeClass); - EXPECT_GT(cache.UsedBytes(busy_cpu), prev_used); - - // Try reclaiming caches again. - cache.TryReclaimingCaches(); - - // All caches, except the busy cpu cache against which we performed some - // operations in the previous interval, should have been reclaimed exactly - // once. - for (int cpu = 0; cpu < num_cpus; ++cpu) { - SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); - if (cpu == busy_cpu) { - EXPECT_GT(cache.UsedBytes(cpu), 0); - EXPECT_EQ(cache.GetNumReclaims(cpu), 0); - } else { - EXPECT_EQ(cache.UsedBytes(cpu), 0); - EXPECT_EQ(cache.GetNumReclaims(cpu), 1); - } - } - - // Try reclaiming caches again. - cache.TryReclaimingCaches(); - - // All caches, including the busy cache, should have been reclaimed this - // time. Note that the caches that were reclaimed in the previous interval - // should not be reclaimed again and the number of reclaims reported for them - // should still be one. - for (int cpu = 0; cpu < num_cpus; ++cpu) { - SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); - EXPECT_EQ(cache.UsedBytes(cpu), 0); - EXPECT_EQ(cache.GetNumReclaims(cpu), 1); - } -} - +TEST(CpuCacheTest, CacheMissStats) { + if (!subtle::percpu::IsFast()) { + return; + } + + const int num_cpus = absl::base_internal::NumCPUs(); + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. + cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + // The number of underflows and overflows must be zero for all the caches. 
+ for (int cpu = 0; cpu < num_cpus; ++cpu) { + CPUCache::CpuCacheMissStats total_misses = + cache.GetTotalCacheMissStats(cpu); + CPUCache::CpuCacheMissStats interval_misses = + cache.GetIntervalCacheMissStats(cpu); + EXPECT_EQ(total_misses.underflows, 0); + EXPECT_EQ(total_misses.overflows, 0); + EXPECT_EQ(interval_misses.underflows, 0); + EXPECT_EQ(interval_misses.overflows, 0); + } + + int allowed_cpu_id; + const size_t kSizeClass = 3; + const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus() + ? offsetof(kernel_rseq, vcpu_id) + : offsetof(kernel_rseq, cpu_id); + void* ptr; + { + // Restrict this thread to a single core while allocating and processing the + // slow path. + // + // TODO(b/151313823): Without this restriction, we may access--for reading + // only--other slabs if we end up being migrated. These may cause huge + // pages to be faulted for those cores, leading to test flakiness. + tcmalloc_internal::ScopedAffinityMask mask( + tcmalloc_internal::AllowedCpus()[0]); + allowed_cpu_id = + subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset); + + ptr = cache.Allocate<OOMHandler>(kSizeClass); + + if (mask.Tampered() || + allowed_cpu_id != + subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) { + return; + } + } + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + CPUCache::CpuCacheMissStats total_misses = + cache.GetTotalCacheMissStats(cpu); + CPUCache::CpuCacheMissStats interval_misses = + cache.GetIntervalCacheMissStats(cpu); + if (cpu == allowed_cpu_id) { + EXPECT_EQ(total_misses.underflows, 1); + EXPECT_EQ(interval_misses.underflows, 1); + } else { + EXPECT_EQ(total_misses.underflows, 0); + EXPECT_EQ(interval_misses.underflows, 0); + } + EXPECT_EQ(total_misses.overflows, 0); + EXPECT_EQ(interval_misses.overflows, 0); + } + + // Tear down. + // + // TODO(ckennelly): We're interacting with the real TransferCache. 
+ cache.Deallocate(ptr, kSizeClass); + + for (int i = 0; i < num_cpus; i++) { + cache.Reclaim(i); + } +} + +static void ShuffleThread(const std::atomic<bool>& stop) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + // Wake up every 10ms to shuffle the caches so that we can allow misses to + // accumulate during that interval + while (!stop) { + cache.ShuffleCpuCaches(); + absl::SleepFor(absl::Milliseconds(10)); + } +} + +static void StressThread(size_t thread_id, const std::atomic<bool>& stop) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + std::vector<std::pair<size_t, void*>> blocks; + absl::BitGen rnd; + while (!stop) { + const int what = absl::Uniform<int32_t>(rnd, 0, 2); + if (what) { + // Allocate an object for a class + size_t cl = absl::Uniform<int32_t>(rnd, 1, kStressSlabs + 1); + void* ptr = cache.Allocate<OOMHandler>(cl); + blocks.emplace_back(std::make_pair(cl, ptr)); + } else { + // Deallocate an object for a class + if (!blocks.empty()) { + cache.Deallocate(blocks.back().second, blocks.back().first); + blocks.pop_back(); + } + } + } + + // Cleaup. Deallocate rest of the allocated memory. + for (int i = 0; i < blocks.size(); i++) { + cache.Deallocate(blocks[i].second, blocks[i].first); + } +} + +TEST(CpuCacheTest, StealCpuCache) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. 
+ cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + std::vector<std::thread> threads; + std::thread shuffle_thread; + const int n_threads = absl::base_internal::NumCPUs(); + std::atomic<bool> stop(false); + + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back(std::thread(StressThread, t, std::ref(stop))); + } + shuffle_thread = std::thread(ShuffleThread, std::ref(stop)); + + absl::SleepFor(absl::Seconds(5)); + stop = true; + for (auto& t : threads) { + t.join(); + } + shuffle_thread.join(); + + // Check that the total capacity is preserved after the shuffle. + size_t capacity = 0; + const int num_cpus = absl::base_internal::NumCPUs(); + const size_t kTotalCapacity = num_cpus * Parameters::max_per_cpu_cache_size(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), + cache.Capacity(cpu)); + capacity += cache.Capacity(cpu); + } + EXPECT_EQ(capacity, kTotalCapacity); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + cache.Reclaim(cpu); + } +} + +// Runs a single allocate and deallocate operation to warm up the cache. Once a +// few objects are allocated in the cold cache, we can shuffle cpu caches to +// steal that capacity from the cold cache to the hot cache. +static void ColdCacheOperations(int cpu_id, size_t size_class) { + // Temporarily fake being on the given CPU. + ScopedFakeCpuId fake_cpu_id(cpu_id); + + CPUCache& cache = Static::cpu_cache(); +#if TCMALLOC_PERCPU_USE_RSEQ + if (subtle::percpu::UsingFlatVirtualCpus()) { + subtle::percpu::__rseq_abi.vcpu_id = cpu_id; + } +#endif + + void* ptr = cache.Allocate<OOMHandler>(size_class); + cache.Deallocate(ptr, size_class); +} + +// Runs multiple allocate and deallocate operation on the cpu cache to collect +// misses. Once we collect enough misses on this cache, we can shuffle cpu +// caches to steal capacity from colder caches to the hot cache. +static void HotCacheOperations(int cpu_id) { + // Temporarily fake being on the given CPU. 
+ ScopedFakeCpuId fake_cpu_id(cpu_id); + + CPUCache& cache = Static::cpu_cache(); +#if TCMALLOC_PERCPU_USE_RSEQ + if (subtle::percpu::UsingFlatVirtualCpus()) { + subtle::percpu::__rseq_abi.vcpu_id = cpu_id; + } +#endif + + // Allocate and deallocate objects to make sure we have enough misses on the + // cache. This will make sure we have sufficient disparity in misses between + // the hotter and colder cache, and that we may be able to steal bytes from + // the colder cache. + for (size_t cl = 1; cl <= kStressSlabs; ++cl) { + void* ptr = cache.Allocate<OOMHandler>(cl); + cache.Deallocate(ptr, cl); + } + + // We reclaim the cache to reset it so that we record underflows/overflows the + // next time we allocate and deallocate objects. Without reclaim, the cache + // would stay warmed up and it would take more time to drain the colder cache. + cache.Reclaim(cpu_id); +} + +TEST(CpuCacheTest, ColdHotCacheShuffleTest) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. + cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + constexpr int hot_cpu_id = 0; + constexpr int cold_cpu_id = 1; + + const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size(); + + // Empirical tests suggest that we should be able to steal all the steal-able + // capacity from colder cache in < 100 tries. Keeping enough buffer here to + // make sure we steal from colder cache, while at the same time avoid timeouts + // if something goes bad. + constexpr int kMaxStealTries = 1000; + + // We allocate and deallocate a single highest cl object. + // This makes sure that we have a single large object in the cache that faster + // cache can steal. 
+ const size_t size_class = kNumClasses - 1; + + for (int num_tries = 0; + num_tries < kMaxStealTries && + cache.Capacity(cold_cpu_id) > + CPUCache::kCacheCapacityThreshold * max_cpu_cache_size; + ++num_tries) { + ColdCacheOperations(cold_cpu_id, size_class); + HotCacheOperations(hot_cpu_id); + cache.ShuffleCpuCaches(); + + // Check that the capacity is preserved. + EXPECT_EQ(cache.Allocated(cold_cpu_id) + cache.Unallocated(cold_cpu_id), + cache.Capacity(cold_cpu_id)); + EXPECT_EQ(cache.Allocated(hot_cpu_id) + cache.Unallocated(hot_cpu_id), + cache.Capacity(hot_cpu_id)); + } + + size_t cold_cache_capacity = cache.Capacity(cold_cpu_id); + size_t hot_cache_capacity = cache.Capacity(hot_cpu_id); + + // Check that we drained cold cache to the lower capacity limit. + // We also keep some tolerance, up to the largest class size, below the lower + // capacity threshold that we can drain cold cache to. + EXPECT_GT(cold_cache_capacity, + CPUCache::kCacheCapacityThreshold * max_cpu_cache_size - + Static::sizemap().class_to_size(kNumClasses - 1)); + + // Check that we have at least stolen some capacity. + EXPECT_GT(hot_cache_capacity, max_cpu_cache_size); + + // Perform a few more shuffles to make sure that lower cache capacity limit + // has been reached for the cold cache. A few more shuffles should not + // change the capacity of either of the caches. + for (int i = 0; i < 100; ++i) { + ColdCacheOperations(cold_cpu_id, size_class); + HotCacheOperations(hot_cpu_id); + cache.ShuffleCpuCaches(); + + // Check that the capacity is preserved. + EXPECT_EQ(cache.Allocated(cold_cpu_id) + cache.Unallocated(cold_cpu_id), + cache.Capacity(cold_cpu_id)); + EXPECT_EQ(cache.Allocated(hot_cpu_id) + cache.Unallocated(hot_cpu_id), + cache.Capacity(hot_cpu_id)); + } + + // Check that the capacity of cold and hot caches is same as before. 
+ EXPECT_EQ(cache.Capacity(cold_cpu_id), cold_cache_capacity); + EXPECT_EQ(cache.Capacity(hot_cpu_id), hot_cache_capacity); + + // Make sure that the total capacity is preserved. + EXPECT_EQ(cache.Capacity(cold_cpu_id) + cache.Capacity(hot_cpu_id), + 2 * max_cpu_cache_size); + + // Reclaim caches. + const int num_cpus = absl::base_internal::NumCPUs(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + cache.Reclaim(cpu); + } +} + +TEST(CpuCacheTest, ReclaimCpuCache) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. + cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + // The number of underflows and overflows must be zero for all the caches. + const int num_cpus = absl::base_internal::NumCPUs(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + // Check that reclaim miss metrics are reset. + CPUCache::CpuCacheMissStats reclaim_misses = + cache.GetReclaimCacheMissStats(cpu); + EXPECT_EQ(reclaim_misses.underflows, 0); + EXPECT_EQ(reclaim_misses.overflows, 0); + + // None of the caches should have been reclaimed yet. + EXPECT_EQ(cache.GetNumReclaims(cpu), 0); + + // Check that caches are empty. + uint64_t used_bytes = cache.UsedBytes(cpu); + EXPECT_EQ(used_bytes, 0); + } + + const size_t kSizeClass = 3; + + // We chose a different size class here so that we can populate different size + // class slots and change the number of bytes used by the busy cache later in + // our test. + const size_t kBusySizeClass = 4; + + // Perform some operations to warm up caches and make sure they are populated. 
+ for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + ColdCacheOperations(cpu, kSizeClass); + EXPECT_TRUE(cache.HasPopulated(cpu)); + } + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + CPUCache::CpuCacheMissStats misses_last_interval = + cache.GetReclaimCacheMissStats(cpu); + CPUCache::CpuCacheMissStats total_misses = + cache.GetTotalCacheMissStats(cpu); + + // Misses since the last reclaim (i.e. since we initialized the caches) + // should match the total miss metrics. + EXPECT_EQ(misses_last_interval.underflows, total_misses.underflows); + EXPECT_EQ(misses_last_interval.overflows, total_misses.overflows); + + // Caches should have non-zero used bytes. + EXPECT_GT(cache.UsedBytes(cpu), 0); + } + + cache.TryReclaimingCaches(); + + // Miss metrics since the last interval were non-zero and the change in used + // bytes was non-zero, so none of the caches should get reclaimed. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + // As no cache operations were performed since the last reclaim + // operation, the reclaim misses captured during the last interval (i.e. + // since the last reclaim) should be zero. + CPUCache::CpuCacheMissStats reclaim_misses = + cache.GetReclaimCacheMissStats(cpu); + EXPECT_EQ(reclaim_misses.underflows, 0); + EXPECT_EQ(reclaim_misses.overflows, 0); + + // None of the caches should have been reclaimed as the caches were + // accessed in the previous interval. + EXPECT_EQ(cache.GetNumReclaims(cpu), 0); + + // Caches should not have been reclaimed; used bytes should be non-zero. 
+ EXPECT_GT(cache.UsedBytes(cpu), 0); + } + + absl::BitGen rnd; + const int busy_cpu = + absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()); + const size_t prev_used = cache.UsedBytes(busy_cpu); + ColdCacheOperations(busy_cpu, kBusySizeClass); + EXPECT_GT(cache.UsedBytes(busy_cpu), prev_used); + + // Try reclaiming caches again. + cache.TryReclaimingCaches(); + + // All caches, except the busy cpu cache against which we performed some + // operations in the previous interval, should have been reclaimed exactly + // once. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + if (cpu == busy_cpu) { + EXPECT_GT(cache.UsedBytes(cpu), 0); + EXPECT_EQ(cache.GetNumReclaims(cpu), 0); + } else { + EXPECT_EQ(cache.UsedBytes(cpu), 0); + EXPECT_EQ(cache.GetNumReclaims(cpu), 1); + } + } + + // Try reclaiming caches again. + cache.TryReclaimingCaches(); + + // All caches, including the busy cache, should have been reclaimed this + // time. Note that the caches that were reclaimed in the previous interval + // should not be reclaimed again and the number of reclaims reported for them + // should still be one. 
+ for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + EXPECT_EQ(cache.UsedBytes(cpu), 0); + EXPECT_EQ(cache.GetNumReclaims(cpu), 1); + } +} + } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment.cc b/contrib/libs/tcmalloc/tcmalloc/experiment.cc index 1c425fbf9e..4f6f5dbe31 100644 --- a/contrib/libs/tcmalloc/tcmalloc/experiment.cc +++ b/contrib/libs/tcmalloc/tcmalloc/experiment.cc @@ -22,9 +22,9 @@ #include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { const char kDelimiter = ','; @@ -50,9 +50,9 @@ const bool* GetSelectedExperiments() { static const bool* status = [&]() { const char* active_experiments = thread_safe_getenv(kExperiments); const char* disabled_experiments = thread_safe_getenv(kDisableExperiments); - return SelectExperiments(by_id, - active_experiments ? active_experiments : "", - disabled_experiments ? disabled_experiments : ""); + return SelectExperiments(by_id, + active_experiments ? active_experiments : "", + disabled_experiments ? disabled_experiments : ""); }(); return status; } @@ -106,7 +106,7 @@ const bool* SelectExperiments(bool* buffer, absl::string_view active, return buffer; } -void PrintExperiments(Printer* printer) { +void PrintExperiments(Printer* printer) { // Index experiments by their positions in the experiments array, rather than // by experiment ID. 
static bool active[ABSL_ARRAYSIZE(experiments)]; @@ -131,32 +131,32 @@ void PrintExperiments(Printer* printer) { printer->printf("\n"); } -void FillExperimentProperties( - std::map<std::string, MallocExtension::Property>* result) { - for (const auto& config : experiments) { - (*result)[absl::StrCat("tcmalloc.experiment.", config.name)].value = - IsExperimentActive(config.id) ? 1 : 0; - } -} - -} // namespace tcmalloc_internal - -bool IsExperimentActive(Experiment exp) { - ASSERT(static_cast<int>(exp) >= 0); - ASSERT(exp < Experiment::kMaxExperimentID); - - return tcmalloc_internal::GetSelectedExperiments()[static_cast<int>(exp)]; -} - -absl::optional<Experiment> FindExperimentByName(absl::string_view name) { - for (const auto& config : experiments) { - if (name == config.name) { - return config.id; - } - } - - return absl::nullopt; -} - +void FillExperimentProperties( + std::map<std::string, MallocExtension::Property>* result) { + for (const auto& config : experiments) { + (*result)[absl::StrCat("tcmalloc.experiment.", config.name)].value = + IsExperimentActive(config.id) ? 
1 : 0; + } +} + +} // namespace tcmalloc_internal + +bool IsExperimentActive(Experiment exp) { + ASSERT(static_cast<int>(exp) >= 0); + ASSERT(exp < Experiment::kMaxExperimentID); + + return tcmalloc_internal::GetSelectedExperiments()[static_cast<int>(exp)]; +} + +absl::optional<Experiment> FindExperimentByName(absl::string_view name) { + for (const auto& config : experiments) { + if (name == config.name) { + return config.id; + } + } + + return absl::nullopt; +} + } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment.h b/contrib/libs/tcmalloc/tcmalloc/experiment.h index 90b3049df1..d04387952f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/experiment.h +++ b/contrib/libs/tcmalloc/tcmalloc/experiment.h @@ -38,9 +38,9 @@ // BORG_DISABLE_EXPERIMENTS=all *or* // BORG_DISABLE_EXPERIMENTS=BAD_EXPERIMENT_LABEL -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { constexpr size_t kNumExperiments = static_cast<size_t>(Experiment::kMaxExperimentID); @@ -54,18 +54,18 @@ constexpr size_t kNumExperiments = const bool* SelectExperiments(bool* buffer, absl::string_view active, absl::string_view disabled); -void FillExperimentProperties( - std::map<std::string, MallocExtension::Property>* result); - -void PrintExperiments(Printer* printer); - -} // namespace tcmalloc_internal +void FillExperimentProperties( + std::map<std::string, MallocExtension::Property>* result); +void PrintExperiments(Printer* printer); + +} // namespace tcmalloc_internal + bool IsExperimentActive(Experiment exp); absl::optional<Experiment> FindExperimentByName(absl::string_view name); } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_EXPERIMENT_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_config.h b/contrib/libs/tcmalloc/tcmalloc/experiment_config.h index 294c0374e4..a34969c4b6 100644 --- 
a/contrib/libs/tcmalloc/tcmalloc/experiment_config.h +++ b/contrib/libs/tcmalloc/tcmalloc/experiment_config.h @@ -23,10 +23,10 @@ namespace tcmalloc { enum class Experiment : int { TCMALLOC_TEMERAIRE, TCMALLOC_SANS_56_SIZECLASS, - TEST_ONLY_TCMALLOC_POW2_SIZECLASS, - TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, - TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, - TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, + TEST_ONLY_TCMALLOC_POW2_SIZECLASS, + TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, + TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, + TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, kMaxExperimentID, }; @@ -39,10 +39,10 @@ struct ExperimentConfig { inline constexpr ExperimentConfig experiments[] = { {Experiment::TCMALLOC_TEMERAIRE, "TCMALLOC_TEMERAIRE"}, {Experiment::TCMALLOC_SANS_56_SIZECLASS, "TCMALLOC_SANS_56_SIZECLASS"}, - {Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_SIZECLASS"}, - {Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS"}, - {Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE"}, - {Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE"}, + {Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_SIZECLASS"}, + {Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS"}, + {Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE"}, + {Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE"}, }; // clang-format on diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc index 2a7afe9b85..f392cfba17 100644 --- a/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc +++ b/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc @@ -22,7 +22,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* 
d, size_t size) { const char* data = reinterpret_cast<const char*>(d); - bool buffer[tcmalloc::tcmalloc_internal::kNumExperiments]; + bool buffer[tcmalloc::tcmalloc_internal::kNumExperiments]; absl::string_view active, disabled; const char* split = static_cast<const char*>(memchr(data, ';', size)); @@ -33,6 +33,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) { disabled = absl::string_view(split + 1, size - (split - data + 1)); } - tcmalloc::tcmalloc_internal::SelectExperiments(buffer, active, disabled); + tcmalloc::tcmalloc_internal::SelectExperiments(buffer, active, disabled); return 0; } diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc index c6769f450e..ba41dd2ee3 100755 --- a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc +++ b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc @@ -1,679 +1,679 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tcmalloc/common.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { - -namespace tcmalloc_internal { - -// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation -// and other factors. For instance, if we have a 96 byte size class, and use a -// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes -// left over. 
There is also a fixed component of 48 bytes of TCMalloc metadata -// per span. Together, the fixed overhead would be wasted/allocated = -// (32 + 48) / (8192 - 32) ~= 0.98%. -// There is also a dynamic component to overhead based on mismatches between the -// number of bytes requested and the number of bytes provided by the size class. -// Together they sum to the total overhead; for instance if you asked for a -// 50-byte allocation that rounds up to a 64-byte size class, the dynamic -// overhead would be 28%, and if <fixed> were 22% it would mean (on average) -// 25 bytes of overhead for allocations of that size. - -// clang-format off -#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 -#if TCMALLOC_PAGE_SHIFT == 13 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 82; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 64, 1, 32}, // 0.59% - { 72, 1, 32}, // 1.28% - { 80, 1, 32}, // 0.98% - { 88, 1, 32}, // 0.68% - { 96, 1, 32}, // 0.98% - { 104, 1, 32}, // 1.58% - { 112, 1, 32}, // 0.78% - { 120, 1, 32}, // 0.98% - { 128, 1, 32}, // 0.59% - { 136, 1, 32}, // 0.98% - { 144, 1, 32}, // 2.18% - { 160, 1, 32}, // 0.98% - { 176, 1, 32}, // 1.78% - { 192, 1, 32}, // 2.18% - { 208, 1, 32}, // 1.58% - { 224, 1, 32}, // 2.18% - { 240, 1, 32}, // 0.98% - { 256, 1, 32}, // 0.59% - { 272, 1, 32}, // 0.98% - { 296, 1, 32}, // 3.10% - { 312, 1, 32}, // 1.58% - { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% - { 368, 1, 32}, // 1.78% - { 408, 1, 32}, // 0.98% - { 448, 1, 32}, // 2.18% - { 480, 1, 32}, // 0.98% - { 512, 1, 32}, // 0.59% - { 576, 1, 32}, // 2.18% - { 640, 1, 32}, // 7.29% - { 704, 1, 
32}, // 6.40% - { 768, 1, 32}, // 7.29% - { 896, 1, 32}, // 2.18% - { 1024, 1, 32}, // 0.59% - { 1152, 2, 32}, // 1.88% - { 1280, 2, 32}, // 6.98% - { 1408, 2, 32}, // 6.10% - { 1536, 2, 32}, // 6.98% - { 1792, 2, 32}, // 1.88% - { 2048, 2, 32}, // 0.29% - { 2304, 2, 28}, // 1.88% - { 2688, 2, 24}, // 1.88% - { 2816, 3, 23}, // 9.30% - { 3200, 2, 20}, // 2.70% - { 3456, 3, 18}, // 1.79% - { 3584, 4, 18}, // 1.74% - { 4096, 1, 16}, // 0.29% - { 4736, 3, 13}, // 3.99% - { 5376, 2, 12}, // 1.88% - { 6144, 3, 10}, // 0.20% - { 6528, 4, 10}, // 0.54% - { 7168, 7, 9}, // 0.08% - { 8192, 1, 8}, // 0.29% - { 9472, 5, 6}, // 8.23% - { 10240, 4, 6}, // 6.82% - { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% - { 14336, 7, 4}, // 0.08% - { 16384, 2, 4}, // 0.29% - { 20480, 5, 3}, // 0.12% - { 24576, 3, 2}, // 0.20% - { 28672, 7, 2}, // 0.08% - { 32768, 4, 2}, // 0.15% - { 40960, 5, 2}, // 0.12% - { 49152, 6, 2}, // 0.10% - { 57344, 7, 2}, // 0.08% - { 65536, 8, 2}, // 0.07% - { 73728, 9, 2}, // 0.07% - { 81920, 10, 2}, // 0.06% - { 98304, 12, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% - { 131072, 16, 2}, // 0.04% - { 147456, 18, 2}, // 0.03% - { 163840, 20, 2}, // 0.03% - { 180224, 22, 2}, // 0.03% - { 204800, 25, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% - { 262144, 32, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 15 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 74; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 64, 1, 32}, // 0.15% - { 72, 1, 32}, // 0.17% - { 80, 1, 32}, // 0.29% - { 88, 1, 32}, // 0.24% - { 96, 1, 32}, // 0.24% - { 104, 1, 32}, // 0.17% - { 112, 1, 32}, // 0.34% - { 128, 1, 32}, // 0.15% - { 
144, 1, 32}, // 0.39% - { 160, 1, 32}, // 0.54% - { 176, 1, 32}, // 0.24% - { 192, 1, 32}, // 0.54% - { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% - { 256, 1, 32}, // 0.15% - { 280, 1, 32}, // 0.17% - { 304, 1, 32}, // 0.89% - { 328, 1, 32}, // 1.06% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% - { 416, 1, 32}, // 1.13% - { 448, 1, 32}, // 0.34% - { 488, 1, 32}, // 0.37% - { 512, 1, 32}, // 0.15% - { 576, 1, 32}, // 1.74% - { 640, 1, 32}, // 0.54% - { 704, 1, 32}, // 1.33% - { 832, 1, 32}, // 1.13% - { 896, 1, 32}, // 1.74% - { 1024, 1, 32}, // 0.15% - { 1152, 1, 32}, // 1.74% - { 1280, 1, 32}, // 2.55% - { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% - { 2048, 1, 32}, // 0.15% - { 2176, 1, 30}, // 0.54% - { 2304, 1, 28}, // 1.74% - { 2688, 1, 24}, // 1.74% - { 2944, 1, 22}, // 1.33% - { 3200, 1, 20}, // 2.55% - { 3584, 1, 18}, // 1.74% - { 4096, 1, 16}, // 0.15% - { 4608, 1, 14}, // 1.74% - { 5376, 1, 12}, // 1.74% - { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% - { 8192, 1, 8}, // 0.15% - { 9344, 2, 7}, // 0.27% - { 10880, 1, 6}, // 0.54% - { 13952, 3, 4}, // 0.70% - { 16384, 1, 4}, // 0.15% - { 19072, 3, 3}, // 3.14% - { 21760, 2, 3}, // 0.47% - { 24576, 3, 2}, // 0.05% - { 28032, 6, 2}, // 0.22% - { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% - { 40960, 4, 2}, // 6.71% - { 49152, 3, 2}, // 0.05% - { 57344, 7, 2}, // 0.02% - { 65536, 2, 2}, // 0.07% - { 81920, 5, 2}, // 0.03% - { 98304, 3, 2}, // 0.05% - { 114688, 7, 2}, // 0.02% - { 131072, 4, 2}, // 0.04% - { 163840, 5, 2}, // 0.03% - { 196608, 6, 2}, // 0.02% - { 229376, 7, 2}, // 0.02% - { 262144, 8, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 18 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 85; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo 
SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 72, 1, 32}, // 0.04% - { 80, 1, 32}, // 0.04% - { 88, 1, 32}, // 0.05% - { 96, 1, 32}, // 0.04% - { 104, 1, 32}, // 0.04% - { 112, 1, 32}, // 0.04% - { 128, 1, 32}, // 0.02% - { 144, 1, 32}, // 0.04% - { 160, 1, 32}, // 0.04% - { 176, 1, 32}, // 0.05% - { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% - { 240, 1, 32}, // 0.04% - { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% - { 360, 1, 32}, // 0.04% - { 408, 1, 32}, // 0.10% - { 456, 1, 32}, // 0.17% - { 512, 1, 32}, // 0.02% - { 576, 1, 32}, // 0.04% - { 640, 1, 32}, // 0.17% - { 704, 1, 32}, // 0.12% - { 768, 1, 32}, // 0.12% - { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% - { 1024, 1, 32}, // 0.02% - { 1152, 1, 32}, // 0.26% - { 1280, 1, 32}, // 0.41% - { 1536, 1, 32}, // 0.41% - { 1664, 1, 32}, // 0.36% - { 1792, 1, 32}, // 0.21% - { 1920, 1, 32}, // 0.41% - { 2048, 1, 32}, // 0.02% - { 2176, 1, 30}, // 0.41% - { 2304, 1, 28}, // 0.71% - { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% - { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% - { 2944, 1, 22}, // 0.07% - { 3072, 1, 21}, // 0.41% - { 3328, 1, 19}, // 1.00% - { 3584, 1, 18}, // 0.21% - { 3840, 1, 17}, // 0.41% - { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% - { 5504, 1, 11}, // 1.35% - { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% - { 7168, 1, 9}, // 1.61% - { 7680, 1, 8}, // 0.41% - { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% - { 9344, 1, 7}, // 0.21% - { 10880, 1, 6}, // 0.41% - { 11904, 1, 5}, // 0.12% - { 13056, 1, 5}, // 0.41% - { 14464, 1, 4}, // 0.71% - { 16384, 1, 4}, // 0.02% - { 18688, 1, 3}, // 0.21% - { 21760, 1, 3}, // 0.41% - { 26112, 1, 2}, // 0.41% - { 29056, 1, 2}, // 0.26% - { 32768, 1, 
2}, // 0.02% - { 37376, 1, 2}, // 0.21% - { 43648, 1, 2}, // 0.12% - { 52352, 1, 2}, // 0.17% - { 56064, 2, 2}, // 3.92% - { 65536, 1, 2}, // 0.02% - { 74880, 2, 2}, // 0.03% - { 87296, 1, 2}, // 0.12% - { 104832, 2, 2}, // 0.03% - { 112256, 3, 2}, // 0.09% - { 131072, 1, 2}, // 0.02% - { 149760, 3, 2}, // 5.03% - { 174720, 2, 2}, // 0.03% - { 209664, 4, 2}, // 0.03% - { 262144, 1, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 12 -static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 42; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 64, 1, 32}, // 1.17% - { 72, 1, 32}, // 2.78% - { 80, 1, 32}, // 1.57% - { 88, 1, 32}, // 2.37% - { 96, 1, 32}, // 2.78% - { 104, 1, 32}, // 2.17% - { 120, 1, 32}, // 1.57% - { 128, 1, 32}, // 1.17% - { 144, 1, 32}, // 2.78% - { 160, 1, 32}, // 3.60% - { 184, 1, 32}, // 2.37% - { 208, 1, 32}, // 4.86% - { 240, 1, 32}, // 1.57% - { 256, 1, 32}, // 1.17% - { 272, 1, 32}, // 1.57% - { 312, 1, 32}, // 2.17% - { 336, 1, 32}, // 2.78% - { 368, 1, 32}, // 2.37% - { 408, 1, 32}, // 1.57% - { 512, 1, 32}, // 1.17% - { 576, 2, 32}, // 2.18% - { 704, 2, 32}, // 6.40% - { 768, 2, 32}, // 7.29% - { 896, 2, 32}, // 2.18% - { 1024, 2, 32}, // 0.59% - { 1152, 3, 32}, // 7.08% - { 1280, 3, 32}, // 7.08% - { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% - { 2048, 4, 32}, // 0.29% - { 2304, 4, 28}, // 1.88% - { 2688, 4, 24}, // 1.88% - { 3456, 6, 18}, // 1.79% - { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% - { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% - { 8192, 4, 8}, // 0.29% -}; -#else -#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
-#endif -#else -#if TCMALLOC_PAGE_SHIFT == 13 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 82; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 64, 1, 32}, // 0.59% - { 80, 1, 32}, // 0.98% - { 96, 1, 32}, // 0.98% - { 112, 1, 32}, // 0.78% - { 128, 1, 32}, // 0.59% - { 144, 1, 32}, // 2.18% - { 160, 1, 32}, // 0.98% - { 176, 1, 32}, // 1.78% - { 192, 1, 32}, // 2.18% - { 208, 1, 32}, // 1.58% - { 224, 1, 32}, // 2.18% - { 240, 1, 32}, // 0.98% - { 256, 1, 32}, // 0.59% - { 272, 1, 32}, // 0.98% - { 288, 1, 32}, // 2.18% - { 304, 1, 32}, // 4.25% - { 320, 1, 32}, // 3.00% - { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% - { 368, 1, 32}, // 1.78% - { 384, 1, 32}, // 2.18% - { 400, 1, 32}, // 3.00% - { 416, 1, 32}, // 4.25% - { 448, 1, 32}, // 2.18% - { 480, 1, 32}, // 0.98% - { 512, 1, 32}, // 0.59% - { 576, 1, 32}, // 2.18% - { 640, 1, 32}, // 7.29% - { 704, 1, 32}, // 6.40% - { 768, 1, 32}, // 7.29% - { 896, 1, 32}, // 2.18% - { 1024, 1, 32}, // 0.59% - { 1152, 2, 32}, // 1.88% - { 1280, 2, 32}, // 6.98% - { 1408, 2, 32}, // 6.10% - { 1536, 2, 32}, // 6.98% - { 1792, 2, 32}, // 1.88% - { 2048, 2, 32}, // 0.29% - { 2304, 2, 28}, // 1.88% - { 2688, 2, 24}, // 1.88% - { 2816, 3, 23}, // 9.30% - { 3200, 2, 20}, // 2.70% - { 3456, 3, 18}, // 1.79% - { 3584, 4, 18}, // 1.74% - { 4096, 1, 16}, // 0.29% - { 4736, 3, 13}, // 3.99% - { 5376, 2, 12}, // 1.88% - { 6144, 3, 10}, // 0.20% - { 6528, 4, 10}, // 0.54% - { 7168, 7, 9}, // 0.08% - { 8192, 1, 8}, // 0.29% - { 9472, 5, 6}, // 8.23% - { 10240, 4, 6}, // 6.82% - { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% - { 14336, 7, 4}, // 0.08% - { 16384, 2, 4}, 
// 0.29% - { 20480, 5, 3}, // 0.12% - { 24576, 3, 2}, // 0.20% - { 28672, 7, 2}, // 0.08% - { 32768, 4, 2}, // 0.15% - { 40960, 5, 2}, // 0.12% - { 49152, 6, 2}, // 0.10% - { 57344, 7, 2}, // 0.08% - { 65536, 8, 2}, // 0.07% - { 73728, 9, 2}, // 0.07% - { 81920, 10, 2}, // 0.06% - { 90112, 11, 2}, // 0.05% - { 98304, 12, 2}, // 0.05% - { 106496, 13, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% - { 131072, 16, 2}, // 0.04% - { 147456, 18, 2}, // 0.03% - { 163840, 20, 2}, // 0.03% - { 180224, 22, 2}, // 0.03% - { 204800, 25, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% - { 262144, 32, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 15 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 74; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 64, 1, 32}, // 0.15% - { 80, 1, 32}, // 0.29% - { 96, 1, 32}, // 0.24% - { 112, 1, 32}, // 0.34% - { 128, 1, 32}, // 0.15% - { 144, 1, 32}, // 0.39% - { 160, 1, 32}, // 0.54% - { 176, 1, 32}, // 0.24% - { 192, 1, 32}, // 0.54% - { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% - { 256, 1, 32}, // 0.15% - { 272, 1, 32}, // 0.54% - { 288, 1, 32}, // 0.84% - { 304, 1, 32}, // 0.89% - { 320, 1, 32}, // 0.54% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% - { 416, 1, 32}, // 1.13% - { 448, 1, 32}, // 0.34% - { 480, 1, 32}, // 0.54% - { 512, 1, 32}, // 0.15% - { 576, 1, 32}, // 1.74% - { 640, 1, 32}, // 0.54% - { 704, 1, 32}, // 1.33% - { 768, 1, 32}, // 1.74% - { 832, 1, 32}, // 1.13% - { 896, 1, 32}, // 1.74% - { 1024, 1, 32}, // 0.15% - { 1152, 1, 32}, // 1.74% - { 1280, 1, 32}, // 2.55% - { 1408, 1, 32}, // 1.33% - { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, 
// 1.74% - { 2048, 1, 32}, // 0.15% - { 2176, 1, 30}, // 0.54% - { 2304, 1, 28}, // 1.74% - { 2688, 1, 24}, // 1.74% - { 2944, 1, 22}, // 1.33% - { 3200, 1, 20}, // 2.55% - { 3584, 1, 18}, // 1.74% - { 4096, 1, 16}, // 0.15% - { 4608, 1, 14}, // 1.74% - { 5376, 1, 12}, // 1.74% - { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% - { 8192, 1, 8}, // 0.15% - { 9344, 2, 7}, // 0.27% - { 10880, 1, 6}, // 0.54% - { 13952, 3, 4}, // 0.70% - { 16384, 1, 4}, // 0.15% - { 19072, 3, 3}, // 3.14% - { 21760, 2, 3}, // 0.47% - { 24576, 3, 2}, // 0.05% - { 28032, 6, 2}, // 0.22% - { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% - { 40960, 4, 2}, // 6.71% - { 49152, 3, 2}, // 0.05% - { 57344, 7, 2}, // 0.02% - { 65536, 2, 2}, // 0.07% - { 81920, 5, 2}, // 0.03% - { 98304, 3, 2}, // 0.05% - { 114688, 7, 2}, // 0.02% - { 131072, 4, 2}, // 0.04% - { 163840, 5, 2}, // 0.03% - { 196608, 6, 2}, // 0.02% - { 229376, 7, 2}, // 0.02% - { 262144, 8, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 18 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 85; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 80, 1, 32}, // 0.04% - { 96, 1, 32}, // 0.04% - { 112, 1, 32}, // 0.04% - { 128, 1, 32}, // 0.02% - { 144, 1, 32}, // 0.04% - { 160, 1, 32}, // 0.04% - { 176, 1, 32}, // 0.05% - { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% - { 240, 1, 32}, // 0.04% - { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% - { 368, 1, 32}, // 0.07% - { 416, 1, 32}, // 0.04% - { 464, 1, 32}, // 0.19% - { 512, 1, 32}, // 0.02% - { 576, 1, 32}, // 0.04% - { 640, 1, 32}, // 0.17% - { 704, 1, 32}, // 0.12% - 
{ 768, 1, 32}, // 0.12% - { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% - { 1024, 1, 32}, // 0.02% - { 1152, 1, 32}, // 0.26% - { 1280, 1, 32}, // 0.41% - { 1408, 1, 32}, // 0.12% - { 1536, 1, 32}, // 0.41% - { 1664, 1, 32}, // 0.36% - { 1792, 1, 32}, // 0.21% - { 1920, 1, 32}, // 0.41% - { 2048, 1, 32}, // 0.02% - { 2176, 1, 30}, // 0.41% - { 2304, 1, 28}, // 0.71% - { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% - { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% - { 2944, 1, 22}, // 0.07% - { 3072, 1, 21}, // 0.41% - { 3200, 1, 20}, // 1.15% - { 3328, 1, 19}, // 1.00% - { 3584, 1, 18}, // 0.21% - { 3840, 1, 17}, // 0.41% - { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% - { 5504, 1, 11}, // 1.35% - { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% - { 7168, 1, 9}, // 1.61% - { 7680, 1, 8}, // 0.41% - { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% - { 9344, 1, 7}, // 0.21% - { 10368, 1, 6}, // 1.15% - { 11392, 1, 5}, // 0.07% - { 12416, 1, 5}, // 0.56% - { 13696, 1, 4}, // 0.76% - { 14464, 1, 4}, // 0.71% - { 16384, 1, 4}, // 0.02% - { 18688, 1, 3}, // 0.21% - { 21760, 1, 3}, // 0.41% - { 26112, 1, 2}, // 0.41% - { 29056, 1, 2}, // 0.26% - { 32768, 1, 2}, // 0.02% - { 37376, 1, 2}, // 0.21% - { 43648, 1, 2}, // 0.12% - { 52352, 1, 2}, // 0.17% - { 56064, 2, 2}, // 3.92% - { 65536, 1, 2}, // 0.02% - { 74880, 2, 2}, // 0.03% - { 87296, 1, 2}, // 0.12% - { 104832, 2, 2}, // 0.03% - { 112256, 3, 2}, // 0.09% - { 131072, 1, 2}, // 0.02% - { 149760, 3, 2}, // 5.03% - { 174720, 2, 2}, // 0.03% - { 209664, 4, 2}, // 0.03% - { 262144, 1, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 12 -static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 42; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { - // <bytes>, <pages>, <batch 
size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 64, 1, 32}, // 1.17% - { 80, 1, 32}, // 1.57% - { 96, 1, 32}, // 2.78% - { 112, 1, 32}, // 2.78% - { 128, 1, 32}, // 1.17% - { 144, 1, 32}, // 2.78% - { 160, 1, 32}, // 3.60% - { 176, 1, 32}, // 2.37% - { 192, 1, 32}, // 2.78% - { 208, 1, 32}, // 4.86% - { 240, 1, 32}, // 1.57% - { 256, 1, 32}, // 1.17% - { 272, 1, 32}, // 1.57% - { 304, 1, 32}, // 4.86% - { 336, 1, 32}, // 2.78% - { 368, 1, 32}, // 2.37% - { 400, 1, 32}, // 3.60% - { 448, 1, 32}, // 2.78% - { 512, 1, 32}, // 1.17% - { 576, 2, 32}, // 2.18% - { 640, 2, 32}, // 7.29% - { 704, 2, 32}, // 6.40% - { 768, 2, 32}, // 7.29% - { 896, 2, 32}, // 2.18% - { 1024, 2, 32}, // 0.59% - { 1152, 3, 32}, // 7.08% - { 1280, 3, 32}, // 7.08% - { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% - { 2048, 4, 32}, // 0.29% - { 2304, 4, 28}, // 1.88% - { 2688, 4, 24}, // 1.88% - { 3456, 6, 18}, // 1.79% - { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% - { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% - { 8192, 4, 8}, // 0.29% -}; -#else -#error "Unsupported TCMALLOC_PAGE_SHIFT value!" -#endif -#endif -// clang-format on - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/common.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { + +namespace tcmalloc_internal { + +// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. +// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if <fixed> were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. + +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 82; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 64, 1, 32}, // 0.59% + { 72, 1, 32}, // 1.28% + { 80, 1, 32}, // 0.98% + { 88, 1, 32}, // 0.68% + { 96, 1, 32}, // 0.98% + { 104, 1, 32}, // 1.58% + { 112, 1, 32}, // 0.78% + { 120, 1, 32}, // 0.98% + { 128, 1, 32}, // 0.59% + { 136, 1, 32}, // 0.98% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 
240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 296, 1, 32}, // 3.10% + { 312, 1, 32}, // 1.58% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 408, 1, 32}, // 0.98% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 1, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 1, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 98304, 12, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 147456, 18, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 74; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo 
SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 64, 1, 32}, // 0.15% + { 72, 1, 32}, // 0.17% + { 80, 1, 32}, // 0.29% + { 88, 1, 32}, // 0.24% + { 96, 1, 32}, // 0.24% + { 104, 1, 32}, // 0.17% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 280, 1, 32}, // 0.17% + { 304, 1, 32}, // 0.89% + { 328, 1, 32}, // 1.06% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 488, 1, 32}, // 0.37% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 
5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 85; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 72, 1, 32}, // 0.04% + { 80, 1, 32}, // 0.04% + { 88, 1, 32}, // 0.05% + { 96, 1, 32}, // 0.04% + { 104, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 360, 1, 32}, // 0.04% + { 408, 1, 32}, // 0.10% + { 456, 1, 32}, // 0.17% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2560, 1, 25}, // 0.41% + { 2688, 1, 24}, // 0.56% + { 2816, 1, 23}, // 0.12% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4736, 1, 13}, // 0.66% + { 5504, 1, 11}, // 1.35% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% + { 7168, 1, 9}, // 1.61% + { 
7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 18688, 1, 3}, // 0.21% + { 21760, 1, 3}, // 0.41% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 42; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 64, 1, 32}, // 1.17% + { 72, 1, 32}, // 2.78% + { 80, 1, 32}, // 1.57% + { 88, 1, 32}, // 2.37% + { 96, 1, 32}, // 2.78% + { 104, 1, 32}, // 2.17% + { 120, 1, 32}, // 1.57% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 184, 1, 32}, // 2.37% + { 208, 1, 32}, // 4.86% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 312, 1, 32}, // 2.17% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 408, 1, 32}, // 1.57% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 
1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3456, 6, 18}, // 1.79% + { 4096, 4, 16}, // 0.29% + { 5376, 4, 12}, // 1.88% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 82; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 64, 1, 32}, // 0.59% + { 80, 1, 32}, // 0.98% + { 96, 1, 32}, // 0.98% + { 112, 1, 32}, // 0.78% + { 128, 1, 32}, // 0.59% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 288, 1, 32}, // 2.18% + { 304, 1, 32}, // 4.25% + { 320, 1, 32}, // 3.00% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 400, 1, 32}, // 3.00% + { 416, 1, 32}, // 4.25% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 1, 16}, // 0.29% + { 
4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 1, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 98304, 12, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 147456, 18, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 74; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 64, 1, 32}, // 0.15% + { 80, 1, 32}, // 0.29% + { 96, 1, 32}, // 0.24% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 272, 1, 32}, // 0.54% + { 288, 1, 32}, // 0.84% + { 304, 1, 32}, // 0.89% + { 320, 1, 32}, // 0.54% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 480, 1, 32}, // 0.54% + { 512, 1, 32}, // 0.15% + 
{ 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 85; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 80, 1, 32}, // 0.04% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 
208, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 368, 1, 32}, // 0.07% + { 416, 1, 32}, // 0.04% + { 464, 1, 32}, // 0.19% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2560, 1, 25}, // 0.41% + { 2688, 1, 24}, // 0.56% + { 2816, 1, 23}, // 0.12% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3200, 1, 20}, // 1.15% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4736, 1, 13}, // 0.66% + { 5504, 1, 11}, // 1.35% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 10368, 1, 6}, // 1.15% + { 11392, 1, 5}, // 0.07% + { 12416, 1, 5}, // 0.56% + { 13696, 1, 4}, // 0.76% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 18688, 1, 3}, // 0.21% + { 21760, 1, 3}, // 0.41% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, 
"kMaxSize mismatch"); +static const int kCount = 42; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 64, 1, 32}, // 1.17% + { 80, 1, 32}, // 1.57% + { 96, 1, 32}, // 2.78% + { 112, 1, 32}, // 2.78% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 176, 1, 32}, // 2.37% + { 192, 1, 32}, // 2.78% + { 208, 1, 32}, // 4.86% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 304, 1, 32}, // 4.86% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 400, 1, 32}, // 3.60% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3456, 6, 18}, // 1.79% + { 4096, 4, 16}, // 0.29% + { 5376, 4, 12}, // 1.88% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#endif +// clang-format on + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc index 1e6da051ca..3bd5e54c3c 100755 --- a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc +++ b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc @@ -1,239 +1,239 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tcmalloc/common.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { - -namespace tcmalloc_internal { - -// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation -// and other factors. For instance, if we have a 96 byte size class, and use a -// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes -// left over. There is also a fixed component of 48 bytes of TCMalloc metadata -// per span. Together, the fixed overhead would be wasted/allocated = -// (32 + 48) / (8192 - 32) ~= 0.98%. -// There is also a dynamic component to overhead based on mismatches between the -// number of bytes requested and the number of bytes provided by the size class. 
-// Together they sum to the total overhead; for instance if you asked for a -// 50-byte allocation that rounds up to a 64-byte size class, the dynamic -// overhead would be 28%, and if <fixed> were 22% it would mean (on average) -// 25 bytes of overhead for allocations of that size. - -// clang-format off -#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 -#if TCMALLOC_PAGE_SHIFT == 13 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 64, 1, 32}, // 0.59% - { 128, 1, 32}, // 0.59% - { 256, 1, 32}, // 0.59% - { 512, 1, 32}, // 0.59% - { 1024, 1, 32}, // 0.59% - { 2048, 2, 32}, // 0.29% - { 4096, 1, 16}, // 0.29% - { 8192, 1, 8}, // 0.29% - { 16384, 2, 4}, // 0.29% - { 32768, 4, 2}, // 0.15% - { 65536, 8, 2}, // 0.07% - { 131072, 16, 2}, // 0.04% - { 262144, 32, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 15 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 64, 1, 32}, // 0.15% - { 128, 1, 32}, // 0.15% - { 256, 1, 32}, // 0.15% - { 512, 1, 32}, // 0.15% - { 1024, 1, 32}, // 0.15% - { 2048, 1, 32}, // 0.15% - { 4096, 1, 16}, // 0.15% - { 8192, 1, 8}, // 0.15% - { 16384, 1, 4}, // 0.15% - { 32768, 1, 2}, // 0.15% - { 65536, 2, 2}, // 0.07% - { 131072, 4, 2}, // 0.04% - { 
262144, 8, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 18 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 128, 1, 32}, // 0.02% - { 256, 1, 32}, // 0.02% - { 512, 1, 32}, // 0.02% - { 1024, 1, 32}, // 0.02% - { 2048, 1, 32}, // 0.02% - { 4096, 1, 16}, // 0.02% - { 8192, 1, 8}, // 0.02% - { 16384, 1, 4}, // 0.02% - { 32768, 1, 2}, // 0.02% - { 65536, 1, 2}, // 0.02% - { 131072, 1, 2}, // 0.02% - { 262144, 1, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 12 -static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 12; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 64, 1, 32}, // 1.17% - { 128, 1, 32}, // 1.17% - { 256, 1, 32}, // 1.17% - { 512, 1, 32}, // 1.17% - { 1024, 2, 32}, // 0.59% - { 2048, 4, 32}, // 0.29% - { 4096, 4, 16}, // 0.29% - { 8192, 4, 8}, // 0.29% -}; -#else -#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
-#endif -#else -#if TCMALLOC_PAGE_SHIFT == 13 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.59% - { 16, 1, 32}, // 0.59% - { 32, 1, 32}, // 0.59% - { 64, 1, 32}, // 0.59% - { 128, 1, 32}, // 0.59% - { 256, 1, 32}, // 0.59% - { 512, 1, 32}, // 0.59% - { 1024, 1, 32}, // 0.59% - { 2048, 2, 32}, // 0.29% - { 4096, 1, 16}, // 0.29% - { 8192, 1, 8}, // 0.29% - { 16384, 2, 4}, // 0.29% - { 32768, 4, 2}, // 0.15% - { 65536, 8, 2}, // 0.07% - { 131072, 16, 2}, // 0.04% - { 262144, 32, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 15 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.15% - { 16, 1, 32}, // 0.15% - { 32, 1, 32}, // 0.15% - { 64, 1, 32}, // 0.15% - { 128, 1, 32}, // 0.15% - { 256, 1, 32}, // 0.15% - { 512, 1, 32}, // 0.15% - { 1024, 1, 32}, // 0.15% - { 2048, 1, 32}, // 0.15% - { 4096, 1, 16}, // 0.15% - { 8192, 1, 8}, // 0.15% - { 16384, 1, 4}, // 0.15% - { 32768, 1, 2}, // 0.15% - { 65536, 2, 2}, // 0.07% - { 131072, 4, 2}, // 0.04% - { 262144, 8, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 18 -static_assert(kMaxSize == 262144, "kMaxSize mismatch"); -static const int kCount = 17; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // 
<bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 0.02% - { 16, 1, 32}, // 0.02% - { 32, 1, 32}, // 0.02% - { 64, 1, 32}, // 0.02% - { 128, 1, 32}, // 0.02% - { 256, 1, 32}, // 0.02% - { 512, 1, 32}, // 0.02% - { 1024, 1, 32}, // 0.02% - { 2048, 1, 32}, // 0.02% - { 4096, 1, 16}, // 0.02% - { 8192, 1, 8}, // 0.02% - { 16384, 1, 4}, // 0.02% - { 32768, 1, 2}, // 0.02% - { 65536, 1, 2}, // 0.02% - { 131072, 1, 2}, // 0.02% - { 262144, 1, 2}, // 0.02% -}; -#elif TCMALLOC_PAGE_SHIFT == 12 -static_assert(kMaxSize == 8192, "kMaxSize mismatch"); -static const int kCount = 12; -static_assert(kCount <= kNumClasses); -const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; -const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { - // <bytes>, <pages>, <batch size> <fixed> - { 0, 0, 0}, // +Inf% - { 8, 1, 32}, // 1.17% - { 16, 1, 32}, // 1.17% - { 32, 1, 32}, // 1.17% - { 64, 1, 32}, // 1.17% - { 128, 1, 32}, // 1.17% - { 256, 1, 32}, // 1.17% - { 512, 1, 32}, // 1.17% - { 1024, 2, 32}, // 0.59% - { 2048, 4, 32}, // 0.29% - { 4096, 4, 16}, // 0.29% - { 8192, 4, 8}, // 0.29% -}; -#else -#error "Unsupported TCMALLOC_PAGE_SHIFT value!" -#endif -#endif -// clang-format on - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/common.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { + +namespace tcmalloc_internal { + +// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. +// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if <fixed> were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. + +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 64, 1, 32}, // 0.59% + { 128, 1, 32}, // 0.59% + { 256, 1, 32}, // 0.59% + { 512, 1, 32}, // 0.59% + { 1024, 1, 32}, // 0.59% + { 2048, 2, 32}, // 0.29% + { 4096, 1, 16}, // 0.29% + { 8192, 1, 8}, // 0.29% + { 16384, 2, 4}, // 0.29% + { 32768, 4, 2}, // 0.15% + { 65536, 8, 2}, // 0.07% + { 131072, 16, 2}, // 0.04% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize 
mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 64, 1, 32}, // 0.15% + { 128, 1, 32}, // 0.15% + { 256, 1, 32}, // 0.15% + { 512, 1, 32}, // 0.15% + { 1024, 1, 32}, // 0.15% + { 2048, 1, 32}, // 0.15% + { 4096, 1, 16}, // 0.15% + { 8192, 1, 8}, // 0.15% + { 16384, 1, 4}, // 0.15% + { 32768, 1, 2}, // 0.15% + { 65536, 2, 2}, // 0.07% + { 131072, 4, 2}, // 0.04% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 128, 1, 32}, // 0.02% + { 256, 1, 32}, // 0.02% + { 512, 1, 32}, // 0.02% + { 1024, 1, 32}, // 0.02% + { 2048, 1, 32}, // 0.02% + { 4096, 1, 16}, // 0.02% + { 8192, 1, 8}, // 0.02% + { 16384, 1, 4}, // 0.02% + { 32768, 1, 2}, // 0.02% + { 65536, 1, 2}, // 0.02% + { 131072, 1, 2}, // 0.02% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 12; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 
32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 64, 1, 32}, // 1.17% + { 128, 1, 32}, // 1.17% + { 256, 1, 32}, // 1.17% + { 512, 1, 32}, // 1.17% + { 1024, 2, 32}, // 0.59% + { 2048, 4, 32}, // 0.29% + { 4096, 4, 16}, // 0.29% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 64, 1, 32}, // 0.59% + { 128, 1, 32}, // 0.59% + { 256, 1, 32}, // 0.59% + { 512, 1, 32}, // 0.59% + { 1024, 1, 32}, // 0.59% + { 2048, 2, 32}, // 0.29% + { 4096, 1, 16}, // 0.29% + { 8192, 1, 8}, // 0.29% + { 16384, 2, 4}, // 0.29% + { 32768, 4, 2}, // 0.15% + { 65536, 8, 2}, // 0.07% + { 131072, 16, 2}, // 0.04% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 64, 1, 32}, // 0.15% + { 128, 1, 32}, // 0.15% + { 256, 1, 32}, // 0.15% + { 512, 1, 32}, // 0.15% + { 1024, 1, 32}, // 0.15% + { 2048, 1, 32}, // 0.15% + { 4096, 1, 16}, // 0.15% + { 8192, 1, 8}, // 0.15% + { 16384, 1, 4}, // 0.15% + { 32768, 1, 2}, // 0.15% + { 65536, 2, 2}, // 0.07% + { 131072, 4, 2}, // 0.04% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 
18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 128, 1, 32}, // 0.02% + { 256, 1, 32}, // 0.02% + { 512, 1, 32}, // 0.02% + { 1024, 1, 32}, // 0.02% + { 2048, 1, 32}, // 0.02% + { 4096, 1, 16}, // 0.02% + { 8192, 1, 8}, // 0.02% + { 16384, 1, 4}, // 0.02% + { 32768, 1, 2}, // 0.02% + { 65536, 1, 2}, // 0.02% + { 131072, 1, 2}, // 0.02% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 12; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 64, 1, 32}, // 1.17% + { 128, 1, 32}, // 1.17% + { 256, 1, 32}, // 1.17% + { 512, 1, 32}, // 1.17% + { 1024, 2, 32}, // 0.59% + { 2048, 4, 32}, // 0.29% + { 4096, 4, 16}, // 0.29% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#endif +// clang-format on + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc index cc02ed7a05..817fc52324 100644 --- a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc @@ -20,7 +20,7 @@ #include <unistd.h> #include <algorithm> -#include <array> +#include <array> #include <cmath> #include <csignal> #include <tuple> @@ -30,7 +30,7 @@ #include "absl/base/internal/spinlock.h" #include "absl/base/internal/sysinfo.h" #include "absl/debugging/stacktrace.h" -#include "absl/numeric/bits.h" +#include "absl/numeric/bits.h" #include "absl/strings/string_view.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/environment.h" @@ -41,9 +41,9 @@ #include "tcmalloc/static_vars.h" #include "tcmalloc/system-alloc.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { const size_t GuardedPageAllocator::kMagicSize; // NOLINT @@ -82,7 +82,7 @@ void *GuardedPageAllocator::Allocate(size_t size, size_t alignment) { ASSERT(size <= page_size_); ASSERT(alignment <= page_size_); - ASSERT(alignment == 0 || absl::has_single_bit(alignment)); + ASSERT(alignment == 0 || absl::has_single_bit(alignment)); void *result = reinterpret_cast<void *>(SlotToAddr(free_slot)); if (mprotect(result, page_size_, PROT_READ | PROT_WRITE) == -1) { ASSERT(false && "mprotect failed"); @@ -175,7 +175,7 @@ static int GetChainedRate() { } } -void GuardedPageAllocator::Print(Printer *out) { +void GuardedPageAllocator::Print(Printer *out) { absl::base_internal::SpinLockHolder h(&guarded_page_lock); out->printf( "\n" @@ -360,14 +360,14 @@ void GuardedPageAllocator::MaybeRightAlign(size_t slot, size_t size, // If alignment == 0, the necessary alignment is never larger than the size // 
rounded up to the next power of 2. We use this fact to minimize alignment - // padding between the end of small allocations and their guard pages. - // - // For allocations larger than the greater of kAlignment and - // __STDCPP_DEFAULT_NEW_ALIGNMENT__, we're safe aligning to that value. - size_t default_alignment = - std::min(absl::bit_ceil(size), - std::max(kAlignment, - static_cast<size_t>(__STDCPP_DEFAULT_NEW_ALIGNMENT__))); + // padding between the end of small allocations and their guard pages. + // + // For allocations larger than the greater of kAlignment and + // __STDCPP_DEFAULT_NEW_ALIGNMENT__, we're safe aligning to that value. + size_t default_alignment = + std::min(absl::bit_ceil(size), + std::max(kAlignment, + static_cast<size_t>(__STDCPP_DEFAULT_NEW_ALIGNMENT__))); // Ensure valid alignment. alignment = std::max(alignment, default_alignment); @@ -384,7 +384,7 @@ void GuardedPageAllocator::MaybeRightAlign(size_t slot, size_t size, // If this failure occurs during "bazel test", writes a warning for Bazel to // display. static void RecordBazelWarning(absl::string_view error) { - const char *warning_file = thread_safe_getenv("TEST_WARNINGS_OUTPUT_FILE"); + const char *warning_file = thread_safe_getenv("TEST_WARNINGS_OUTPUT_FILE"); if (!warning_file) return; // Not a bazel test. constexpr char warning[] = "GWP-ASan error detected: "; @@ -402,7 +402,7 @@ static void RecordBazelWarning(absl::string_view error) { // do here). So we write directly to the XML file instead. // static void RecordTestFailure(absl::string_view error) { - const char *xml_file = thread_safe_getenv("XML_OUTPUT_FILE"); + const char *xml_file = thread_safe_getenv("XML_OUTPUT_FILE"); if (!xml_file) return; // Not a gUnit test. // Record test failure for Sponge. 
@@ -467,9 +467,9 @@ static void SegvHandler(int signo, siginfo_t *info, void *context) { Static::guardedpage_allocator().GetAllocationOffsetAndSize(fault); Log(kLog, __FILE__, __LINE__, - "*** GWP-ASan " - "(https://google.github.io/tcmalloc/gwp-asan.html) " - "has detected a memory error ***"); + "*** GWP-ASan " + "(https://google.github.io/tcmalloc/gwp-asan.html) " + "has detected a memory error ***"); Log(kLog, __FILE__, __LINE__, ">>> Access at offset", offset, "into buffer of length", size); Log(kLog, __FILE__, __LINE__, @@ -557,6 +557,6 @@ extern "C" void MallocExtension_Internal_ActivateGuardedSampling() { }); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h index e5a6118c08..bd45c7da48 100644 --- a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h @@ -27,9 +27,9 @@ #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { ABSL_CONST_INIT extern absl::base_internal::SpinLock guarded_page_lock; @@ -172,7 +172,7 @@ class GuardedPageAllocator { // Writes a human-readable summary of GuardedPageAllocator's internal state to // *out. 
- void Print(Printer *out) ABSL_LOCKS_EXCLUDED(guarded_page_lock); + void Print(Printer *out) ABSL_LOCKS_EXCLUDED(guarded_page_lock); void PrintInPbtxt(PbtxtRegion *gwp_asan) const ABSL_LOCKS_EXCLUDED(guarded_page_lock); @@ -304,8 +304,8 @@ struct ConstexprCheck { "GuardedPageAllocator must have a constexpr constructor"); }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc index fb6d0ea265..eace78815b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc @@ -19,12 +19,12 @@ #include "tcmalloc/guarded_page_allocator.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { -static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; +static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; // Size of pages used by GuardedPageAllocator. 
static size_t PageSize() { @@ -34,9 +34,9 @@ static size_t PageSize() { } void BM_AllocDealloc(benchmark::State& state) { - static GuardedPageAllocator* gpa = []() { - auto gpa = new GuardedPageAllocator; - absl::base_internal::SpinLockHolder h(&pageheap_lock); + static GuardedPageAllocator* gpa = []() { + auto gpa = new GuardedPageAllocator; + absl::base_internal::SpinLockHolder h(&pageheap_lock); gpa->Init(kMaxGpaPages, kMaxGpaPages); gpa->AllowAllocations(); return gpa; @@ -55,6 +55,6 @@ BENCHMARK(BM_AllocDealloc)->Range(1, PageSize()); BENCHMARK(BM_AllocDealloc)->Arg(1)->ThreadRange(1, kMaxGpaPages); } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc index 0d603de690..463af9b8bc 100644 --- a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc @@ -27,12 +27,12 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/casts.h" +#include "absl/base/casts.h" #include "absl/base/internal/spinlock.h" #include "absl/base/internal/sysinfo.h" #include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" -#include "absl/numeric/bits.h" +#include "absl/numeric/bits.h" #include "absl/strings/str_cat.h" #include "absl/time/clock.h" #include "absl/time/time.h" @@ -41,10 +41,10 @@ #include "tcmalloc/static_vars.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { -static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; +static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; // Size of pages used by GuardedPageAllocator. 
static size_t PageSize() { @@ -56,20 +56,20 @@ static size_t PageSize() { class GuardedPageAllocatorTest : public testing::Test { protected: GuardedPageAllocatorTest() { - absl::base_internal::SpinLockHolder h(&pageheap_lock); + absl::base_internal::SpinLockHolder h(&pageheap_lock); gpa_.Init(kMaxGpaPages, kMaxGpaPages); gpa_.AllowAllocations(); } explicit GuardedPageAllocatorTest(size_t num_pages) { - absl::base_internal::SpinLockHolder h(&pageheap_lock); + absl::base_internal::SpinLockHolder h(&pageheap_lock); gpa_.Init(num_pages, kMaxGpaPages); gpa_.AllowAllocations(); } ~GuardedPageAllocatorTest() override { gpa_.Destroy(); } - GuardedPageAllocator gpa_; + GuardedPageAllocator gpa_; }; class GuardedPageAllocatorParamTest @@ -92,36 +92,36 @@ TEST_F(GuardedPageAllocatorTest, SingleAllocDealloc) { EXPECT_DEATH(buf[PageSize() - 1] = 'B', ""); } -TEST_F(GuardedPageAllocatorTest, NoAlignmentProvided) { - constexpr size_t kLargeObjectAlignment = std::max( - kAlignment, static_cast<size_t>(__STDCPP_DEFAULT_NEW_ALIGNMENT__)); - - for (size_t base_size = 1; base_size <= 64; base_size <<= 1) { - for (size_t size : {base_size, base_size + 1}) { - SCOPED_TRACE(size); - - constexpr int kElements = 10; - std::array<void *, kElements> ptrs; - - // Make several allocation attempts to encounter left/right-alignment in - // the guarded region. 
- for (int i = 0; i < kElements; i++) { - ptrs[i] = gpa_.Allocate(size, 0); - EXPECT_NE(ptrs[i], nullptr); - EXPECT_TRUE(gpa_.PointerIsMine(ptrs[i])); - - size_t observed_alignment = - 1 << absl::countr_zero(absl::bit_cast<uintptr_t>(ptrs[i])); - EXPECT_GE(observed_alignment, std::min(size, kLargeObjectAlignment)); - } - - for (void *ptr : ptrs) { - gpa_.Deallocate(ptr); - } - } - } -} - +TEST_F(GuardedPageAllocatorTest, NoAlignmentProvided) { + constexpr size_t kLargeObjectAlignment = std::max( + kAlignment, static_cast<size_t>(__STDCPP_DEFAULT_NEW_ALIGNMENT__)); + + for (size_t base_size = 1; base_size <= 64; base_size <<= 1) { + for (size_t size : {base_size, base_size + 1}) { + SCOPED_TRACE(size); + + constexpr int kElements = 10; + std::array<void *, kElements> ptrs; + + // Make several allocation attempts to encounter left/right-alignment in + // the guarded region. + for (int i = 0; i < kElements; i++) { + ptrs[i] = gpa_.Allocate(size, 0); + EXPECT_NE(ptrs[i], nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(ptrs[i])); + + size_t observed_alignment = + 1 << absl::countr_zero(absl::bit_cast<uintptr_t>(ptrs[i])); + EXPECT_GE(observed_alignment, std::min(size, kLargeObjectAlignment)); + } + + for (void *ptr : ptrs) { + gpa_.Deallocate(ptr); + } + } + } +} + TEST_F(GuardedPageAllocatorTest, AllocDeallocAligned) { for (size_t align = 1; align <= PageSize(); align <<= 1) { constexpr size_t alloc_size = 1; @@ -164,7 +164,7 @@ TEST_F(GuardedPageAllocatorTest, PointerIsMine) { TEST_F(GuardedPageAllocatorTest, Print) { char buf[1024] = {}; - Printer out(buf, sizeof(buf)); + Printer out(buf, sizeof(buf)); gpa_.Print(&out); EXPECT_THAT(buf, testing::ContainsRegex("GWP-ASan Status")); } @@ -239,5 +239,5 @@ TEST_F(GuardedPageAllocatorTest, ThreadedHighContention) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc 
b/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc index 5c2473ffed..88172e6657 100644 --- a/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc @@ -19,7 +19,7 @@ #include <new> #include "gtest/gtest.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/parameter_accessors.h" #include "tcmalloc/malloc_extension.h" @@ -62,23 +62,23 @@ TEST(HeapProfilingTest, PeakHeapTracking) { // make a large allocation to force a new peak heap sample // (total live: 50MiB) void *first = ::operator new(50 << 20); - // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. - benchmark::DoNotOptimize(first); + // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. + benchmark::DoNotOptimize(first); int64_t peak_after_first = ProfileSize(ProfileType::kPeakHeap); EXPECT_NEAR(peak_after_first, start_peak_sz + (50 << 20), 10 << 20); // a small allocation shouldn't increase the peak // (total live: 54MiB) void *second = ::operator new(4 << 20); - benchmark::DoNotOptimize(second); + benchmark::DoNotOptimize(second); int64_t peak_after_second = ProfileSize(ProfileType::kPeakHeap); EXPECT_EQ(peak_after_second, peak_after_first); // but a large one should // (total live: 254MiB) void *third = ::operator new(200 << 20); - benchmark::DoNotOptimize(third); + benchmark::DoNotOptimize(third); int64_t peak_after_third = ProfileSize(ProfileType::kPeakHeap); EXPECT_NEAR(peak_after_third, peak_after_second + (200 << 20), 10 << 20); @@ -96,9 +96,9 @@ TEST(HeapProfilingTest, PeakHeapTracking) { // going back up less than previous peak shouldn't affect the peak // (total live: 200MiB) void *fourth = ::operator new(100 << 20); - benchmark::DoNotOptimize(fourth); + benchmark::DoNotOptimize(fourth); void 
*fifth = ::operator new(100 << 20); - benchmark::DoNotOptimize(fifth); + benchmark::DoNotOptimize(fifth); EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); // passing the old peak significantly, even with many small allocations, @@ -107,7 +107,7 @@ TEST(HeapProfilingTest, PeakHeapTracking) { void *bitsy[1 << 10]; for (int i = 0; i < 1 << 10; i++) { bitsy[i] = ::operator new(1 << 18); - benchmark::DoNotOptimize(bitsy[i]); + benchmark::DoNotOptimize(bitsy[i]); } EXPECT_GT(ProfileSize(ProfileType::kPeakHeap), peak_after_third); diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc index 898c6d934a..fca1125532 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc @@ -22,9 +22,9 @@ #include "absl/base/internal/cycleclock.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { const HugeAddressMap::Node *HugeAddressMap::Node::next() const { const Node *n = right_; @@ -116,7 +116,7 @@ size_t HugeAddressMap::nranges() const { return used_nodes_; } HugeLength HugeAddressMap::total_mapped() const { return total_size_; } -void HugeAddressMap::Print(Printer *out) const { +void HugeAddressMap::Print(Printer *out) const { out->printf("HugeAddressMap: treap %zu / %zu nodes used / created\n", used_nodes_, total_nodes_); const size_t longest = root_ ? 
root_->longest_.raw_num() : 0; @@ -369,6 +369,6 @@ HugeAddressMap::Node *HugeAddressMap::Get(HugeRange r) { HugeAddressMap::Node::Node(HugeRange r, int prio) : range_(r), prio_(prio), when_(absl::base_internal::CycleClock::Now()) {} -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h index 3c71f19a3f..4a9889e765 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h @@ -20,9 +20,9 @@ #include "tcmalloc/huge_pages.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Maintains a set of disjoint HugeRanges, merging adjacent ranges into one. // Exposes a balanced (somehow) binary tree of free ranges on address, @@ -93,7 +93,7 @@ class HugeAddressMap { // Statistics size_t nranges() const; HugeLength total_mapped() const; - void Print(Printer *out) const; + void Print(Printer *out) const; void PrintInPbtxt(PbtxtRegion *hpaa) const; // Add <r> to the map, merging with adjacent ranges as needed. 
@@ -141,8 +141,8 @@ inline const HugeAddressMap::Node *HugeAddressMap::root() const { return root_; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_HUGE_ADDRESS_MAP_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc index 455cd63809..801c797c11 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc @@ -22,7 +22,7 @@ #include "gtest/gtest.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class HugeAddressMapTest : public ::testing::Test { @@ -81,5 +81,5 @@ TEST_F(HugeAddressMapTest, Merging) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc index c77f4522ad..552d4f51b7 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc @@ -19,11 +19,11 @@ #include "tcmalloc/huge_address_map.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { -void HugeAllocator::Print(Printer *out) { +void HugeAllocator::Print(Printer *out) { out->printf("HugeAllocator: contiguous, unbacked hugepage(s)\n"); free_.Print(out); out->printf( @@ -170,6 +170,6 @@ void HugeAllocator::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, } } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h index 6242805c49..7e3936832d 
100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h @@ -24,10 +24,10 @@ #include "tcmalloc/huge_pages.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { - +namespace tcmalloc_internal { + // these typedefs allow replacement of tcmalloc::System* for tests. typedef void *(*MemoryAllocFunction)(size_t bytes, size_t *actual, size_t align); @@ -68,7 +68,7 @@ class HugeAllocator { return s; } - void Print(Printer *out); + void Print(Printer *out); void PrintInPbtxt(PbtxtRegion *hpaa) const; private: @@ -101,8 +101,8 @@ class HugeAllocator { HugeRange AllocateRange(HugeLength n); }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_HUGE_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc index 150075b88e..32fe91c3b5 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc @@ -32,7 +32,7 @@ #include "tcmalloc/internal/logging.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class HugeAllocatorTest : public testing::TestWithParam<bool> { @@ -445,5 +445,5 @@ INSTANTIATE_TEST_SUITE_P( }); } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc b/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc index 0d25da2983..1d39783efc 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc @@ -23,9 +23,9 @@ #include "tcmalloc/internal/logging.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace 
tcmalloc_internal { +namespace tcmalloc_internal { template <size_t kEpochs> void MinMaxTracker<kEpochs>::Report(HugeLength val) { @@ -53,7 +53,7 @@ HugeLength MinMaxTracker<kEpochs>::MinOverTime(absl::Duration t) const { } template <size_t kEpochs> -void MinMaxTracker<kEpochs>::Print(Printer *out) const { +void MinMaxTracker<kEpochs>::Print(Printer *out) const { // Prints timestamp:min_pages:max_pages for each window with records. // Timestamp == kEpochs - 1 is the most recent measurement. const int64_t millis = absl::ToInt64Milliseconds(kEpochLength); @@ -372,7 +372,7 @@ HugeAddressMap::Node *HugeCache::Find(HugeLength n) { return best; } -void HugeCache::Print(Printer *out) { +void HugeCache::Print(Printer *out) { const int64_t millis = absl::ToInt64Milliseconds(kCacheTime); out->printf( "HugeCache: contains unused, backed hugepage(s) " @@ -439,9 +439,9 @@ void HugeCache::PrintInPbtxt(PbtxtRegion *hpaa) { const double overflow_rate = safe_ratio(overflows_, fills_); // number of bytes in HugeCache - hpaa->PrintI64("cached_huge_page_bytes", size_.in_bytes()); + hpaa->PrintI64("cached_huge_page_bytes", size_.in_bytes()); // max allowed bytes in HugeCache - hpaa->PrintI64("max_cached_huge_page_bytes", limit().in_bytes()); + hpaa->PrintI64("max_cached_huge_page_bytes", limit().in_bytes()); // lifetime cache hit rate hpaa->PrintDouble("huge_cache_hit_rate", hit_rate); // lifetime cache overflow rate @@ -489,6 +489,6 @@ void HugeCache::PrintInPbtxt(PbtxtRegion *hpaa) { detailed_tracker_.PrintInPbtxt(hpaa); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache.h b/contrib/libs/tcmalloc/tcmalloc/huge_cache.h index 2ffda26cb2..c225834a96 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_cache.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache.h @@ -28,14 +28,14 @@ #include "tcmalloc/experiment_config.h" #include 
"tcmalloc/huge_allocator.h" #include "tcmalloc/huge_pages.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/timeseries_tracker.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { typedef void (*MemoryModifyFunction)(void *start, size_t len); @@ -48,7 +48,7 @@ class MinMaxTracker { : kEpochLength(w / kEpochs), timeseries_(clock, w) {} void Report(HugeLength val); - void Print(Printer *out) const; + void Print(Printer *out) const; void PrintInPbtxt(PbtxtRegion *hpaa) const; // If t < kEpochLength, these functions return statistics for last epoch. The @@ -151,7 +151,7 @@ class HugeCache { return s; } - void Print(Printer *out); + void Print(Printer *out); void PrintInPbtxt(PbtxtRegion *hpaa); private: @@ -221,8 +221,8 @@ class HugeCache { MemoryModifyFunction unback_; }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_HUGE_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc index 2699b44303..41a6427519 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc @@ -36,7 +36,7 @@ #include "tcmalloc/stats.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class HugeCacheTest : public testing::Test { @@ -55,10 +55,10 @@ class HugeCacheTest : public testing::Test { // Use a tiny fraction of actual size so we can test aggressively. 
static void* AllocateFake(size_t bytes, size_t* actual, size_t align) { if (bytes % kHugePageSize != 0) { - Crash(kCrash, __FILE__, __LINE__, "not aligned", bytes, kHugePageSize); + Crash(kCrash, __FILE__, __LINE__, "not aligned", bytes, kHugePageSize); } if (align % kHugePageSize != 0) { - Crash(kCrash, __FILE__, __LINE__, "not aligned", align, kHugePageSize); + Crash(kCrash, __FILE__, __LINE__, "not aligned", align, kHugePageSize); } *actual = bytes; // we'll actually provide hidden backing, one word per hugepage. @@ -127,7 +127,7 @@ class HugeCacheTest : public testing::Test { clock_offset_ += absl::ToInt64Nanoseconds(d); } - HugeAllocator alloc_{AllocateFake, MallocMetadata}; + HugeAllocator alloc_{AllocateFake, MallocMetadata}; HugeCache cache_{&alloc_, MallocMetadata, MockUnback, Clock{.now = GetClock, .freq = GetClockFrequency}}; }; @@ -213,7 +213,7 @@ TEST_F(HugeCacheTest, Regret) { absl::Duration d = absl::Seconds(20); Advance(d); char buf[512]; - Printer out(buf, 512); + Printer out(buf, 512); cache_.Print(&out); // To update the regret uint64_t expected_regret = absl::ToInt64Nanoseconds(d) * cached.raw_num(); // Not exactly accurate since the mock clock advances with real time, and @@ -511,7 +511,7 @@ int64_t MinMaxTrackerTest::clock_{0}; TEST_F(MinMaxTrackerTest, Works) { const absl::Duration kDuration = absl::Seconds(2); - MinMaxTracker<> tracker{ + MinMaxTracker<> tracker{ Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kDuration}; tracker.Report(NHugePages(0)); @@ -559,5 +559,5 @@ TEST_F(MinMaxTrackerTest, Works) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc index e662456df6..f9aa10d134 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc @@ -36,12 +36,12 @@ 
#include "tcmalloc/static_vars.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { bool decide_want_hpaa(); -ABSL_ATTRIBUTE_WEAK int default_want_hpaa(); +ABSL_ATTRIBUTE_WEAK int default_want_hpaa(); ABSL_ATTRIBUTE_WEAK int default_subrelease(); bool decide_subrelease() { @@ -50,34 +50,34 @@ bool decide_subrelease() { return false; } - const char *e = thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); + const char *e = thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); if (e) { - switch (e[0]) { - case '0': - if (kPageShift <= 12) { - return false; - } - - if (default_want_hpaa != nullptr) { - int default_hpaa = default_want_hpaa(); - if (default_hpaa < 0) { - return false; - } - } - - Log(kLog, __FILE__, __LINE__, - "Runtime opt-out from HPAA requires building with " - "//tcmalloc:want_no_hpaa." - ); - break; - case '1': - return false; - case '2': - return true; - default: - Crash(kCrash, __FILE__, __LINE__, "bad env var", e); - return false; - } + switch (e[0]) { + case '0': + if (kPageShift <= 12) { + return false; + } + + if (default_want_hpaa != nullptr) { + int default_hpaa = default_want_hpaa(); + if (default_hpaa < 0) { + return false; + } + } + + Log(kLog, __FILE__, __LINE__, + "Runtime opt-out from HPAA requires building with " + "//tcmalloc:want_no_hpaa." 
+ ); + break; + case '1': + return false; + case '2': + return true; + default: + Crash(kCrash, __FILE__, __LINE__, "bad env var", e); + return false; + } } if (default_subrelease != nullptr) { @@ -95,7 +95,7 @@ bool decide_subrelease() { } FillerPartialRerelease decide_partial_rerelease() { - const char *e = thread_safe_getenv("TCMALLOC_PARTIAL_RELEASE_CONTROL"); + const char *e = thread_safe_getenv("TCMALLOC_PARTIAL_RELEASE_CONTROL"); if (e) { if (e[0] == '0') { return FillerPartialRerelease::Return; @@ -126,8 +126,8 @@ HugePageAwareAllocator::HugePageAwareAllocator(MemoryTag tag) switch (tag) { case MemoryTag::kNormal: return AllocAndReport<MemoryTag::kNormal>; - case MemoryTag::kNormalP1: - return AllocAndReport<MemoryTag::kNormalP1>; + case MemoryTag::kNormalP1: + return AllocAndReport<MemoryTag::kNormalP1>; case MemoryTag::kSampled: return AllocAndReport<MemoryTag::kSampled>; default: @@ -184,7 +184,7 @@ PageId HugePageAwareAllocator::RefillFiller(Length n, bool *from_released) { Span *HugePageAwareAllocator::Finalize(Length n, PageId page) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { - ASSERT(page != PageId{0}); + ASSERT(page != PageId{0}); Span *ret = Span::New(page, n); Static::pagemap().Set(page, ret); ASSERT(!ret->sampled()); @@ -196,16 +196,16 @@ Span *HugePageAwareAllocator::Finalize(Length n, PageId page) // For anything <= half a huge page, we will unconditionally use the filler // to pack it into a single page. If we need another page, that's fine. 
Span *HugePageAwareAllocator::AllocSmall(Length n, bool *from_released) { - auto [pt, page] = filler_.TryGet(n); - if (ABSL_PREDICT_TRUE(pt != nullptr)) { + auto [pt, page] = filler_.TryGet(n); + if (ABSL_PREDICT_TRUE(pt != nullptr)) { *from_released = false; return Finalize(n, page); } page = RefillFiller(n, from_released); - if (ABSL_PREDICT_FALSE(page == PageId{0})) { - return nullptr; - } + if (ABSL_PREDICT_FALSE(page == PageId{0})) { + return nullptr; + } return Finalize(n, page); } @@ -219,8 +219,8 @@ Span *HugePageAwareAllocator::AllocLarge(Length n, bool *from_released) { PageId page; // If we fit in a single hugepage, try the Filler first. if (n < kPagesPerHugePage) { - auto [pt, page] = filler_.TryGet(n); - if (ABSL_PREDICT_TRUE(pt != nullptr)) { + auto [pt, page] = filler_.TryGet(n); + if (ABSL_PREDICT_TRUE(pt != nullptr)) { *from_released = false; return Finalize(n, page); } @@ -307,11 +307,11 @@ Span *HugePageAwareAllocator::New(Length n) { CHECK_CONDITION(n > Length(0)); bool from_released; Span *s = LockAndAlloc(n, &from_released); - if (s) { - // Prefetch for writing, as we anticipate using the memory soon. - __builtin_prefetch(s->start_address(), 1, 3); - if (from_released) BackSpan(s); - } + if (s) { + // Prefetch for writing, as we anticipate using the memory soon. + __builtin_prefetch(s->start_address(), 1, 3); + if (from_released) BackSpan(s); + } ASSERT(!s || GetMemoryTag(s->start_address()) == tag_); return s; } @@ -326,7 +326,7 @@ Span *HugePageAwareAllocator::LockAndAlloc(Length n, bool *from_released) { // For anything too big for the filler, we use either a direct hugepage // allocation, or possibly the regions if we are worried about slack. 
- if (n <= HugeRegion::size().in_pages()) { + if (n <= HugeRegion::size().in_pages()) { return AllocLarge(n, from_released); } @@ -357,7 +357,7 @@ Span *HugePageAwareAllocator::NewAligned(Length n, Length align) { void HugePageAwareAllocator::DeleteFromHugepage(FillerType::Tracker *pt, PageId p, Length n) { - if (ABSL_PREDICT_TRUE(filler_.Put(pt, p, n) == nullptr)) return; + if (ABSL_PREDICT_TRUE(filler_.Put(pt, p, n) == nullptr)) return; if (pt->donated()) { --donated_huge_pages_; } @@ -365,10 +365,10 @@ void HugePageAwareAllocator::DeleteFromHugepage(FillerType::Tracker *pt, } bool HugePageAwareAllocator::AddRegion() { - HugeRange r = alloc_.Get(HugeRegion::size()); + HugeRange r = alloc_.Get(HugeRegion::size()); if (!r.valid()) return false; - HugeRegion *region = region_allocator_.New(); - new (region) HugeRegion(r, SystemRelease); + HugeRegion *region = region_allocator_.New(); + new (region) HugeRegion(r, SystemRelease); regions_.Contribute(region); return true; } @@ -387,7 +387,7 @@ void HugePageAwareAllocator::Delete(Span *span) { FillerType::Tracker *pt = GetTracker(hp); // a) We got packed by the filler onto a single hugepage - return our // allocation to that hugepage in the filler. 
- if (ABSL_PREDICT_TRUE(pt != nullptr)) { + if (ABSL_PREDICT_TRUE(pt != nullptr)) { ASSERT(hp == HugePageContaining(p + n - Length(1))); DeleteFromHugepage(pt, p, n); return; @@ -522,7 +522,7 @@ static double BytesToMiB(size_t bytes) { return bytes / MiB; } -static void BreakdownStats(Printer *out, const BackingStats &s, +static void BreakdownStats(Printer *out, const BackingStats &s, const char *label) { out->printf("%s %6.1f MiB used, %6.1f MiB free, %6.1f MiB unmapped\n", label, BytesToMiB(s.system_bytes - s.free_bytes - s.unmapped_bytes), @@ -538,9 +538,9 @@ static void BreakdownStatsInPbtxt(PbtxtRegion *hpaa, const BackingStats &s, } // public -void HugePageAwareAllocator::Print(Printer *out) { Print(out, true); } +void HugePageAwareAllocator::Print(Printer *out) { Print(out, true); } -void HugePageAwareAllocator::Print(Printer *out, bool everything) { +void HugePageAwareAllocator::Print(Printer *out, bool everything) { SmallSpanStats small; LargeSpanStats large; BackingStats bstats; @@ -671,6 +671,6 @@ void HugePageAwareAllocator::UnbackWithoutLock(void *start, size_t length) { pageheap_lock.Lock(); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h index c36a1e515e..ee0d0c93a5 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h @@ -25,7 +25,7 @@ #include "tcmalloc/huge_page_filler.h" #include "tcmalloc/huge_pages.h" #include "tcmalloc/huge_region.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/page_allocator_interface.h" #include "tcmalloc/page_heap_allocator.h" @@ -33,9 +33,9 @@ #include "tcmalloc/stats.h" #include "tcmalloc/system-alloc.h" 
-GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { bool decide_subrelease(); @@ -45,7 +45,7 @@ bool decide_subrelease(); class HugePageAwareAllocator final : public PageAllocatorInterface { public: explicit HugePageAwareAllocator(MemoryTag tag); - ~HugePageAwareAllocator() override = default; + ~HugePageAwareAllocator() override = default; // Allocate a run of "n" pages. Returns zero if out of memory. // Caller should not pass "n == 0" -- instead, n should have @@ -84,11 +84,11 @@ class HugePageAwareAllocator final : public PageAllocatorInterface { ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Prints stats about the page heap to *out. - void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; // Print stats to *out, excluding long/likely uninteresting things // unless <everything> is true. - void Print(Printer* out, bool everything) ABSL_LOCKS_EXCLUDED(pageheap_lock); + void Print(Printer* out, bool everything) ABSL_LOCKS_EXCLUDED(pageheap_lock); void PrintInPbtxt(PbtxtRegion* region) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; @@ -108,10 +108,10 @@ class HugePageAwareAllocator final : public PageAllocatorInterface { static void UnbackWithoutLock(void* start, size_t length) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - HugeRegionSet<HugeRegion> regions_; + HugeRegionSet<HugeRegion> regions_; PageHeapAllocator<FillerType::Tracker> tracker_allocator_; - PageHeapAllocator<HugeRegion> region_allocator_; + PageHeapAllocator<HugeRegion> region_allocator_; FillerType::Tracker* GetTracker(HugePage p); @@ -168,8 +168,8 @@ class HugePageAwareAllocator final : public PageAllocatorInterface { Span* Finalize(Length n, PageId page); }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // 
TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc index 83ae930e44..90e179c939 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc @@ -44,7 +44,7 @@ #include "absl/synchronization/barrier.h" #include "absl/synchronization/mutex.h" #include "absl/time/time.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include "tcmalloc/common.h" #include "tcmalloc/huge_pages.h" #include "tcmalloc/internal/logging.h" @@ -62,7 +62,7 @@ ABSL_FLAG(uint64_t, limit, 0, ""); ABSL_FLAG(bool, always_check_usage, false, "enable expensive memory checks"); namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { using testing::HasSubstr; @@ -181,7 +181,7 @@ class HugePageAwareAllocatorTest : public ::testing::Test { std::string ret; const size_t kSize = 1 << 20; ret.resize(kSize); - Printer p(&ret[0], kSize); + Printer p(&ret[0], kSize); allocator_->Print(&p); ret.erase(p.SpaceRequired()); return ret; @@ -191,7 +191,7 @@ class HugePageAwareAllocatorTest : public ::testing::Test { std::string ret; const size_t kSize = 1 << 20; ret.resize(kSize); - Printer p(&ret[0], kSize); + Printer p(&ret[0], kSize); { PbtxtRegion region(&p, kNested, 0); allocator_->PrintInPbtxt(®ion); @@ -473,7 +473,7 @@ TEST_F(HugePageAwareAllocatorTest, LargeSmall) { constexpr size_t kBufferSize = 1024 * 1024; char buffer[kBufferSize]; - Printer printer(buffer, kBufferSize); + Printer printer(buffer, kBufferSize); allocator_->Print(&printer); // Verify that we have less free memory than we allocated in total. We have // to account for bytes tied up in the cache. 
@@ -953,5 +953,5 @@ TEST_F(HugePageAwareAllocatorTest, ParallelRelease) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h index 2f72b43881..8a35158298 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h @@ -24,26 +24,26 @@ #include "absl/algorithm/container.h" #include "absl/base/internal/cycleclock.h" #include "absl/time/time.h" -#include "tcmalloc/common.h" +#include "tcmalloc/common.h" #include "tcmalloc/huge_allocator.h" #include "tcmalloc/huge_cache.h" #include "tcmalloc/huge_pages.h" #include "tcmalloc/internal/linked_list.h" -#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/optimization.h" #include "tcmalloc/internal/range_tracker.h" #include "tcmalloc/internal/timeseries_tracker.h" #include "tcmalloc/span.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { - -// This and the following classes implement the adaptive hugepage subrelease -// mechanism and realized fragmentation metric described in "Adaptive Hugepage -// Subrelease for Non-moving Memory Allocators in Warehouse-Scale Computers" -// (ISMM 2021). +namespace tcmalloc_internal { +// This and the following classes implement the adaptive hugepage subrelease +// mechanism and realized fragmentation metric described in "Adaptive Hugepage +// Subrelease for Non-moving Memory Allocators in Warehouse-Scale Computers" +// (ISMM 2021). + // Tracks correctness of skipped subrelease decisions over time. 
template <size_t kEpochs = 16> class SkippedSubreleaseCorrectnessTracker { @@ -284,7 +284,7 @@ class FillerStatsTracker { } } - void Print(Printer* out) const; + void Print(Printer* out) const; void PrintInPbtxt(PbtxtRegion* hpaa) const; // Calculates recent peaks for skipping subrelease decisions. If our allocated @@ -457,15 +457,15 @@ inline double safe_div(Length a, Length b) { } template <size_t kEpochs> -void FillerStatsTracker<kEpochs>::Print(Printer* out) const { +void FillerStatsTracker<kEpochs>::Print(Printer* out) const { NumberOfFreePages free_pages = min_free_pages(summary_interval_); out->printf("HugePageFiller: time series over %d min interval\n\n", absl::ToInt64Minutes(summary_interval_)); - - // Realized fragmentation is equivalent to backed minimum free pages over a - // 5-min interval. It is printed for convenience but not included in pbtxt. - out->printf("HugePageFiller: realized fragmentation: %.1f MiB\n", - free_pages.free_backed.in_mib()); + + // Realized fragmentation is equivalent to backed minimum free pages over a + // 5-min interval. It is printed for convenience but not included in pbtxt. + out->printf("HugePageFiller: realized fragmentation: %.1f MiB\n", + free_pages.free_backed.in_mib()); out->printf("HugePageFiller: minimum free pages: %zu (%zu backed)\n", free_pages.free.raw_num(), free_pages.free_backed.raw_num()); @@ -632,56 +632,56 @@ class PageTracker : public TList<PageTracker<Unback>>::Elem { public: static void UnbackImpl(void* p, size_t size) { Unback(p, size); } - constexpr PageTracker(HugePage p, uint64_t when) + constexpr PageTracker(HugePage p, uint64_t when) : location_(p), released_count_(0), donated_(false), - unbroken_(true), - free_{} { - init_when(when); - -#ifndef __ppc64__ -#if defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Winvalid-offsetof" -#endif - // Verify fields are structured so commonly accessed members (as part of - // Put) are on the first two cache lines. 
This allows the CentralFreeList - // to accelerate deallocations by prefetching PageTracker instances before - // taking the pageheap_lock. - // - // On PPC64, kHugePageSize / kPageSize is typically ~2K (16MB / 8KB), - // requiring 512 bytes for representing free_. While its cache line size is - // larger, the entirety of free_ will not fit on two cache lines. - static_assert( - offsetof(PageTracker<Unback>, location_) + sizeof(location_) <= - 2 * ABSL_CACHELINE_SIZE, - "location_ should fall within the first two cachelines of " - "PageTracker."); - static_assert(offsetof(PageTracker<Unback>, when_numerator_) + - sizeof(when_numerator_) <= - 2 * ABSL_CACHELINE_SIZE, - "when_numerator_ should fall within the first two cachelines " - "of PageTracker."); - static_assert(offsetof(PageTracker<Unback>, when_denominator_) + - sizeof(when_denominator_) <= - 2 * ABSL_CACHELINE_SIZE, - "when_denominator_ should fall within the first two " - "cachelines of PageTracker."); - static_assert( - offsetof(PageTracker<Unback>, donated_) + sizeof(donated_) <= - 2 * ABSL_CACHELINE_SIZE, - "donated_ should fall within the first two cachelines of PageTracker."); - static_assert( - offsetof(PageTracker<Unback>, free_) + sizeof(free_) <= - 2 * ABSL_CACHELINE_SIZE, - "free_ should fall within the first two cachelines of PageTracker."); -#if defined(__GNUC__) -#pragma GCC diagnostic pop -#endif -#endif // __ppc64__ - } - + unbroken_(true), + free_{} { + init_when(when); + +#ifndef __ppc64__ +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Winvalid-offsetof" +#endif + // Verify fields are structured so commonly accessed members (as part of + // Put) are on the first two cache lines. This allows the CentralFreeList + // to accelerate deallocations by prefetching PageTracker instances before + // taking the pageheap_lock. + // + // On PPC64, kHugePageSize / kPageSize is typically ~2K (16MB / 8KB), + // requiring 512 bytes for representing free_. 
While its cache line size is + // larger, the entirety of free_ will not fit on two cache lines. + static_assert( + offsetof(PageTracker<Unback>, location_) + sizeof(location_) <= + 2 * ABSL_CACHELINE_SIZE, + "location_ should fall within the first two cachelines of " + "PageTracker."); + static_assert(offsetof(PageTracker<Unback>, when_numerator_) + + sizeof(when_numerator_) <= + 2 * ABSL_CACHELINE_SIZE, + "when_numerator_ should fall within the first two cachelines " + "of PageTracker."); + static_assert(offsetof(PageTracker<Unback>, when_denominator_) + + sizeof(when_denominator_) <= + 2 * ABSL_CACHELINE_SIZE, + "when_denominator_ should fall within the first two " + "cachelines of PageTracker."); + static_assert( + offsetof(PageTracker<Unback>, donated_) + sizeof(donated_) <= + 2 * ABSL_CACHELINE_SIZE, + "donated_ should fall within the first two cachelines of PageTracker."); + static_assert( + offsetof(PageTracker<Unback>, free_) + sizeof(free_) <= + 2 * ABSL_CACHELINE_SIZE, + "free_ should fall within the first two cachelines of PageTracker."); +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif +#endif // __ppc64__ + } + struct PageAllocation { PageId page; Length previously_unbacked; @@ -754,26 +754,26 @@ class PageTracker : public TList<PageTracker<Unback>>::Elem { PageAgeHistograms* ages) const; private: - void init_when(uint64_t w) { - const Length before = Length(free_.total_free()); - when_numerator_ = w * before.raw_num(); - when_denominator_ = before.raw_num(); - } - + void init_when(uint64_t w) { + const Length before = Length(free_.total_free()); + when_numerator_ = w * before.raw_num(); + when_denominator_ = before.raw_num(); + } + HugePage location_; - // We keep track of an average time weighted by Length::raw_num. In order to - // avoid doing division on fast path, store the numerator and denominator and - // only do the division when we need the average. 
- uint64_t when_numerator_; - uint64_t when_denominator_; - - // Cached value of released_by_page_.CountBits(0, kPagesPerHugePages) - // - // TODO(b/151663108): Logically, this is guarded by pageheap_lock. - uint16_t released_count_; - bool donated_; - bool unbroken_; - + // We keep track of an average time weighted by Length::raw_num. In order to + // avoid doing division on fast path, store the numerator and denominator and + // only do the division when we need the average. + uint64_t when_numerator_; + uint64_t when_denominator_; + + // Cached value of released_by_page_.CountBits(0, kPagesPerHugePages) + // + // TODO(b/151663108): Logically, this is guarded by pageheap_lock. + uint16_t released_count_; + bool donated_; + bool unbroken_; + RangeTracker<kPagesPerHugePage.raw_num()> free_; // Bitmap of pages based on them being released to the OS. // * Not yet released pages are unset (considered "free") @@ -837,19 +837,19 @@ class HugePageFiller { typedef TrackerType Tracker; - struct TryGetResult { - TrackerType* pt; - PageId page; - }; - - // Our API is simple, but note that it does not include an unconditional - // allocation, only a "try"; we expect callers to allocate new hugepages if - // needed. This simplifies using it in a few different contexts (and improves - // the testing story - no dependencies.) - // - // On failure, returns nullptr/PageId{0}. - TryGetResult TryGet(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - + struct TryGetResult { + TrackerType* pt; + PageId page; + }; + + // Our API is simple, but note that it does not include an unconditional + // allocation, only a "try"; we expect callers to allocate new hugepages if + // needed. This simplifies using it in a few different contexts (and improves + // the testing story - no dependencies.) + // + // On failure, returns nullptr/PageId{0}. 
+ TryGetResult TryGet(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + // Marks [p, p + n) as usable by new allocations into *pt; returns pt // if that hugepage is now empty (nullptr otherwise.) // REQUIRES: pt is owned by this object (has been Contribute()), and @@ -903,7 +903,7 @@ class HugePageFiller { BackingStats stats() const; SubreleaseStats subrelease_stats() const { return subrelease_stats_; } - void Print(Printer* out, bool everything) const; + void Print(Printer* out, bool everything) const; void PrintInPbtxt(PbtxtRegion* hpaa) const; private: @@ -1070,18 +1070,18 @@ inline typename PageTracker<Unback>::PageAllocation PageTracker<Unback>::Get( ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == released_count_); - size_t unbacked = 0; - // If release_count_ == 0, CountBits will return 0 and ClearRange will be a - // no-op (but will touch cachelines) due to the invariants guaranteed by - // CountBits() == released_count_. - // - // This is a performance optimization, not a logical requirement. - if (ABSL_PREDICT_FALSE(released_count_ > 0)) { - unbacked = released_by_page_.CountBits(index, n.raw_num()); - released_by_page_.ClearRange(index, n.raw_num()); - ASSERT(released_count_ >= unbacked); - released_count_ -= unbacked; - } + size_t unbacked = 0; + // If release_count_ == 0, CountBits will return 0 and ClearRange will be a + // no-op (but will touch cachelines) due to the invariants guaranteed by + // CountBits() == released_count_. + // + // This is a performance optimization, not a logical requirement. 
+ if (ABSL_PREDICT_FALSE(released_count_ > 0)) { + unbacked = released_by_page_.CountBits(index, n.raw_num()); + released_by_page_.ClearRange(index, n.raw_num()); + ASSERT(released_count_ >= unbacked); + released_count_ -= unbacked; + } ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == released_count_); @@ -1094,8 +1094,8 @@ inline void PageTracker<Unback>::Put(PageId p, Length n) { Length index = p - location_.first_page(); free_.Unmark(index.raw_num(), n.raw_num()); - when_numerator_ += n.raw_num() * absl::base_internal::CycleClock::Now(); - when_denominator_ += n.raw_num(); + when_numerator_ += n.raw_num() * absl::base_internal::CycleClock::Now(); + when_denominator_ += n.raw_num(); } template <MemoryModifyFunction Unback> @@ -1145,7 +1145,7 @@ inline Length PageTracker<Unback>::ReleaseFree() { ASSERT(Length(released_count_) <= kPagesPerHugePage); ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == released_count_); - init_when(absl::base_internal::CycleClock::Now()); + init_when(absl::base_internal::CycleClock::Now()); return Length(count); } @@ -1155,8 +1155,8 @@ inline void PageTracker<Unback>::AddSpanStats(SmallSpanStats* small, PageAgeHistograms* ages) const { size_t index = 0, n; - uint64_t w = when_denominator_ == 0 ? when_numerator_ - : when_numerator_ / when_denominator_; + uint64_t w = when_denominator_ == 0 ? 
when_numerator_ + : when_numerator_ / when_denominator_; while (free_.NextFreeRange(index, &index, &n)) { bool is_released = released_by_page_.GetBit(index); // Find the last bit in the run with the same state (set or cleared) as @@ -1224,8 +1224,8 @@ inline HugePageFiller<TrackerType>::HugePageFiller( fillerstats_tracker_(clock, absl::Minutes(10), absl::Minutes(5)) {} template <class TrackerType> -inline typename HugePageFiller<TrackerType>::TryGetResult -HugePageFiller<TrackerType>::TryGet(Length n) { +inline typename HugePageFiller<TrackerType>::TryGetResult +HugePageFiller<TrackerType>::TryGet(Length n) { ASSERT(n > Length(0)); // How do we choose which hugepage to allocate from (among those with @@ -1292,7 +1292,7 @@ HugePageFiller<TrackerType>::TryGet(Length n) { // So all we have to do is find the first nonempty freelist in the regular // HintedTrackerList that *could* support our allocation, and it will be our // best choice. If there is none we repeat with the donated HintedTrackerList. - ASSUME(n < kPagesPerHugePage); + ASSUME(n < kPagesPerHugePage); TrackerType* pt; bool was_released = false; @@ -1325,11 +1325,11 @@ HugePageFiller<TrackerType>::TryGet(Length n) { break; } - return {nullptr, PageId{0}}; + return {nullptr, PageId{0}}; } while (false); - ASSUME(pt != nullptr); + ASSUME(pt != nullptr); ASSERT(pt->longest_free_range() >= n); - const auto page_allocation = pt->Get(n); + const auto page_allocation = pt->Get(n); AddToFillerList(pt); allocated_ += n; @@ -1341,7 +1341,7 @@ HugePageFiller<TrackerType>::TryGet(Length n) { // donated by this point. 
ASSERT(!pt->donated()); UpdateFillerStatsTracker(); - return {pt, page_allocation.page}; + return {pt, page_allocation.page}; } // Marks [p, p + n) as usable by new allocations into *pt; returns pt @@ -1668,7 +1668,7 @@ inline BackingStats HugePageFiller<TrackerType>::stats() const { return s; } -namespace huge_page_filler_internal { +namespace huge_page_filler_internal { // Computes some histograms of fullness. Because nearly empty/full huge pages // are much more interesting, we calculate 4 buckets at each of the beginning // and end of size one, and then divide the overall space by 16 to have 16 @@ -1719,7 +1719,7 @@ class UsageInfo { nalloc_histo_[which][BucketNum(nalloc - 1)]++; } - void Print(Printer* out) { + void Print(Printer* out) { PrintHisto(out, free_page_histo_[kRegular], "# of regular hps with a<= # of free pages <b", 0); PrintHisto(out, free_page_histo_[kDonated], @@ -1769,7 +1769,7 @@ class UsageInfo { return it - bucket_bounds_ - 1; } - void PrintHisto(Printer* out, Histo h, const char blurb[], size_t offset) { + void PrintHisto(Printer* out, Histo h, const char blurb[], size_t offset) { out->printf("\nHugePageFiller: %s", blurb); for (size_t i = 0; i < buckets_size_; ++i) { if (i % 6 == 0) { @@ -1799,10 +1799,10 @@ class UsageInfo { size_t bucket_bounds_[kBucketCapacity]; int buckets_size_ = 0; }; -} // namespace huge_page_filler_internal +} // namespace huge_page_filler_internal template <class TrackerType> -inline void HugePageFiller<TrackerType>::Print(Printer* out, +inline void HugePageFiller<TrackerType>::Print(Printer* out, bool everything) const { out->printf("HugePageFiller: densely pack small requests into hugepages\n"); @@ -1864,7 +1864,7 @@ inline void HugePageFiller<TrackerType>::Print(Printer* out, if (!everything) return; // Compute some histograms of fullness. 
- using huge_page_filler_internal::UsageInfo; + using huge_page_filler_internal::UsageInfo; UsageInfo usage; regular_alloc_.Iter( [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kRegular); }, 0); @@ -1942,7 +1942,7 @@ inline void HugePageFiller<TrackerType>::PrintInPbtxt(PbtxtRegion* hpaa) const { "filler_num_hugepages_broken_due_to_limit", subrelease_stats_.total_hugepages_broken_due_to_limit.raw_num()); // Compute some histograms of fullness. - using huge_page_filler_internal::UsageInfo; + using huge_page_filler_internal::UsageInfo; UsageInfo usage; regular_alloc_.Iter( [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kRegular); }, 0); @@ -2106,8 +2106,8 @@ inline Length HugePageFiller<TrackerType>::free_pages() const { return size().in_pages() - used_pages() - unmapped_pages(); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_HUGE_PAGE_FILLER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc index 9879d41d79..7695f0d140 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc @@ -45,21 +45,21 @@ #include "absl/synchronization/mutex.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include "tcmalloc/common.h" #include "tcmalloc/huge_pages.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/pages.h" #include "tcmalloc/stats.h" -using tcmalloc::tcmalloc_internal::Length; - -ABSL_FLAG(Length, page_tracker_defrag_lim, Length(32), +using tcmalloc::tcmalloc_internal::Length; + +ABSL_FLAG(Length, page_tracker_defrag_lim, Length(32), "Max allocation size for defrag test"); -ABSL_FLAG(Length, frag_req_limit, Length(32), +ABSL_FLAG(Length, frag_req_limit, Length(32), "request size limit for frag test"); 
-ABSL_FLAG(Length, frag_size, Length(512 * 1024), +ABSL_FLAG(Length, frag_size, Length(512 * 1024), "target number of pages for frag test"); ABSL_FLAG(uint64_t, frag_iters, 10 * 1000 * 1000, "iterations for frag test"); @@ -69,7 +69,7 @@ ABSL_FLAG(uint64_t, bytes, 1024 * 1024 * 1024, "baseline usage"); ABSL_FLAG(double, growth_factor, 2.0, "growth over baseline"); namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { // This is an arbitrary distribution taken from page requests from @@ -719,15 +719,15 @@ class FillerTest : public testing::TestWithParam<FillerPartialRerelease> { EXPECT_LT(n, kPagesPerHugePage); PAlloc ret; ret.n = n; - ret.pt = nullptr; + ret.pt = nullptr; ret.mark = ++next_mark_; if (!donated) { // Donated means always create a new hugepage absl::base_internal::SpinLockHolder l(&pageheap_lock); - auto [pt, page] = filler_.TryGet(n); - ret.pt = pt; - ret.p = page; + auto [pt, page] = filler_.TryGet(n); + ret.pt = pt; + ret.p = page; } - if (ret.pt == nullptr) { + if (ret.pt == nullptr) { ret.pt = new FakeTracker(GetBacking(), absl::base_internal::CycleClock::Now()); { @@ -940,7 +940,7 @@ TEST_P(FillerTest, PrintFreeRatio) { std::string buffer(1024 * 1024, '\0'); { - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); filler_.Print(&printer, /*everything=*/true); buffer.erase(printer.SpaceRequired()); } @@ -1429,7 +1429,7 @@ TEST_P(FillerTest, SkipSubrelease) { std::string buffer(1024 * 1024, '\0'); { - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); filler_.Print(&printer, true); } buffer.resize(strlen(buffer.c_str())); @@ -1451,7 +1451,7 @@ class FillerStatsTrackerTest : public testing::Test { protected: static constexpr absl::Duration kWindow = absl::Minutes(10); - using StatsTrackerType = FillerStatsTracker<16>; + using StatsTrackerType = FillerStatsTracker<16>; StatsTrackerType tracker_{ Clock{.now = 
FakeClock, .freq = GetFakeClockFrequency}, kWindow, absl::Minutes(5)}; @@ -1528,7 +1528,7 @@ TEST_F(FillerStatsTrackerTest, Works) { // Test text output (time series summary). { std::string buffer(1024 * 1024, '\0'); - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); { tracker_.Print(&printer); buffer.erase(printer.SpaceRequired()); @@ -1536,7 +1536,7 @@ TEST_F(FillerStatsTrackerTest, Works) { EXPECT_THAT(buffer, StrEq(R"(HugePageFiller: time series over 5 min interval -HugePageFiller: realized fragmentation: 0.8 MiB +HugePageFiller: realized fragmentation: 0.8 MiB HugePageFiller: minimum free pages: 110 (100 backed) HugePageFiller: at peak demand: 208 pages (and 111 free, 10 unmapped) HugePageFiller: at peak demand: 26 hps (14 regular, 10 donated, 1 partial, 1 released) @@ -1552,7 +1552,7 @@ HugePageFiller: Subrelease stats last 10 min: total 0 pages subreleased, 0 hugep // Test pbtxt output (full time series). { std::string buffer(1024 * 1024, '\0'); - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); { PbtxtRegion region(&printer, kTop, /*indent=*/0); tracker_.PrintInPbtxt(®ion); @@ -1870,7 +1870,7 @@ TEST_P(FillerTest, Print) { std::string buffer(1024 * 1024, '\0'); { - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); filler_.Print(&printer, /*everything=*/true); buffer.erase(printer.SpaceRequired()); } @@ -1950,7 +1950,7 @@ HugePageFiller: <225<= 0 <241<= 0 <253<= 0 <254<= 0 <255<= 0 HugePageFiller: time series over 5 min interval -HugePageFiller: realized fragmentation: 0.0 MiB +HugePageFiller: realized fragmentation: 0.0 MiB HugePageFiller: minimum free pages: 0 (0 backed) HugePageFiller: at peak demand: 1774 pages (and 261 free, 13 unmapped) HugePageFiller: at peak demand: 8 hps (5 regular, 1 donated, 0 partial, 2 released) @@ -1977,7 +1977,7 @@ TEST_P(FillerTest, PrintInPbtxt) { auto allocs = 
GenerateInterestingAllocs(); std::string buffer(1024 * 1024, '\0'); - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); { PbtxtRegion region(&printer, kTop, /*indent=*/0); filler_.PrintInPbtxt(®ion); @@ -3577,7 +3577,7 @@ TEST_P(FillerTest, CheckSubreleaseStats) { std::string buffer(1024 * 1024, '\0'); { - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); filler_.Print(&printer, /*everything=*/true); buffer.erase(printer.SpaceRequired()); } @@ -3629,7 +3629,7 @@ TEST_P(FillerTest, ConstantBrokenHugePages) { std::string buffer(1024 * 1024, '\0'); { - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); filler_.Print(&printer, /*everything=*/false); buffer.erase(printer.SpaceRequired()); } @@ -3677,7 +3677,7 @@ TEST_P(FillerTest, CheckBufferSize) { Delete(big); std::string buffer(1024 * 1024, '\0'); - Printer printer(&*buffer.begin(), buffer.size()); + Printer printer(&*buffer.begin(), buffer.size()); { PbtxtRegion region(&printer, kTop, /*indent=*/0); filler_.PrintInPbtxt(®ion); @@ -3795,5 +3795,5 @@ INSTANTIATE_TEST_SUITE_P(All, FillerTest, FillerPartialRerelease::Retain)); } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_pages.h b/contrib/libs/tcmalloc/tcmalloc/huge_pages.h index 4498994f75..e58cb2df5e 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_pages.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_pages.h @@ -28,12 +28,12 @@ #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/optimization.h" #include "tcmalloc/pages.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { inline constexpr Length kPagesPerHugePage = Length(1 << 
(kHugePageShift - kPageShift)); @@ -41,23 +41,23 @@ inline constexpr Length kPagesPerHugePage = // A single aligned huge page. struct HugePage { void *start_addr() const { - ASSERT(pn <= kMaxPageNumber); + ASSERT(pn <= kMaxPageNumber); return reinterpret_cast<void *>(pn << kHugePageShift); } - PageId first_page() const { - ASSERT(pn <= kMaxPageNumber); - return PageId(pn << (kHugePageShift - kPageShift)); - } - - size_t index() const { - ASSERT(pn <= kMaxPageNumber); - return pn; - } + PageId first_page() const { + ASSERT(pn <= kMaxPageNumber); + return PageId(pn << (kHugePageShift - kPageShift)); + } - static constexpr uintptr_t kMaxPageNumber = - std::numeric_limits<uintptr_t>::max() >> kHugePageShift; + size_t index() const { + ASSERT(pn <= kMaxPageNumber); + return pn; + } + static constexpr uintptr_t kMaxPageNumber = + std::numeric_limits<uintptr_t>::max() >> kHugePageShift; + uintptr_t pn; }; @@ -85,16 +85,16 @@ struct HugeLength { // Literal constructors (made explicit to avoid accidental uses when // another unit was meant.) -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength NHugePages(size_t n) { return HugeLength(n); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength HLFromBytes(size_t bytes) { return NHugePages(bytes / kHugePageSize); } // Rounds *up* to the nearest hugepage. 
-TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength HLFromPages(Length pages) { return NHugePages((pages + kPagesPerHugePage - Length(1)) / kPagesPerHugePage); @@ -106,7 +106,7 @@ inline HugeLength &operator++(HugeLength &len) { // NOLINT(runtime/references) } inline HugePage &operator++(HugePage &p) { // NOLINT(runtime/references) - ASSERT(p.pn + 1 <= HugePage::kMaxPageNumber); + ASSERT(p.pn + 1 <= HugePage::kMaxPageNumber); p.pn++; return p; } @@ -121,72 +121,72 @@ inline constexpr bool operator<(HugeLength lhs, HugeLength rhs) { return lhs.n < rhs.n; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator>(HugeLength lhs, HugeLength rhs) { return lhs.n > rhs.n; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator<=(HugeLength lhs, HugeLength rhs) { return lhs.n <= rhs.n; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator<(HugePage lhs, HugePage rhs) { return lhs.pn < rhs.pn; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator>(HugePage lhs, HugePage rhs) { return lhs.pn > rhs.pn; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator>=(HugeLength lhs, HugeLength rhs) { return lhs.n >= rhs.n; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator<=(HugePage lhs, HugePage rhs) { return lhs.pn <= rhs.pn; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator>=(HugePage lhs, HugePage rhs) { return lhs.pn >= rhs.pn; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator==(HugePage lhs, HugePage rhs) { return lhs.pn == rhs.pn; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator!=(HugePage lhs, HugePage rhs) { return !(lhs == rhs); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator==(HugeLength lhs, HugeLength rhs) { return lhs.n == 
rhs.n; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator!=(HugeLength lhs, HugeLength rhs) { return lhs.n != rhs.n; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr size_t operator/(HugeLength lhs, HugeLength rhs) { return lhs.n / rhs.n; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength operator*(HugeLength lhs, size_t rhs) { return NHugePages(lhs.n * rhs); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength operator/(HugeLength lhs, size_t rhs) { return NHugePages(lhs.n / rhs); } @@ -196,39 +196,39 @@ inline HugeLength &operator*=(HugeLength &lhs, size_t rhs) { return lhs; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength operator%(HugeLength lhs, HugeLength rhs) { return NHugePages(lhs.n % rhs.n); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugePage operator+(HugePage lhs, HugeLength rhs) { - ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber); + ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber); return HugePage{lhs.pn + rhs.n}; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugePage operator+(HugeLength lhs, HugePage rhs) { return rhs + lhs; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugePage operator-(HugePage lhs, HugeLength rhs) { return ASSERT(lhs.pn >= rhs.n), HugePage{lhs.pn - rhs.n}; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength operator-(HugePage lhs, HugePage rhs) { return ASSERT(lhs.pn >= rhs.pn), NHugePages(lhs.pn - rhs.pn); } inline HugePage &operator+=(HugePage &lhs, HugeLength rhs) { - ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber); + ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber); lhs.pn += rhs.n; return lhs; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength operator+(HugeLength lhs, HugeLength rhs) { return NHugePages(lhs.n + rhs.n); } @@ -238,7 
+238,7 @@ inline HugeLength &operator+=(HugeLength &lhs, HugeLength rhs) { return lhs; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr HugeLength operator-(HugeLength lhs, HugeLength rhs) { return ASSERT(lhs.n >= rhs.n), NHugePages(lhs.n - rhs.n); } @@ -257,12 +257,12 @@ inline void PrintTo(const HugeLength &n, ::std::ostream *os) { *os << n.raw_num() << "hps"; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline HugePage HugePageContaining(PageId p) { return {p.index() >> (kHugePageShift - kPageShift)}; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline HugePage HugePageContaining(void *p) { return HugePageContaining(PageIdContaining(p)); } @@ -337,7 +337,7 @@ inline std::pair<HugeRange, HugeRange> Split(HugeRange r, HugeLength n) { } } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_HUGE_PAGES_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region.h b/contrib/libs/tcmalloc/tcmalloc/huge_region.h index 0262c007b2..4d7195642a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_region.h +++ b/contrib/libs/tcmalloc/tcmalloc/huge_region.h @@ -27,9 +27,9 @@ #include "tcmalloc/pages.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Track allocations from a fixed-size multiple huge page region. // Similar to PageTracker but a few important differences: @@ -43,16 +43,16 @@ namespace tcmalloc_internal { // available gaps (1.75 MiB), and lengths that don't fit, but would // introduce unacceptable fragmentation (2.1 MiB). // -class HugeRegion : public TList<HugeRegion>::Elem { +class HugeRegion : public TList<HugeRegion>::Elem { public: // We could template this if there was any need. 
- static constexpr HugeLength kRegionSize = HLFromBytes(1024 * 1024 * 1024); - static constexpr size_t kNumHugePages = kRegionSize.raw_num(); - static constexpr HugeLength size() { return kRegionSize; } + static constexpr HugeLength kRegionSize = HLFromBytes(1024 * 1024 * 1024); + static constexpr size_t kNumHugePages = kRegionSize.raw_num(); + static constexpr HugeLength size() { return kRegionSize; } // REQUIRES: r.len() == size(); r unbacked. - HugeRegion(HugeRange r, MemoryModifyFunction unback); - HugeRegion() = delete; + HugeRegion(HugeRange r, MemoryModifyFunction unback); + HugeRegion() = delete; // If available, return a range of n free pages, setting *from_released = // true iff the returned range is currently unbacked. @@ -82,7 +82,7 @@ class HugeRegion : public TList<HugeRegion>::Elem { HugeLength backed() const; - void Print(Printer *out) const; + void Print(Printer *out) const; void PrintInPbtxt(PbtxtRegion *detail) const; BackingStats stats() const; @@ -97,7 +97,7 @@ class HugeRegion : public TList<HugeRegion>::Elem { void append_it(HugeRegion *other) { this->append(other); } private: - RangeTracker<kRegionSize.in_pages().raw_num()> tracker_; + RangeTracker<kRegionSize.in_pages().raw_num()> tracker_; HugeRange location_; @@ -126,8 +126,8 @@ class HugeRegion : public TList<HugeRegion>::Elem { HugeLength nbacked_; int64_t whens_[kNumHugePages]; HugeLength total_unbacked_{NHugePages(0)}; - - MemoryModifyFunction unback_; + + MemoryModifyFunction unback_; }; // Manage a set of regions from which we allocate. @@ -152,7 +152,7 @@ class HugeRegionSet { // we managed to release. HugeLength Release(); - void Print(Printer *out) const; + void Print(Printer *out) const; void PrintInPbtxt(PbtxtRegion *hpaa) const; void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, PageAgeHistograms *ages) const; @@ -217,13 +217,13 @@ class HugeRegionSet { }; // REQUIRES: r.len() == size(); r unbacked. 
-inline HugeRegion::HugeRegion(HugeRange r, MemoryModifyFunction unback) +inline HugeRegion::HugeRegion(HugeRange r, MemoryModifyFunction unback) : tracker_{}, location_(r), pages_used_{}, backed_{}, - nbacked_(NHugePages(0)), - unback_(unback) { + nbacked_(NHugePages(0)), + unback_(unback) { int64_t now = absl::base_internal::CycleClock::Now(); for (int i = 0; i < kNumHugePages; ++i) { whens_[i] = now; @@ -233,7 +233,7 @@ inline HugeRegion::HugeRegion(HugeRange r, MemoryModifyFunction unback) } } -inline bool HugeRegion::MaybeGet(Length n, PageId *p, bool *from_released) { +inline bool HugeRegion::MaybeGet(Length n, PageId *p, bool *from_released) { if (n > longest_free()) return false; auto index = Length(tracker_.FindAndMark(n.raw_num())); @@ -246,7 +246,7 @@ inline bool HugeRegion::MaybeGet(Length n, PageId *p, bool *from_released) { } // If release=true, release any hugepages made empty as a result. -inline void HugeRegion::Put(PageId p, Length n, bool release) { +inline void HugeRegion::Put(PageId p, Length n, bool release) { Length index = p - location_.start().first_page(); tracker_.Unmark(index.raw_num(), n.raw_num()); @@ -254,7 +254,7 @@ inline void HugeRegion::Put(PageId p, Length n, bool release) { } // Release any hugepages that are unused but backed. -inline HugeLength HugeRegion::Release() { +inline HugeLength HugeRegion::Release() { HugeLength r = NHugePages(0); bool should_unback_[kNumHugePages] = {}; for (size_t i = 0; i < kNumHugePages; ++i) { @@ -267,9 +267,9 @@ inline HugeLength HugeRegion::Release() { return r; } -inline void HugeRegion::AddSpanStats(SmallSpanStats *small, - LargeSpanStats *large, - PageAgeHistograms *ages) const { +inline void HugeRegion::AddSpanStats(SmallSpanStats *small, + LargeSpanStats *large, + PageAgeHistograms *ages) const { size_t index = 0, n; Length f, u; // This is complicated a bit by the backed/unbacked status of pages. 
@@ -329,7 +329,7 @@ inline void HugeRegion::AddSpanStats(SmallSpanStats *small, CHECK_CONDITION(u == unmapped_pages()); } -inline HugeLength HugeRegion::backed() const { +inline HugeLength HugeRegion::backed() const { HugeLength b; for (int i = 0; i < kNumHugePages; ++i) { if (backed_[i]) { @@ -340,7 +340,7 @@ inline HugeLength HugeRegion::backed() const { return b; } -inline void HugeRegion::Print(Printer *out) const { +inline void HugeRegion::Print(Printer *out) const { const size_t kib_used = used_pages().in_bytes() / 1024; const size_t kib_free = free_pages().in_bytes() / 1024; const size_t kib_longest_free = longest_free().in_bytes() / 1024; @@ -354,7 +354,7 @@ inline void HugeRegion::Print(Printer *out) const { total_unbacked_.in_bytes() / 1024 / 1024); } -inline void HugeRegion::PrintInPbtxt(PbtxtRegion *detail) const { +inline void HugeRegion::PrintInPbtxt(PbtxtRegion *detail) const { detail->PrintI64("used_bytes", used_pages().in_bytes()); detail->PrintI64("free_bytes", free_pages().in_bytes()); detail->PrintI64("longest_free_range_bytes", longest_free().in_bytes()); @@ -363,7 +363,7 @@ inline void HugeRegion::PrintInPbtxt(PbtxtRegion *detail) const { detail->PrintI64("total_unbacked_bytes", total_unbacked_.in_bytes()); } -inline BackingStats HugeRegion::stats() const { +inline BackingStats HugeRegion::stats() const { BackingStats s; s.system_bytes = location_.len().in_bytes(); s.free_bytes = free_pages().in_bytes(); @@ -371,7 +371,7 @@ inline BackingStats HugeRegion::stats() const { return s; } -inline void HugeRegion::Inc(PageId p, Length n, bool *from_released) { +inline void HugeRegion::Inc(PageId p, Length n, bool *from_released) { bool should_back = false; const int64_t now = absl::base_internal::CycleClock::Now(); while (n > Length(0)) { @@ -393,7 +393,7 @@ inline void HugeRegion::Inc(PageId p, Length n, bool *from_released) { *from_released = should_back; } -inline void HugeRegion::Dec(PageId p, Length n, bool release) { +inline void 
HugeRegion::Dec(PageId p, Length n, bool release) { const int64_t now = absl::base_internal::CycleClock::Now(); bool should_unback_[kNumHugePages] = {}; while (n > Length(0)) { @@ -418,7 +418,7 @@ inline void HugeRegion::Dec(PageId p, Length n, bool release) { } } -inline void HugeRegion::UnbackHugepages(bool should[kNumHugePages]) { +inline void HugeRegion::UnbackHugepages(bool should[kNumHugePages]) { const int64_t now = absl::base_internal::CycleClock::Now(); size_t i = 0; while (i < kNumHugePages) { @@ -436,7 +436,7 @@ inline void HugeRegion::UnbackHugepages(bool should[kNumHugePages]) { HugeLength hl = NHugePages(j - i); nbacked_ -= hl; HugePage p = location_.start() + NHugePages(i); - unback_(p.start_addr(), hl.in_bytes()); + unback_(p.start_addr(), hl.in_bytes()); total_unbacked_ += hl; i = j; } @@ -491,7 +491,7 @@ inline HugeLength HugeRegionSet<Region>::Release() { } template <typename Region> -inline void HugeRegionSet<Region>::Print(Printer *out) const { +inline void HugeRegionSet<Region>::Print(Printer *out) const { out->printf("HugeRegionSet: 1 MiB+ allocations best-fit into %zu MiB slabs\n", Region::size().in_bytes() / 1024 / 1024); out->printf("HugeRegionSet: %zu total regions\n", n_); @@ -544,8 +544,8 @@ inline BackingStats HugeRegionSet<Region>::stats() const { return stats; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_HUGE_REGION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc index 4370b92762..aab450b3d0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc @@ -32,7 +32,7 @@ #include "tcmalloc/stats.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { using testing::NiceMock; @@ -43,7 +43,7 @@ class HugeRegionTest : public ::testing::Test { 
HugeRegionTest() : // an unlikely magic page p_(HugePageContaining(reinterpret_cast<void *>(0x1faced200000))), - region_({p_, region_.size()}, MockUnback) { + region_({p_, region_.size()}, MockUnback) { // we usually don't care about backing calls, unless testing that // specifically. mock_ = absl::make_unique<NiceMock<MockBackingInterface>>(); @@ -82,7 +82,7 @@ class HugeRegionTest : public ::testing::Test { }; HugePage p_; - typedef HugeRegion Region; + typedef HugeRegion Region; Region region_; size_t next_mark_{0}; size_t marks_[Region::size().in_pages().raw_num()]; @@ -454,13 +454,13 @@ static void NilUnback(void *p, size_t bytes) {} class HugeRegionSetTest : public testing::Test { protected: - typedef HugeRegion Region; + typedef HugeRegion Region; HugeRegionSetTest() { next_ = HugePageContaining(nullptr); } std::unique_ptr<Region> GetRegion() { - // These regions are backed by "real" memory, but we don't touch it. - std::unique_ptr<Region> r(new Region({next_, Region::size()}, NilUnback)); + // These regions are backed by "real" memory, but we don't touch it. + std::unique_ptr<Region> r(new Region({next_, Region::size()}, NilUnback)); next_ += Region::size(); return r; } @@ -528,9 +528,9 @@ TEST_F(HugeRegionSetTest, Set) { }); for (int i = 0; i < regions.size(); i++) { - Log(kLog, __FILE__, __LINE__, i, regions[i]->used_pages().raw_num(), - regions[i]->free_pages().raw_num(), - regions[i]->unmapped_pages().raw_num()); + Log(kLog, __FILE__, __LINE__, i, regions[i]->used_pages().raw_num(), + regions[i]->free_pages().raw_num(), + regions[i]->unmapped_pages().raw_num()); } // Now first two should be "full" (ish) EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, @@ -555,11 +555,11 @@ TEST_F(HugeRegionSetTest, Set) { // Print out the stats for inspection of formats. 
std::vector<char> buf(64 * 1024); - Printer out(&buf[0], buf.size()); + Printer out(&buf[0], buf.size()); set_.Print(&out); printf("%s\n", &buf[0]); } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h index 49c95d66cb..2b83981257 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h @@ -20,9 +20,9 @@ #include <atomic> #include <type_traits> -#include "tcmalloc/internal/config.h" - -GOOGLE_MALLOC_SECTION_BEGIN +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { namespace atomic_danger { @@ -55,6 +55,6 @@ IntType* CastToIntegral(std::atomic<IntType>* atomic_for_syscall) { } // namespace atomic_danger } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_ATOMIC_DANGER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h index da7f30646d..45cdaa6a17 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h @@ -18,9 +18,9 @@ #include <atomic> #include "absl/base/macros.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -69,6 +69,6 @@ class StatsCounter { } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc index 
12a1709b34..1f1ff6986c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc @@ -1,88 +1,88 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tcmalloc/internal/cache_topology.h" - -#include <fcntl.h> -#include <string.h> - -#include "absl/strings/numbers.h" -#include "absl/strings/string_view.h" -#include "tcmalloc/internal/config.h" -#include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/util.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -namespace { -int OpenSysfsCacheList(size_t cpu) { - char path[PATH_MAX]; - snprintf(path, sizeof(path), - "/sys/devices/system/cpu/cpu%zu/cache/index3/shared_cpu_list", cpu); - return signal_safe_open(path, O_RDONLY | O_CLOEXEC); -} -} // namespace - -int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current) { - // Remove all parts coming after a dash or comma. 
- const size_t dash = current.find('-'); - if (dash != absl::string_view::npos) current = current.substr(0, dash); - const size_t comma = current.find(','); - if (comma != absl::string_view::npos) current = current.substr(0, comma); - - int first_cpu; - CHECK_CONDITION(absl::SimpleAtoi(current, &first_cpu)); - CHECK_CONDITION(first_cpu < CPU_SETSIZE); - return first_cpu; -} - -int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]) { - int index = 0; - // Set to a sane value. - memset(l3_cache_index, 0, CPU_SETSIZE); - for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) { - const int fd = OpenSysfsCacheList(cpu); - if (fd == -1) { - // At some point we reach the number of CPU on the system, and - // we should exit. We verify that there was no other problem. - CHECK_CONDITION(errno == ENOENT); - return index; - } - // The file contains something like: - // 0-11,22-33 - // we are looking for the first number in that file. - char buf[10]; - const size_t bytes_read = - signal_safe_read(fd, buf, 10, /*bytes_read=*/nullptr); - signal_safe_close(fd); - CHECK_CONDITION(bytes_read >= 0); - - const int first_cpu = - BuildCpuToL3CacheMap_FindFirstNumberInBuf({buf, bytes_read}); - CHECK_CONDITION(first_cpu < CPU_SETSIZE); - CHECK_CONDITION(first_cpu <= cpu); - if (cpu == first_cpu) { - l3_cache_index[cpu] = index++; - } else { - l3_cache_index[cpu] = l3_cache_index[first_cpu]; - } - } - return index; -} - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/cache_topology.h" + +#include <fcntl.h> +#include <string.h> + +#include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +namespace { +int OpenSysfsCacheList(size_t cpu) { + char path[PATH_MAX]; + snprintf(path, sizeof(path), + "/sys/devices/system/cpu/cpu%zu/cache/index3/shared_cpu_list", cpu); + return signal_safe_open(path, O_RDONLY | O_CLOEXEC); +} +} // namespace + +int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current) { + // Remove all parts coming after a dash or comma. + const size_t dash = current.find('-'); + if (dash != absl::string_view::npos) current = current.substr(0, dash); + const size_t comma = current.find(','); + if (comma != absl::string_view::npos) current = current.substr(0, comma); + + int first_cpu; + CHECK_CONDITION(absl::SimpleAtoi(current, &first_cpu)); + CHECK_CONDITION(first_cpu < CPU_SETSIZE); + return first_cpu; +} + +int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]) { + int index = 0; + // Set to a sane value. + memset(l3_cache_index, 0, CPU_SETSIZE); + for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) { + const int fd = OpenSysfsCacheList(cpu); + if (fd == -1) { + // At some point we reach the number of CPU on the system, and + // we should exit. We verify that there was no other problem. 
+ CHECK_CONDITION(errno == ENOENT); + return index; + } + // The file contains something like: + // 0-11,22-33 + // we are looking for the first number in that file. + char buf[10]; + const size_t bytes_read = + signal_safe_read(fd, buf, 10, /*bytes_read=*/nullptr); + signal_safe_close(fd); + CHECK_CONDITION(bytes_read >= 0); + + const int first_cpu = + BuildCpuToL3CacheMap_FindFirstNumberInBuf({buf, bytes_read}); + CHECK_CONDITION(first_cpu < CPU_SETSIZE); + CHECK_CONDITION(first_cpu <= cpu); + if (cpu == first_cpu) { + l3_cache_index[cpu] = index++; + } else { + l3_cache_index[cpu] = l3_cache_index[first_cpu]; + } + } + return index; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h index 292f175470..0058f23de9 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h @@ -1,36 +1,36 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ -#define TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ - -#include "tcmalloc/internal/config.h" -#include "tcmalloc/internal/util.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -// Build a mapping from cpuid to the index of the L3 cache used by that cpu. 
-// Returns the number of caches detected. -int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]); - -// Helper function exposed to permit testing it. -int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current); - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -#endif // TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ +#define TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ + +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Build a mapping from cpuid to the index of the L3 cache used by that cpu. +// Returns the number of caches detected. +int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]); + +// Helper function exposed to permit testing it. 
+int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current); + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc index 927ecace94..3145f28584 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc @@ -1,51 +1,51 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tcmalloc/internal/cache_topology.h" - -#include <sched.h> - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -namespace { - -TEST(CacheToplogy, ComputesSomethingReasonable) { - // This test verifies that each L3 cache serves the same number of CPU. This - // is not a strict requirement for the correct operation of this code, but a - // sign of sanity. 
- uint8_t l3_cache_index[CPU_SETSIZE]; - const int num_nodes = - tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap(l3_cache_index); - EXPECT_EQ(absl::base_internal::NumCPUs() % num_nodes, 0); - ASSERT_GT(num_nodes, 0); - static const int kMaxNodes = 256 / 8; - int count_per_node[kMaxNodes] = {0}; - for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) { - count_per_node[l3_cache_index[i]]++; - } - for (int i = 0; i < num_nodes; ++i) { - EXPECT_EQ(count_per_node[i], absl::base_internal::NumCPUs() / num_nodes); - } -} - -TEST(CacheTopology, FindFirstNumberInBuf) { - using tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap_FindFirstNumberInBuf; - EXPECT_EQ(7, BuildCpuToL3CacheMap_FindFirstNumberInBuf("7,-787")); - EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5")); - EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5-9")); - EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5,9")); -} - -} // namespace +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/cache_topology.h" + +#include <sched.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +TEST(CacheToplogy, ComputesSomethingReasonable) { + // This test verifies that each L3 cache serves the same number of CPU. This + // is not a strict requirement for the correct operation of this code, but a + // sign of sanity. 
+ uint8_t l3_cache_index[CPU_SETSIZE]; + const int num_nodes = + tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap(l3_cache_index); + EXPECT_EQ(absl::base_internal::NumCPUs() % num_nodes, 0); + ASSERT_GT(num_nodes, 0); + static const int kMaxNodes = 256 / 8; + int count_per_node[kMaxNodes] = {0}; + for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) { + count_per_node[l3_cache_index[i]]++; + } + for (int i = 0; i < num_nodes; ++i) { + EXPECT_EQ(count_per_node[i], absl::base_internal::NumCPUs() / num_nodes); + } +} + +TEST(CacheTopology, FindFirstNumberInBuf) { + using tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap_FindFirstNumberInBuf; + EXPECT_EQ(7, BuildCpuToL3CacheMap_FindFirstNumberInBuf("7,-787")); + EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5")); + EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5-9")); + EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5,9")); +} + +} // namespace diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/clock.h b/contrib/libs/tcmalloc/tcmalloc/internal/clock.h index 65c765203c..7d1782177d 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/clock.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/clock.h @@ -1,41 +1,41 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef TCMALLOC_INTERNAL_CLOCK_H_ -#define TCMALLOC_INTERNAL_CLOCK_H_ - -#include <stdint.h> - -#include "tcmalloc/internal/config.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -// Represents an abstract clock. The now and freq functions are analogous to -// CycleClock::Now and CycleClock::Frequency, which will be the most commonly -// used implementations. Tests can use this interface to mock out the clock. -struct Clock { - // Returns the current time in ticks (relative to an arbitrary time base). - int64_t (*now)(); - - // Returns the number of ticks per second. - double (*freq)(); -}; - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -#endif // TCMALLOC_INTERNAL_CLOCK_H_ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_CLOCK_H_ +#define TCMALLOC_INTERNAL_CLOCK_H_ + +#include <stdint.h> + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Represents an abstract clock. The now and freq functions are analogous to +// CycleClock::Now and CycleClock::Frequency, which will be the most commonly +// used implementations. Tests can use this interface to mock out the clock. +struct Clock { + // Returns the current time in ticks (relative to an arbitrary time base). + int64_t (*now)(); + + // Returns the number of ticks per second. 
+ double (*freq)(); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_CLOCK_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/config.h b/contrib/libs/tcmalloc/tcmalloc/internal/config.h index 73dbab06aa..b83fd4a13b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/config.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/config.h @@ -17,8 +17,8 @@ #include <stddef.h> -#include "absl/base/policy_checks.h" - +#include "absl/base/policy_checks.h" + // TCMALLOC_HAVE_SCHED_GETCPU is defined when the system implements // sched_getcpu(3) as by glibc and it's imitators. #if defined(__linux__) || defined(__ros__) @@ -27,63 +27,63 @@ #undef TCMALLOC_HAVE_SCHED_GETCPU #endif -// TCMALLOC_HAVE_STRUCT_MALLINFO is defined when we know that the system has -// `struct mallinfo` available. -// -// The FreeBSD libc, and subsequently macOS, does not provide the `mallopt` -// interfaces. We know that bionic, glibc (and variants), newlib, and uclibc do -// provide the `mallopt` interface. The musl libc is known to not provide the -// interface, nor does it provide a macro for checking. As a result, we -// conservatively state that `struct mallinfo` is only available on these -// environments. -#if !defined(OS_FREEBSD) && !defined(OS_MACOSX) && \ - (defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ - defined(__UCLIBC__)) -#define TCMALLOC_HAVE_STRUCT_MALLINFO 1 -#else -#undef TCMALLOC_HAVE_STRUCT_MALLINFO -#endif - -// When possible, name the text section as google_malloc. This macro should not -// be added to header files as that may move unrelated code to google_malloc -// section. 
-#if defined(__clang__) && defined(__linux__) -#define GOOGLE_MALLOC_SECTION_BEGIN \ - _Pragma("clang section text = \"google_malloc\"") -#define GOOGLE_MALLOC_SECTION_END _Pragma("clang section text = \"\"") -#else -#define GOOGLE_MALLOC_SECTION_BEGIN -#define GOOGLE_MALLOC_SECTION_END -#endif - -#if defined(__GNUC__) && !defined(__clang__) -#if __GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ < 2) -#error "GCC 9.2 or higher is required." -#endif -#endif - -#if defined(__clang__) -#if __clang_major__ < 9 -#error "Clang 9 or higher is required." -#endif -#endif - -#if !defined(__x86_64__) && !defined(__ppc64__) && !defined(__arm__) && \ - !defined(__aarch64__) && !defined(__riscv) -#error "Unsupported architecture." -#endif - -#if !defined(__cplusplus) || __cplusplus < 201703L -#error "TCMalloc requires C++17 or later." -#else -// Also explicitly use some C++17 syntax, to prevent detect flags like -// `-Wc++14-compat`. -namespace tcmalloc::google3_requires_cpp17_or_later {} -#endif - -GOOGLE_MALLOC_SECTION_BEGIN +// TCMALLOC_HAVE_STRUCT_MALLINFO is defined when we know that the system has +// `struct mallinfo` available. +// +// The FreeBSD libc, and subsequently macOS, does not provide the `mallopt` +// interfaces. We know that bionic, glibc (and variants), newlib, and uclibc do +// provide the `mallopt` interface. The musl libc is known to not provide the +// interface, nor does it provide a macro for checking. As a result, we +// conservatively state that `struct mallinfo` is only available on these +// environments. +#if !defined(OS_FREEBSD) && !defined(OS_MACOSX) && \ + (defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ + defined(__UCLIBC__)) +#define TCMALLOC_HAVE_STRUCT_MALLINFO 1 +#else +#undef TCMALLOC_HAVE_STRUCT_MALLINFO +#endif + +// When possible, name the text section as google_malloc. This macro should not +// be added to header files as that may move unrelated code to google_malloc +// section. 
+#if defined(__clang__) && defined(__linux__) +#define GOOGLE_MALLOC_SECTION_BEGIN \ + _Pragma("clang section text = \"google_malloc\"") +#define GOOGLE_MALLOC_SECTION_END _Pragma("clang section text = \"\"") +#else +#define GOOGLE_MALLOC_SECTION_BEGIN +#define GOOGLE_MALLOC_SECTION_END +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ < 2) +#error "GCC 9.2 or higher is required." +#endif +#endif + +#if defined(__clang__) +#if __clang_major__ < 9 +#error "Clang 9 or higher is required." +#endif +#endif + +#if !defined(__x86_64__) && !defined(__ppc64__) && !defined(__arm__) && \ + !defined(__aarch64__) && !defined(__riscv) +#error "Unsupported architecture." +#endif + +#if !defined(__cplusplus) || __cplusplus < 201703L +#error "TCMalloc requires C++17 or later." +#else +// Also explicitly use some C++17 syntax, to prevent detect flags like +// `-Wc++14-compat`. +namespace tcmalloc::google3_requires_cpp17_or_later {} +#endif + +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { #if defined __x86_64__ // All current and planned x86_64 processors only look at the lower 48 bits @@ -105,9 +105,9 @@ inline constexpr int kAddressBits = // AARCH64 kernel supports 48-bit virtual addresses for both user and kernel. inline constexpr int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); -#elif defined __riscv && defined __linux__ -inline constexpr int kAddressBits = - (sizeof(void *) < 8 ? (8 * sizeof(void *)) : 48); +#elif defined __riscv && defined __linux__ +inline constexpr int kAddressBits = + (sizeof(void *) < 8 ? 
(8 * sizeof(void *)) : 48); #else inline constexpr int kAddressBits = 8 * sizeof(void*); #endif @@ -119,8 +119,8 @@ static constexpr size_t kHugePageShift = 21; static constexpr size_t kHugePageShift = 24; #elif defined __aarch64__ && defined __linux__ static constexpr size_t kHugePageShift = 21; -#elif defined __riscv && defined __linux__ -static constexpr size_t kHugePageShift = 21; +#elif defined __riscv && defined __linux__ +static constexpr size_t kHugePageShift = 21; #else // ...whatever, guess something big-ish static constexpr size_t kHugePageShift = 21; @@ -129,8 +129,8 @@ static constexpr size_t kHugePageShift = 21; static constexpr size_t kHugePageSize = static_cast<size_t>(1) << kHugePageShift; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_CONFIG_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc b/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc index e786dd7a96..4f7e2698e3 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc @@ -15,7 +15,7 @@ #include <string.h> -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -42,4 +42,4 @@ const char* thread_safe_getenv(const char* env_var) { } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment.h b/contrib/libs/tcmalloc/tcmalloc/internal/environment.h index f54840e8d7..30d160cbff 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/environment.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment.h @@ -15,9 +15,9 @@ #ifndef TCMALLOC_INTERNAL_ENVIRONMENT_H_ #define TCMALLOC_INTERNAL_ENVIRONMENT_H_ -#include "tcmalloc/internal/config.h" - -GOOGLE_MALLOC_SECTION_BEGIN +#include 
"tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -37,6 +37,6 @@ const char* thread_safe_getenv(const char* env_var); } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_ENVIRONMENT_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h index 514dd4a73e..936409ca7d 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h @@ -1,252 +1,252 @@ -// Copyright 2020 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ -#define TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ - -#include <algorithm> -#include <cstdlib> -#include <functional> - -#include "absl/algorithm/container.h" -#include "absl/base/const_init.h" -#include "absl/base/internal/low_level_alloc.h" -#include "absl/base/internal/spinlock.h" -#include "absl/debugging/stacktrace.h" -#include "absl/hash/hash.h" -#include "absl/time/clock.h" -#include "absl/time/time.h" -#include "tcmalloc/internal/linked_list.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -// Counts how many times we observed objects with a particular stack trace -// that were short lived/long lived. 
Each LifetimeStats object is associated -// with a particular allocation site (i.e., allocation stack trace) and each -// allocation site has at most one LifetimeStats object. All accesses to -// LifetimeStats objects need to be synchronized via the page heap lock. -class LifetimeStats : public TList<LifetimeStats>::Elem { - public: - enum class Certainty { kLowCertainty, kHighCertainty }; - enum class Prediction { kShortLived, kLongLived }; - - void Update(Prediction prediction) { - if (prediction == Prediction::kShortLived) { - short_lived_++; - } else { - long_lived_++; - } - } - - Prediction Predict(Certainty certainty) { - if (certainty == Certainty::kLowCertainty) { - return (short_lived_ > long_lived_) ? Prediction::kShortLived - : Prediction::kLongLived; - } else { - // If little data was collected, predict as long-lived (current behavior). - return (short_lived_ > (long_lived_ + 10)) ? Prediction::kShortLived - : Prediction::kLongLived; - } - } - - // Reference counts are protected by LifetimeDatabase::table_lock_. - - // Increments the reference count of this entry. - void IncRef() { ++refcount_; } - - // Returns true if and only if the reference count reaches 0. - bool DecRef() { return --refcount_ == 0; } - - private: - uint64_t refcount_ = 1; - uint64_t short_lived_ = 0; - uint64_t long_lived_ = 0; -}; - -// Manages stack traces and statistics about their associated lifetimes. Since -// the database can fill up, old entries are evicted. Evicted entries need to -// survive as long as the last lifetime tracker referencing them and are thus -// reference-counted. -class LifetimeDatabase { - public: - struct Key { - int depth; // Number of PC values stored in array below - void* stack[kMaxStackDepth]; - - // Statically instantiate at the start of the allocation to acquire - // the allocation stack trace. 
- Key() { depth = absl::GetStackTrace(stack, kMaxStackDepth, 1); } - - template <typename H> - friend H AbslHashValue(H h, const Key& c) { - return H::combine(H::combine_contiguous(std::move(h), c.stack, c.depth), - c.depth); - } - - bool operator==(const Key& other) const { - if (depth != other.depth) { - return false; - } - return std::equal(stack, stack + depth, other.stack); - } - }; - - // Captures statistics associated with the low-level allocator backing the - // memory used by the database. - struct ArenaStats { - uint64_t bytes_allocated; - }; - - static constexpr int kMaxDatabaseSize = 1024; - - LifetimeDatabase() {} - ~LifetimeDatabase() {} - - // Not copyable or movable - LifetimeDatabase(const LifetimeDatabase&) = delete; - LifetimeDatabase& operator=(const LifetimeDatabase&) = delete; - - // Identifies the current stack trace and returns a handle to the lifetime - // statistics associated with this stack trace. May run outside the page heap - // lock -- we therefore need to do our own locking. This increments the - // reference count of the lifetime stats object and the caller is responsible - // for calling RemoveLifetimeStatsReference when finished with the object. - LifetimeStats* LookupOrAddLifetimeStats(Key* k) { - absl::base_internal::SpinLockHolder h(&table_lock_); - auto it = table_.find(*k); - LifetimeStats* s; - if (it == table_.end()) { - MaybeEvictLRU(); - // Allocate a new entry using the low-level allocator, which is safe - // to call from within TCMalloc. 
- s = stats_allocator_.allocate(1); - new (s) LifetimeStats(); - table_.insert(std::make_pair(*k, s)); - stats_fifo_.append(s); - } else { - s = it->second; - UpdateLRU(s); - } - s->IncRef(); - return s; - } - - void RemoveLifetimeStatsReference(LifetimeStats* s) { - absl::base_internal::SpinLockHolder h(&table_lock_); - if (s->DecRef()) { - stats_allocator_.deallocate(s, 1); - } - } - - size_t size() const { - absl::base_internal::SpinLockHolder h(&table_lock_); - return table_.size(); - } - - size_t evictions() const { - absl::base_internal::SpinLockHolder h(&table_lock_); - return n_evictions_; - } - - static ArenaStats* arena_stats() { - static ArenaStats stats = {0}; - return &stats; - } - - protected: - static const int kMaxStackDepth = 64; - - static absl::base_internal::LowLevelAlloc::Arena* GetArena() { - static absl::base_internal::LowLevelAlloc::Arena* arena = - absl::base_internal::LowLevelAlloc::NewArena(0); - return arena; - } - - static uint64_t bytes_allocated_ ABSL_GUARDED_BY(table_lock_); - - void UpdateLRU(LifetimeStats* stats) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) { - stats_fifo_.remove(stats); - stats_fifo_.append(stats); - } - - // If an entry is evicted, it is returned (nullptr otherwise). 
- void MaybeEvictLRU() ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) { - if (table_.size() < kMaxDatabaseSize) { - return; - } - n_evictions_++; - LifetimeStats* evict = stats_fifo_.first(); - stats_fifo_.remove(evict); - for (auto it = table_.begin(); it != table_.end(); ++it) { - if (it->second == evict) { - table_.erase(it); - if (evict->DecRef()) { - stats_allocator_.deallocate(evict, 1); - } - return; - } - } - CHECK_CONDITION(false); // Should not happen - } - - private: - template <typename T> - class MyAllocator : public std::allocator<T> { - public: - template <typename U> - struct rebind { - using other = MyAllocator<U>; - }; - - MyAllocator() noexcept {} - - template <typename U> - explicit MyAllocator(const MyAllocator<U>&) noexcept {} - - T* allocate(size_t num_objects, const void* = nullptr) { - size_t bytes = num_objects * sizeof(T); - arena_stats()->bytes_allocated += bytes; - return static_cast<T*>(absl::base_internal::LowLevelAlloc::AllocWithArena( - bytes, GetArena())); - } - - void deallocate(T* p, size_t num_objects) { - size_t bytes = num_objects * sizeof(T); - arena_stats()->bytes_allocated -= bytes; - absl::base_internal::LowLevelAlloc::Free(p); - } - }; - - MyAllocator<LifetimeStats> stats_allocator_ ABSL_GUARDED_BY(table_lock_); - mutable absl::base_internal::SpinLock table_lock_{ - absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY}; - - // Stores the current mapping from allocation site to LifetimeStats. - std::unordered_map<Key, LifetimeStats*, absl::Hash<Key>, std::equal_to<Key>, - MyAllocator<std::pair<const Key, LifetimeStats*>>> - table_ ABSL_GUARDED_BY(table_lock_); - - // Stores the entries ordered by how many times they have been accessed. 
- TList<LifetimeStats> stats_fifo_ ABSL_GUARDED_BY(table_lock_); - size_t n_evictions_ ABSL_GUARDED_BY(table_lock_) = 0; -}; - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -#endif // TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ +#define TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ + +#include <algorithm> +#include <cstdlib> +#include <functional> + +#include "absl/algorithm/container.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/low_level_alloc.h" +#include "absl/base/internal/spinlock.h" +#include "absl/debugging/stacktrace.h" +#include "absl/hash/hash.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/linked_list.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Counts how many times we observed objects with a particular stack trace +// that were short lived/long lived. Each LifetimeStats object is associated +// with a particular allocation site (i.e., allocation stack trace) and each +// allocation site has at most one LifetimeStats object. All accesses to +// LifetimeStats objects need to be synchronized via the page heap lock. 
+class LifetimeStats : public TList<LifetimeStats>::Elem { + public: + enum class Certainty { kLowCertainty, kHighCertainty }; + enum class Prediction { kShortLived, kLongLived }; + + void Update(Prediction prediction) { + if (prediction == Prediction::kShortLived) { + short_lived_++; + } else { + long_lived_++; + } + } + + Prediction Predict(Certainty certainty) { + if (certainty == Certainty::kLowCertainty) { + return (short_lived_ > long_lived_) ? Prediction::kShortLived + : Prediction::kLongLived; + } else { + // If little data was collected, predict as long-lived (current behavior). + return (short_lived_ > (long_lived_ + 10)) ? Prediction::kShortLived + : Prediction::kLongLived; + } + } + + // Reference counts are protected by LifetimeDatabase::table_lock_. + + // Increments the reference count of this entry. + void IncRef() { ++refcount_; } + + // Returns true if and only if the reference count reaches 0. + bool DecRef() { return --refcount_ == 0; } + + private: + uint64_t refcount_ = 1; + uint64_t short_lived_ = 0; + uint64_t long_lived_ = 0; +}; + +// Manages stack traces and statistics about their associated lifetimes. Since +// the database can fill up, old entries are evicted. Evicted entries need to +// survive as long as the last lifetime tracker referencing them and are thus +// reference-counted. +class LifetimeDatabase { + public: + struct Key { + int depth; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; + + // Statically instantiate at the start of the allocation to acquire + // the allocation stack trace. 
+ Key() { depth = absl::GetStackTrace(stack, kMaxStackDepth, 1); } + + template <typename H> + friend H AbslHashValue(H h, const Key& c) { + return H::combine(H::combine_contiguous(std::move(h), c.stack, c.depth), + c.depth); + } + + bool operator==(const Key& other) const { + if (depth != other.depth) { + return false; + } + return std::equal(stack, stack + depth, other.stack); + } + }; + + // Captures statistics associated with the low-level allocator backing the + // memory used by the database. + struct ArenaStats { + uint64_t bytes_allocated; + }; + + static constexpr int kMaxDatabaseSize = 1024; + + LifetimeDatabase() {} + ~LifetimeDatabase() {} + + // Not copyable or movable + LifetimeDatabase(const LifetimeDatabase&) = delete; + LifetimeDatabase& operator=(const LifetimeDatabase&) = delete; + + // Identifies the current stack trace and returns a handle to the lifetime + // statistics associated with this stack trace. May run outside the page heap + // lock -- we therefore need to do our own locking. This increments the + // reference count of the lifetime stats object and the caller is responsible + // for calling RemoveLifetimeStatsReference when finished with the object. + LifetimeStats* LookupOrAddLifetimeStats(Key* k) { + absl::base_internal::SpinLockHolder h(&table_lock_); + auto it = table_.find(*k); + LifetimeStats* s; + if (it == table_.end()) { + MaybeEvictLRU(); + // Allocate a new entry using the low-level allocator, which is safe + // to call from within TCMalloc. 
+ s = stats_allocator_.allocate(1); + new (s) LifetimeStats(); + table_.insert(std::make_pair(*k, s)); + stats_fifo_.append(s); + } else { + s = it->second; + UpdateLRU(s); + } + s->IncRef(); + return s; + } + + void RemoveLifetimeStatsReference(LifetimeStats* s) { + absl::base_internal::SpinLockHolder h(&table_lock_); + if (s->DecRef()) { + stats_allocator_.deallocate(s, 1); + } + } + + size_t size() const { + absl::base_internal::SpinLockHolder h(&table_lock_); + return table_.size(); + } + + size_t evictions() const { + absl::base_internal::SpinLockHolder h(&table_lock_); + return n_evictions_; + } + + static ArenaStats* arena_stats() { + static ArenaStats stats = {0}; + return &stats; + } + + protected: + static const int kMaxStackDepth = 64; + + static absl::base_internal::LowLevelAlloc::Arena* GetArena() { + static absl::base_internal::LowLevelAlloc::Arena* arena = + absl::base_internal::LowLevelAlloc::NewArena(0); + return arena; + } + + static uint64_t bytes_allocated_ ABSL_GUARDED_BY(table_lock_); + + void UpdateLRU(LifetimeStats* stats) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) { + stats_fifo_.remove(stats); + stats_fifo_.append(stats); + } + + // If an entry is evicted, it is returned (nullptr otherwise). 
+ void MaybeEvictLRU() ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) { + if (table_.size() < kMaxDatabaseSize) { + return; + } + n_evictions_++; + LifetimeStats* evict = stats_fifo_.first(); + stats_fifo_.remove(evict); + for (auto it = table_.begin(); it != table_.end(); ++it) { + if (it->second == evict) { + table_.erase(it); + if (evict->DecRef()) { + stats_allocator_.deallocate(evict, 1); + } + return; + } + } + CHECK_CONDITION(false); // Should not happen + } + + private: + template <typename T> + class MyAllocator : public std::allocator<T> { + public: + template <typename U> + struct rebind { + using other = MyAllocator<U>; + }; + + MyAllocator() noexcept {} + + template <typename U> + explicit MyAllocator(const MyAllocator<U>&) noexcept {} + + T* allocate(size_t num_objects, const void* = nullptr) { + size_t bytes = num_objects * sizeof(T); + arena_stats()->bytes_allocated += bytes; + return static_cast<T*>(absl::base_internal::LowLevelAlloc::AllocWithArena( + bytes, GetArena())); + } + + void deallocate(T* p, size_t num_objects) { + size_t bytes = num_objects * sizeof(T); + arena_stats()->bytes_allocated -= bytes; + absl::base_internal::LowLevelAlloc::Free(p); + } + }; + + MyAllocator<LifetimeStats> stats_allocator_ ABSL_GUARDED_BY(table_lock_); + mutable absl::base_internal::SpinLock table_lock_{ + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY}; + + // Stores the current mapping from allocation site to LifetimeStats. + std::unordered_map<Key, LifetimeStats*, absl::Hash<Key>, std::equal_to<Key>, + MyAllocator<std::pair<const Key, LifetimeStats*>>> + table_ ABSL_GUARDED_BY(table_lock_); + + // Stores the entries ordered by how many times they have been accessed. 
+ TList<LifetimeStats> stats_fifo_ ABSL_GUARDED_BY(table_lock_); + size_t n_evictions_ ABSL_GUARDED_BY(table_lock_) = 0; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc index 4280890afe..7562c97858 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc @@ -1,156 +1,156 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "tcmalloc/internal/lifetime_predictions.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "tcmalloc/testing/testutil.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -class LifetimeDatabaseTest : public testing::Test { - protected: - LifetimeDatabase lifetime_database_; - - ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* - AllocateA() { - LifetimeDatabase::Key key; - return lifetime_database_.LookupOrAddLifetimeStats(&key); - } - - ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* - AllocateB() { - LifetimeDatabase::Key key; - return lifetime_database_.LookupOrAddLifetimeStats(&key); - } - - ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* - AllocateWithStacktraceId(int id) { - if (id == 0) { - LifetimeDatabase::Key key; - return lifetime_database_.LookupOrAddLifetimeStats(&key); - } else if (id % 2 == 0) { - return AllocateWithStacktraceId(id / 2); - } else { - return AllocateWithStacktraceId_2(id / 2); - } - } - - // Record a sufficiently large number of short-lived allocations to make - // a prediction short-lived, absent any long-lived allocations. - void MakeShortLived(LifetimeStats* stats, bool high_certainty) { - for (int i = 0; i < (high_certainty ? 
100 : 2); i++) { - stats->Update(LifetimeStats::Prediction::kShortLived); - } - } - - private: - ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* - AllocateWithStacktraceId_2(int id) { - if (id == 0) { - LifetimeDatabase::Key key; - return lifetime_database_.LookupOrAddLifetimeStats(&key); - } else if (id % 2 == 0) { - return AllocateWithStacktraceId(id / 2); - } else { - return AllocateWithStacktraceId_2(id / 2); - } - } -}; - -TEST_F(LifetimeDatabaseTest, Basic) { - PRAGMA_NO_UNROLL - for (int i = 0; i < 2; i++) { - LifetimeStats* r1 = AllocateA(); - LifetimeStats* r2 = AllocateB(); - LifetimeStats* r3 = AllocateB(); - ASSERT_NE(nullptr, r1); - ASSERT_NE(nullptr, r2); - ASSERT_NE(nullptr, r3); - - // First iteration: set short-lived count. - if (i == 0) { - MakeShortLived(r1, false); - MakeShortLived(r2, true); - } else { - EXPECT_EQ(LifetimeStats::Prediction::kShortLived, - r1->Predict(LifetimeStats::Certainty::kLowCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kLongLived, - r1->Predict(LifetimeStats::Certainty::kHighCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kShortLived, - r2->Predict(LifetimeStats::Certainty::kLowCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kShortLived, - r2->Predict(LifetimeStats::Certainty::kHighCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kLongLived, - r3->Predict(LifetimeStats::Certainty::kLowCertainty)); - EXPECT_EQ(LifetimeStats::Prediction::kLongLived, - r3->Predict(LifetimeStats::Certainty::kHighCertainty)); - } - - lifetime_database_.RemoveLifetimeStatsReference(r1); - lifetime_database_.RemoveLifetimeStatsReference(r2); - lifetime_database_.RemoveLifetimeStatsReference(r3); - } -} - -TEST_F(LifetimeDatabaseTest, Eviction) { - const int kEntries = 5 * LifetimeDatabase::kMaxDatabaseSize; - - std::vector<LifetimeStats*> refs; - - PRAGMA_NO_UNROLL - for (int i = 0; i < kEntries; i++) { - LifetimeStats* r = AllocateWithStacktraceId(i); - refs.push_back(r); - - ASSERT_NE(nullptr, r); - if (i < 
LifetimeDatabase::kMaxDatabaseSize) { - MakeShortLived(r, true); - } - } - - // Check that even evicted entries are still accessible due to refcounts. - for (int i = 0; i < kEntries; i++) { - if (i < LifetimeDatabase::kMaxDatabaseSize) { - EXPECT_EQ(LifetimeStats::Prediction::kShortLived, - refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty)); - } else { - EXPECT_EQ(LifetimeStats::Prediction::kLongLived, - refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty)); - } - } - - EXPECT_EQ(LifetimeDatabase::kMaxDatabaseSize, lifetime_database_.size()); - EXPECT_EQ(kEntries - LifetimeDatabase::kMaxDatabaseSize, - lifetime_database_.evictions()); - - uint64_t before_bytes = lifetime_database_.arena_stats()->bytes_allocated; - - // Return all of the references, which should drop the remaining refcounts. - for (int i = 0; i < kEntries; i++) { - lifetime_database_.RemoveLifetimeStatsReference(refs[i]); - } - - uint64_t after_bytes = lifetime_database_.arena_stats()->bytes_allocated; - - // Check that this freed up memory - EXPECT_LT(after_bytes, before_bytes); -} - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/internal/lifetime_predictions.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class LifetimeDatabaseTest : public testing::Test { + protected: + LifetimeDatabase lifetime_database_; + + ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* + AllocateA() { + LifetimeDatabase::Key key; + return lifetime_database_.LookupOrAddLifetimeStats(&key); + } + + ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* + AllocateB() { + LifetimeDatabase::Key key; + return lifetime_database_.LookupOrAddLifetimeStats(&key); + } + + ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* + AllocateWithStacktraceId(int id) { + if (id == 0) { + LifetimeDatabase::Key key; + return lifetime_database_.LookupOrAddLifetimeStats(&key); + } else if (id % 2 == 0) { + return AllocateWithStacktraceId(id / 2); + } else { + return AllocateWithStacktraceId_2(id / 2); + } + } + + // Record a sufficiently large number of short-lived allocations to make + // a prediction short-lived, absent any long-lived allocations. + void MakeShortLived(LifetimeStats* stats, bool high_certainty) { + for (int i = 0; i < (high_certainty ? 
100 : 2); i++) { + stats->Update(LifetimeStats::Prediction::kShortLived); + } + } + + private: + ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* + AllocateWithStacktraceId_2(int id) { + if (id == 0) { + LifetimeDatabase::Key key; + return lifetime_database_.LookupOrAddLifetimeStats(&key); + } else if (id % 2 == 0) { + return AllocateWithStacktraceId(id / 2); + } else { + return AllocateWithStacktraceId_2(id / 2); + } + } +}; + +TEST_F(LifetimeDatabaseTest, Basic) { + PRAGMA_NO_UNROLL + for (int i = 0; i < 2; i++) { + LifetimeStats* r1 = AllocateA(); + LifetimeStats* r2 = AllocateB(); + LifetimeStats* r3 = AllocateB(); + ASSERT_NE(nullptr, r1); + ASSERT_NE(nullptr, r2); + ASSERT_NE(nullptr, r3); + + // First iteration: set short-lived count. + if (i == 0) { + MakeShortLived(r1, false); + MakeShortLived(r2, true); + } else { + EXPECT_EQ(LifetimeStats::Prediction::kShortLived, + r1->Predict(LifetimeStats::Certainty::kLowCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kLongLived, + r1->Predict(LifetimeStats::Certainty::kHighCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kShortLived, + r2->Predict(LifetimeStats::Certainty::kLowCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kShortLived, + r2->Predict(LifetimeStats::Certainty::kHighCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kLongLived, + r3->Predict(LifetimeStats::Certainty::kLowCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kLongLived, + r3->Predict(LifetimeStats::Certainty::kHighCertainty)); + } + + lifetime_database_.RemoveLifetimeStatsReference(r1); + lifetime_database_.RemoveLifetimeStatsReference(r2); + lifetime_database_.RemoveLifetimeStatsReference(r3); + } +} + +TEST_F(LifetimeDatabaseTest, Eviction) { + const int kEntries = 5 * LifetimeDatabase::kMaxDatabaseSize; + + std::vector<LifetimeStats*> refs; + + PRAGMA_NO_UNROLL + for (int i = 0; i < kEntries; i++) { + LifetimeStats* r = AllocateWithStacktraceId(i); + refs.push_back(r); + + ASSERT_NE(nullptr, r); + if (i < 
LifetimeDatabase::kMaxDatabaseSize) { + MakeShortLived(r, true); + } + } + + // Check that even evicted entries are still accessible due to refcounts. + for (int i = 0; i < kEntries; i++) { + if (i < LifetimeDatabase::kMaxDatabaseSize) { + EXPECT_EQ(LifetimeStats::Prediction::kShortLived, + refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty)); + } else { + EXPECT_EQ(LifetimeStats::Prediction::kLongLived, + refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty)); + } + } + + EXPECT_EQ(LifetimeDatabase::kMaxDatabaseSize, lifetime_database_.size()); + EXPECT_EQ(kEntries - LifetimeDatabase::kMaxDatabaseSize, + lifetime_database_.evictions()); + + uint64_t before_bytes = lifetime_database_.arena_stats()->bytes_allocated; + + // Return all of the references, which should drop the remaining refcounts. + for (int i = 0; i < kEntries; i++) { + lifetime_database_.RemoveLifetimeStatsReference(refs[i]); + } + + uint64_t after_bytes = lifetime_database_.arena_stats()->bytes_allocated; + + // Check that this freed up memory + EXPECT_LT(after_bytes, before_bytes); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h index d348dbe609..d84c08d287 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h @@ -1,172 +1,172 @@ -// Copyright 2020 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ -#define TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ - -#include "absl/base/internal/cycleclock.h" -#include "absl/time/time.h" -#include "tcmalloc/internal/clock.h" -#include "tcmalloc/internal/lifetime_predictions.h" -#include "tcmalloc/internal/linked_list.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -template <typename LifetimeDatabaseT, typename LifetimeStatsT> -class LifetimeTrackerImpl { - public: - // A tracker is attached to an individual allocation and tracks its lifetime. - // This allocation can either be in a region or in the filler. It contains - // a pointer back to the LifetimeStats of the allocation site that generated - // this allocation, so that statistics can be updated. - struct Tracker : public TList<Tracker>::Elem { - // The deadline after which the object is considered long-lived. - uint64_t deadline = 0; - - // If the allocation is associated with a counterfactual, this contains - // the hypothetical location in the short-lived region (null otherwise). - void* counterfactual_ptr = nullptr; - - // Lifetime statistics associated with this allocation (will be updated when - // the lifetime is known). - LifetimeStatsT* lifetime; - - // The allocation this stat belongs to was predicted short-lived. - bool predicted_short_lived = false; - - // Is this element currently tracked by the lifetime tracker? - bool is_tracked() { return deadline != 0; } - - // Reset the element (implies not tracked). 
- void reset() { - deadline = 0; - counterfactual_ptr = nullptr; - } - }; - - struct Stats { - uint64_t expired_lifetimes = 0; - uint64_t overestimated_lifetimes = 0; - uint64_t short_lived_predictions = 0; - uint64_t long_lived_predictions = 0; - }; - - explicit LifetimeTrackerImpl( - LifetimeDatabaseT* lifetime_database, absl::Duration timeout, - Clock clock = Clock{.now = absl::base_internal::CycleClock::Now, - .freq = absl::base_internal::CycleClock::Frequency}) - : timeout_(absl::ToDoubleSeconds(timeout) * clock.freq()), - lifetime_database_(*lifetime_database), - clock_(clock) {} - - // Registers a donated allocation with the tracker. - void AddAllocation(Tracker* tracker, LifetimeStatsT* lifetime, - bool predicted_short_lived) { - CheckForLifetimeExpirations(); - - if (predicted_short_lived) { - stats_.short_lived_predictions++; - } else { - stats_.long_lived_predictions++; - } - - ASSERT(tracker != nullptr); - ASSERT(lifetime != nullptr); - tracker->deadline = clock_.now() + timeout_; - tracker->lifetime = lifetime; - tracker->predicted_short_lived = predicted_short_lived; - list_.append(tracker); - } - - // Remove an allocation from the tracker. This will stop tracking the - // allocation and record whether it was correctly predicted. - void RemoveAllocation(Tracker* tracker) { - CheckForLifetimeExpirations(); - - // This is not tracked anymore. - if (!tracker->is_tracked()) { - return; - } - - if (!tracker->predicted_short_lived) { - stats_.overestimated_lifetimes++; - } - - if (tracker->lifetime != nullptr) { - tracker->lifetime->Update(LifetimeStatsT::Prediction::kShortLived); - lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime); - } - - tracker->reset(); - - list_.remove(tracker); - } - - // Check whether any lifetimes in the tracker have passed the threshold after - // which they are not short-lived anymore. 
- void CheckForLifetimeExpirations() { - // TODO(mmaas): Expirations are fairly cheap, but there is a theoretical - // possibility of having an arbitrary number of expirations at once, which - // could affect tail latency. We may want to limit the number of pages we - // let expire per unit time. - uint64_t now = clock_.now(); - Tracker* tracker = TryGetExpired(now); - while (tracker != nullptr) { - ASSERT(tracker->is_tracked()); - - // A page that was predicted short-lived was actually long-lived. - if (tracker->predicted_short_lived) { - stats_.expired_lifetimes++; - } - - if (tracker->lifetime != nullptr) { - tracker->lifetime->Update(LifetimeStatsT::Prediction::kLongLived); - lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime); - } - - tracker->reset(); - tracker = TryGetExpired(now); - } - } - - Stats stats() const { return stats_; } - - private: - // Returns the earliest expiring entry, or nullptr if none expired. - Tracker* TryGetExpired(uint64_t now) { - if (!list_.empty() && list_.first()->deadline < now) { - Tracker* s = list_.first(); - list_.remove(s); - return s; - } - return nullptr; - } - - const uint64_t timeout_; - - TList<Tracker> list_; - Stats stats_; - LifetimeDatabaseT& lifetime_database_; - Clock clock_; -}; - -using LifetimeTracker = LifetimeTrackerImpl<LifetimeDatabase, LifetimeStats>; - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -#endif // TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ +#define TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ + +#include "absl/base/internal/cycleclock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/lifetime_predictions.h" +#include "tcmalloc/internal/linked_list.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +template <typename LifetimeDatabaseT, typename LifetimeStatsT> +class LifetimeTrackerImpl { + public: + // A tracker is attached to an individual allocation and tracks its lifetime. + // This allocation can either be in a region or in the filler. It contains + // a pointer back to the LifetimeStats of the allocation site that generated + // this allocation, so that statistics can be updated. + struct Tracker : public TList<Tracker>::Elem { + // The deadline after which the object is considered long-lived. + uint64_t deadline = 0; + + // If the allocation is associated with a counterfactual, this contains + // the hypothetical location in the short-lived region (null otherwise). + void* counterfactual_ptr = nullptr; + + // Lifetime statistics associated with this allocation (will be updated when + // the lifetime is known). + LifetimeStatsT* lifetime; + + // The allocation this stat belongs to was predicted short-lived. + bool predicted_short_lived = false; + + // Is this element currently tracked by the lifetime tracker? + bool is_tracked() { return deadline != 0; } + + // Reset the element (implies not tracked). 
+ void reset() { + deadline = 0; + counterfactual_ptr = nullptr; + } + }; + + struct Stats { + uint64_t expired_lifetimes = 0; + uint64_t overestimated_lifetimes = 0; + uint64_t short_lived_predictions = 0; + uint64_t long_lived_predictions = 0; + }; + + explicit LifetimeTrackerImpl( + LifetimeDatabaseT* lifetime_database, absl::Duration timeout, + Clock clock = Clock{.now = absl::base_internal::CycleClock::Now, + .freq = absl::base_internal::CycleClock::Frequency}) + : timeout_(absl::ToDoubleSeconds(timeout) * clock.freq()), + lifetime_database_(*lifetime_database), + clock_(clock) {} + + // Registers a donated allocation with the tracker. + void AddAllocation(Tracker* tracker, LifetimeStatsT* lifetime, + bool predicted_short_lived) { + CheckForLifetimeExpirations(); + + if (predicted_short_lived) { + stats_.short_lived_predictions++; + } else { + stats_.long_lived_predictions++; + } + + ASSERT(tracker != nullptr); + ASSERT(lifetime != nullptr); + tracker->deadline = clock_.now() + timeout_; + tracker->lifetime = lifetime; + tracker->predicted_short_lived = predicted_short_lived; + list_.append(tracker); + } + + // Remove an allocation from the tracker. This will stop tracking the + // allocation and record whether it was correctly predicted. + void RemoveAllocation(Tracker* tracker) { + CheckForLifetimeExpirations(); + + // This is not tracked anymore. + if (!tracker->is_tracked()) { + return; + } + + if (!tracker->predicted_short_lived) { + stats_.overestimated_lifetimes++; + } + + if (tracker->lifetime != nullptr) { + tracker->lifetime->Update(LifetimeStatsT::Prediction::kShortLived); + lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime); + } + + tracker->reset(); + + list_.remove(tracker); + } + + // Check whether any lifetimes in the tracker have passed the threshold after + // which they are not short-lived anymore. 
+ void CheckForLifetimeExpirations() { + // TODO(mmaas): Expirations are fairly cheap, but there is a theoretical + // possibility of having an arbitrary number of expirations at once, which + // could affect tail latency. We may want to limit the number of pages we + // let expire per unit time. + uint64_t now = clock_.now(); + Tracker* tracker = TryGetExpired(now); + while (tracker != nullptr) { + ASSERT(tracker->is_tracked()); + + // A page that was predicted short-lived was actually long-lived. + if (tracker->predicted_short_lived) { + stats_.expired_lifetimes++; + } + + if (tracker->lifetime != nullptr) { + tracker->lifetime->Update(LifetimeStatsT::Prediction::kLongLived); + lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime); + } + + tracker->reset(); + tracker = TryGetExpired(now); + } + } + + Stats stats() const { return stats_; } + + private: + // Returns the earliest expiring entry, or nullptr if none expired. + Tracker* TryGetExpired(uint64_t now) { + if (!list_.empty() && list_.first()->deadline < now) { + Tracker* s = list_.first(); + list_.remove(s); + return s; + } + return nullptr; + } + + const uint64_t timeout_; + + TList<Tracker> list_; + Stats stats_; + LifetimeDatabaseT& lifetime_database_; + Clock clock_; +}; + +using LifetimeTracker = LifetimeTrackerImpl<LifetimeDatabase, LifetimeStats>; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc index 78ed38ecae..6435a04e69 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc @@ -1,129 +1,129 @@ -// Copyright 2019 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with 
the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tcmalloc/internal/lifetime_tracker.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "absl/time/time.h" -#include "tcmalloc/internal/lifetime_predictions.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -class MockLifetimeStats { - public: - enum class Prediction { kShortLived, kLongLived }; - MOCK_METHOD(void, Update, (Prediction prediction), ()); -}; - -class MockLifetimeDatabase { - public: - MOCK_METHOD(void, RemoveLifetimeStatsReference, (MockLifetimeStats*), ()); -}; - -using LifetimeTrackerUnderTest = - LifetimeTrackerImpl<MockLifetimeDatabase, MockLifetimeStats>; - -class LifetimeTrackerTest : public testing::Test { - protected: - const Clock kFakeClock = - Clock{.now = FakeClock, .freq = GetFakeClockFrequency}; - - void Advance(absl::Duration d) { - clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); - } - - private: - static int64_t FakeClock() { return clock_; } - - static double GetFakeClockFrequency() { - return absl::ToDoubleNanoseconds(absl::Seconds(2)); - } - - static int64_t clock_; -}; - -int64_t LifetimeTrackerTest::clock_{0}; - -TEST_F(LifetimeTrackerTest, Basic) { - MockLifetimeDatabase database; - LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock); - MockLifetimeStats stats; - - LifetimeTrackerUnderTest::Tracker tracker1; - tracker.AddAllocation(&tracker1, &stats, false); - Advance(absl::Seconds(1)); - - EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kLongLived)); - EXPECT_CALL(database, 
RemoveLifetimeStatsReference(&stats)); - - LifetimeTrackerUnderTest::Tracker tracker2; - tracker.AddAllocation(&tracker2, &stats, false); - - EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kShortLived)); - EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats)); - - Advance(absl::Seconds(0.1)); - tracker.RemoveAllocation(&tracker2); - - EXPECT_EQ(tracker.stats().expired_lifetimes, 0); - EXPECT_EQ(tracker.stats().overestimated_lifetimes, 1); - EXPECT_EQ(tracker.stats().short_lived_predictions, 0); - EXPECT_EQ(tracker.stats().long_lived_predictions, 2); -} - -TEST_F(LifetimeTrackerTest, ExpirationLogic) { - MockLifetimeDatabase database; - LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock); - - // Create 100 trackers, all predicted short-lived. Every second tracker will - // be long-lived and therefore expire. - const int kNumTrackers = 100; - std::vector<LifetimeTrackerUnderTest::Tracker> trackers(kNumTrackers); - MockLifetimeStats stats[] = {MockLifetimeStats(), MockLifetimeStats()}; - - for (int i = 0; i < kNumTrackers; ++i) { - tracker.AddAllocation(&trackers[i], &stats[i % 2], true); - Advance(absl::Milliseconds(1)); - } - - EXPECT_CALL(stats[0], Update(MockLifetimeStats::Prediction::kShortLived)) - .Times(kNumTrackers / 2); - EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[0])) - .Times(kNumTrackers / 2); - - for (int i = 0; i < kNumTrackers; i += 2) { - tracker.RemoveAllocation(&trackers[i]); - } - - // After an additional 450ms, 1/4 of the allocations should have expired. 
- EXPECT_CALL(stats[1], Update(MockLifetimeStats::Prediction::kLongLived)) - .Times(kNumTrackers / 4); - EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[1])) - .Times(kNumTrackers / 4); - - Advance(absl::Milliseconds(450)); - tracker.CheckForLifetimeExpirations(); - - EXPECT_EQ(tracker.stats().expired_lifetimes, kNumTrackers / 4); - EXPECT_EQ(tracker.stats().overestimated_lifetimes, 0); - EXPECT_EQ(tracker.stats().short_lived_predictions, kNumTrackers); - EXPECT_EQ(tracker.stats().long_lived_predictions, 0); -} - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/internal/lifetime_tracker.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/lifetime_predictions.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class MockLifetimeStats { + public: + enum class Prediction { kShortLived, kLongLived }; + MOCK_METHOD(void, Update, (Prediction prediction), ()); +}; + +class MockLifetimeDatabase { + public: + MOCK_METHOD(void, RemoveLifetimeStatsReference, (MockLifetimeStats*), ()); +}; + +using LifetimeTrackerUnderTest = + LifetimeTrackerImpl<MockLifetimeDatabase, MockLifetimeStats>; + +class LifetimeTrackerTest : public testing::Test { + protected: + const Clock kFakeClock = + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}; + + void Advance(absl::Duration d) { + clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); + } + + private: + static int64_t FakeClock() { return clock_; } + + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + + static int64_t clock_; +}; + +int64_t LifetimeTrackerTest::clock_{0}; + +TEST_F(LifetimeTrackerTest, Basic) { + MockLifetimeDatabase database; + LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock); + MockLifetimeStats stats; + + LifetimeTrackerUnderTest::Tracker tracker1; + tracker.AddAllocation(&tracker1, &stats, false); + Advance(absl::Seconds(1)); + + EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kLongLived)); + EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats)); + + LifetimeTrackerUnderTest::Tracker tracker2; + tracker.AddAllocation(&tracker2, &stats, false); + + EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kShortLived)); + EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats)); + + Advance(absl::Seconds(0.1)); + tracker.RemoveAllocation(&tracker2); + + EXPECT_EQ(tracker.stats().expired_lifetimes, 0); + EXPECT_EQ(tracker.stats().overestimated_lifetimes, 1); + 
EXPECT_EQ(tracker.stats().short_lived_predictions, 0); + EXPECT_EQ(tracker.stats().long_lived_predictions, 2); +} + +TEST_F(LifetimeTrackerTest, ExpirationLogic) { + MockLifetimeDatabase database; + LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock); + + // Create 100 trackers, all predicted short-lived. Every second tracker will + // be long-lived and therefore expire. + const int kNumTrackers = 100; + std::vector<LifetimeTrackerUnderTest::Tracker> trackers(kNumTrackers); + MockLifetimeStats stats[] = {MockLifetimeStats(), MockLifetimeStats()}; + + for (int i = 0; i < kNumTrackers; ++i) { + tracker.AddAllocation(&trackers[i], &stats[i % 2], true); + Advance(absl::Milliseconds(1)); + } + + EXPECT_CALL(stats[0], Update(MockLifetimeStats::Prediction::kShortLived)) + .Times(kNumTrackers / 2); + EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[0])) + .Times(kNumTrackers / 2); + + for (int i = 0; i < kNumTrackers; i += 2) { + tracker.RemoveAllocation(&trackers[i]); + } + + // After an additional 450ms, 1/4 of the allocations should have expired. 
+ EXPECT_CALL(stats[1], Update(MockLifetimeStats::Prediction::kLongLived)) + .Times(kNumTrackers / 4); + EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[1])) + .Times(kNumTrackers / 4); + + Advance(absl::Milliseconds(450)); + tracker.CheckForLifetimeExpirations(); + + EXPECT_EQ(tracker.stats().expired_lifetimes, kNumTrackers / 4); + EXPECT_EQ(tracker.stats().overestimated_lifetimes, 0); + EXPECT_EQ(tracker.stats().short_lived_predictions, kNumTrackers); + EXPECT_EQ(tracker.stats().long_lived_predictions, 0); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h index 181a480275..eba71c70c2 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h @@ -25,9 +25,9 @@ #include "absl/base/optimization.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* SLL_Next(void* t) { return *(reinterpret_cast<void**>(t)); @@ -189,16 +189,16 @@ class TList { } // Returns first element in the list. The list must not be empty. - ABSL_ATTRIBUTE_RETURNS_NONNULL T* first() const { + ABSL_ATTRIBUTE_RETURNS_NONNULL T* first() const { ASSERT(!empty()); - ASSERT(head_.next_ != nullptr); + ASSERT(head_.next_ != nullptr); return static_cast<T*>(head_.next_); } // Returns last element in the list. The list must not be empty. 
- ABSL_ATTRIBUTE_RETURNS_NONNULL T* last() const { + ABSL_ATTRIBUTE_RETURNS_NONNULL T* last() const { ASSERT(!empty()); - ASSERT(head_.prev_ != nullptr); + ASSERT(head_.prev_ != nullptr); return static_cast<T*>(head_.prev_); } @@ -247,8 +247,8 @@ class TList { Elem head_; }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_LINKED_LIST_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc index 505b1b62c2..613eccc5c0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc @@ -23,9 +23,9 @@ #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/mock_span.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { void BM_PushPop(benchmark::State& state) { @@ -141,6 +141,6 @@ static void BM_AppendRemove(benchmark::State& state) { BENCHMARK(BM_AppendRemove)->Range(32, 32 * 1024); } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc index 3299bca8d8..bfc1a73966 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc @@ -20,14 +20,14 @@ #include <vector> #include "gtest/gtest.h" -#include "absl/container/flat_hash_set.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/node_hash_set.h" #include "absl/random/random.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include 
"tcmalloc/internal/mock_span.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class LinkedListTest : public ::testing::Test { @@ -76,7 +76,7 @@ TEST_F(LinkedListTest, PushPop) { // the batch is maintained. TEST_F(LinkedListTest, PushPopBatch) { const std::vector<int> batch_sizes{1, 3, 5, 7, 10, 16}; - absl::flat_hash_set<void*> pushed; + absl::flat_hash_set<void*> pushed; size_t length = 0; for (int batch_size : batch_sizes) { @@ -96,7 +96,7 @@ TEST_F(LinkedListTest, PushPopBatch) { EXPECT_EQ(length == 0, list_.empty()); } - absl::flat_hash_set<void*> popped; + absl::flat_hash_set<void*> popped; for (int batch_size : batch_sizes) { std::vector<void*> batch(batch_size, nullptr); list_.PopBatch(batch_size, batch.data()); @@ -235,5 +235,5 @@ TEST_F(TListTest, AppendRandomRemove) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc index 2b70bc1502..b90ab85f7a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc @@ -31,10 +31,10 @@ #include "tcmalloc/internal/parameter_accessors.h" #include "tcmalloc/malloc_extension.h" -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + // Variables for storing crash output. Allocated statically since we // may not be able to heap-allocate while crashing. 
ABSL_CONST_INIT static absl::base_internal::SpinLock crash_lock( @@ -82,7 +82,7 @@ static Logger FormatLog(bool with_stack, const char* filename, int line, if (with_stack) { state.trace.depth = - absl::GetStackTrace(state.trace.stack, kMaxStackDepth, 1); + absl::GetStackTrace(state.trace.stack, kMaxStackDepth, 1); state.Add(LogItem("@")); for (int i = 0; i < state.trace.depth; i++) { state.Add(LogItem(state.trace.stack[i])); @@ -210,7 +210,7 @@ bool Logger::AddNum(uint64_t num, int base) { return AddStr(pos, end - pos); } -PbtxtRegion::PbtxtRegion(Printer* out, PbtxtRegionType type, int indent) +PbtxtRegion::PbtxtRegion(Printer* out, PbtxtRegionType type, int indent) : out_(out), type_(type), indent_(indent) { switch (type_) { case kTop: @@ -270,7 +270,7 @@ PbtxtRegion PbtxtRegion::CreateSubRegion(absl::string_view key) { PbtxtRegion sub(out_, kNested, indent_); return sub; } - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging.h b/contrib/libs/tcmalloc/tcmalloc/internal/logging.h index 4d42aa40a9..252568fda2 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/logging.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging.h @@ -24,7 +24,7 @@ #include "absl/base/optimization.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" //------------------------------------------------------------------- // Utility routines @@ -37,9 +37,9 @@ // Example: // Log(kLog, __FILE__, __LINE__, "error", bytes); -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { static constexpr int kMaxStackDepth = 64; @@ -59,7 +59,7 @@ struct StackTrace { uintptr_t requested_size; uintptr_t requested_alignment; uintptr_t 
allocated_size; // size after sizeclass/page rounding - + uintptr_t depth; // Number of PC values stored in array below void* stack[kMaxStackDepth]; @@ -75,8 +75,8 @@ struct StackTrace { // produce a hasher for the fields used as keys. return H::combine(H::combine_contiguous(std::move(h), t.stack, t.depth), t.depth, t.requested_size, t.requested_alignment, - t.allocated_size - ); + t.allocated_size + ); } }; @@ -130,11 +130,11 @@ extern void (*log_message_writer)(const char* msg, int length); // Like assert(), but executed even in NDEBUG mode #undef CHECK_CONDITION -#define CHECK_CONDITION(cond) \ - (ABSL_PREDICT_TRUE(cond) ? (void)0 \ - : (::tcmalloc::tcmalloc_internal::Crash( \ - ::tcmalloc::tcmalloc_internal::kCrash, \ - __FILE__, __LINE__, #cond))) +#define CHECK_CONDITION(cond) \ + (ABSL_PREDICT_TRUE(cond) ? (void)0 \ + : (::tcmalloc::tcmalloc_internal::Crash( \ + ::tcmalloc::tcmalloc_internal::kCrash, \ + __FILE__, __LINE__, #cond))) // Our own version of assert() so we can avoid hanging by trying to do // all kinds of goofy printing while holding the malloc lock. @@ -145,7 +145,7 @@ extern void (*log_message_writer)(const char* msg, int length); #endif // Print into buffer -class Printer { +class Printer { private: char* buf_; // Where should we write next int left_; // Space left in buffer (including space for \0) @@ -154,7 +154,7 @@ class Printer { public: // REQUIRES: "length > 0" - Printer(char* buf, int length) : buf_(buf), left_(length), required_(0) { + Printer(char* buf, int length) : buf_(buf), left_(length), required_(0) { ASSERT(length > 0); buf[0] = '\0'; } @@ -191,7 +191,7 @@ enum PbtxtRegionType { kTop, kNested }; // brackets). 
class PbtxtRegion { public: - PbtxtRegion(Printer* out, PbtxtRegionType type, int indent); + PbtxtRegion(Printer* out, PbtxtRegionType type, int indent); ~PbtxtRegion(); PbtxtRegion(const PbtxtRegion&) = delete; @@ -210,13 +210,13 @@ class PbtxtRegion { private: void NewLineAndIndent(); - Printer* out_; + Printer* out_; PbtxtRegionType type_; int indent_; }; -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + #endif // TCMALLOC_INTERNAL_LOGGING_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc index c7b58de40f..2d3ae00436 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc @@ -20,12 +20,12 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/flags/flag.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { +#include "absl/flags/flag.h" +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + static std::string* log_buffer; static void RecordLogMessage(const char* msg, int length) { @@ -94,7 +94,7 @@ TEST(Printer, RequiredSpace) { for (int i = 0; i < 10; i++) { int length = strlen(kChunk) * i + 1; std::unique_ptr<char[]> buf(new char[length]); - Printer printer(buf.get(), length); + Printer printer(buf.get(), length); for (int j = 0; j < i; j++) { printer.printf("%s", kChunk); @@ -112,6 +112,6 @@ TEST(Printer, RequiredSpace) { } } -} // namespace -} // namespace tcmalloc_internal +} // namespace +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc index 36c2b38771..96af48c34c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc +++ 
b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc @@ -1,18 +1,18 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This is a trivial program. When run with a virtual address size rlimit, -// TCMalloc should crash cleanly, rather than hang. - -int main() { return 0; } +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a trivial program. When run with a virtual address size rlimit, +// TCMalloc should crash cleanly, rather than hang. 
+ +int main() { return 0; } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc index 71591834d4..009799d0ec 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc @@ -20,11 +20,11 @@ #include <unistd.h> #include "absl/strings/numbers.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/util.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -129,4 +129,4 @@ bool GetMemoryStats(MemoryStats* stats) { } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h index a65f5b03d3..1d7dc8d28a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h @@ -17,9 +17,9 @@ #include <stdint.h> -#include "tcmalloc/internal/config.h" - -GOOGLE_MALLOC_SECTION_BEGIN +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -36,6 +36,6 @@ bool GetMemoryStats(MemoryStats* stats); } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_MEMORY_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc index e4120bcf5a..2baf01beaa 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc @@ -20,20 +20,20 @@ #include <algorithm> #include <cstdint> -#include "tcmalloc/internal/config.h" - -GOOGLE_MALLOC_SECTION_BEGIN +#include "tcmalloc/internal/config.h" + 
+GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Class that implements the call into the OS provided mincore() function. class OsMInCore final : public MInCoreInterface { - public: + public: int mincore(void* addr, size_t length, unsigned char* result) final { return ::mincore(addr, length, result); } - - ~OsMInCore() override = default; + + ~OsMInCore() override = default; }; // Returns the number of resident bytes for an range of memory of arbitrary @@ -124,6 +124,6 @@ size_t MInCore::residence(void* addr, size_t size) { return residence_impl(addr, size, &mc); } -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h index c353bdac87..de23932032 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h @@ -17,11 +17,11 @@ #include <stddef.h> -#include "tcmalloc/internal/config.h" - -GOOGLE_MALLOC_SECTION_BEGIN +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Class to wrap mincore so that we can replace it for testing. 
class MInCoreInterface { @@ -58,8 +58,8 @@ class MInCore { friend class MInCoreTest; }; -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_MINCORE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc index 02c8ead48d..cd42e8f440 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc @@ -25,7 +25,7 @@ #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/mincore.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace { @@ -58,4 +58,4 @@ BENCHMARK(BM_mincore)->Range(1, 16 * 1024); } // namespace } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc index daa1178b25..e04dc60a94 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc @@ -24,17 +24,17 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include "tcmalloc/internal/logging.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { using ::testing::Eq; // Mock interface to mincore() which has reports residence based on // an array provided at construction. -class MInCoreMock : public MInCoreInterface { +class MInCoreMock : public MInCoreInterface { public: MInCoreMock() : mapped_() {} ~MInCoreMock() override {} @@ -77,10 +77,10 @@ class MInCoreTest { // Expose the internal size of array that we use to call mincore() so // that we can be sure to need multiple calls to cover large memory regions. 
- const size_t chunkSize() { return MInCore::kArrayLength; } + const size_t chunkSize() { return MInCore::kArrayLength; } private: - MInCoreMock mcm_; + MInCoreMock mcm_; }; namespace { @@ -88,7 +88,7 @@ namespace { using ::testing::Eq; TEST(StaticVarsTest, TestResidence) { - MInCoreTest mct; + MInCoreTest mct; const size_t kPageSize = getpagesize(); // Set up a pattern with a few resident pages. @@ -142,7 +142,7 @@ TEST(StaticVarsTest, TestResidence) { // Test whether we are correctly handling multiple calls to mincore. TEST(StaticVarsTest, TestLargeResidence) { - MInCoreTest mct; + MInCoreTest mct; uintptr_t uAddress = 0; const size_t kPageSize = getpagesize(); // Set up a pattern covering 6 * page size * MInCore::kArrayLength to @@ -189,5 +189,5 @@ TEST(StaticVarsTest, UnmappedMemory) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc b/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc index 1639bd1b6d..45161e3085 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc @@ -1,220 +1,220 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "tcmalloc/internal/numa.h" - -#include <errno.h> -#include <fcntl.h> -#include <limits.h> -#include <sched.h> -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> -#include <unistd.h> - -#include <array> -#include <cstring> - -#include "absl/base/attributes.h" -#include "absl/base/internal/sysinfo.h" -#include "absl/functional/function_ref.h" -#include "absl/strings/numbers.h" -#include "absl/strings/string_view.h" -#include "tcmalloc/internal/config.h" -#include "tcmalloc/internal/environment.h" -#include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/util.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -// Returns true iff NUMA awareness should be enabled by default (i.e. in the -// absence of the TCMALLOC_NUMA_AWARE environment variable). This weak -// implementation may be overridden by the one in want_numa_aware.cc. -ABSL_ATTRIBUTE_WEAK bool default_want_numa_aware() { return false; } - -int OpenSysfsCpulist(size_t node) { - char path[PATH_MAX]; - snprintf(path, sizeof(path), "/sys/devices/system/node/node%zu/cpulist", - node); - return signal_safe_open(path, O_RDONLY | O_CLOEXEC); -} - -cpu_set_t ParseCpulist(absl::FunctionRef<ssize_t(char *, size_t)> read) { - cpu_set_t set; - CPU_ZERO(&set); - - std::array<char, 16> buf; - size_t carry_over = 0; - int cpu_from = -1; - - while (true) { - const ssize_t rc = read(buf.data() + carry_over, buf.size() - carry_over); - CHECK_CONDITION(rc >= 0); - - const absl::string_view current(buf.data(), carry_over + rc); - - // If we have no more data to parse & couldn't read any then we've reached - // the end of the input & are done. 
- if (current.empty() && rc == 0) { - break; - } - - size_t consumed; - const size_t dash = current.find('-'); - const size_t comma = current.find(','); - if (dash != absl::string_view::npos && dash < comma) { - CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, dash), &cpu_from)); - consumed = dash + 1; - } else if (comma != absl::string_view::npos || rc == 0) { - int cpu; - CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, comma), &cpu)); - if (comma == absl::string_view::npos) { - consumed = current.size(); - } else { - consumed = comma + 1; - } - if (cpu_from != -1) { - for (int c = cpu_from; c <= cpu; c++) { - CPU_SET(c, &set); - } - cpu_from = -1; - } else { - CPU_SET(cpu, &set); - } - } else { - consumed = 0; - } - - carry_over = current.size() - consumed; - memmove(buf.data(), buf.data() + consumed, carry_over); - } - - return set; -} - -bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], - uint64_t *const partition_to_nodes, - NumaBindMode *const bind_mode, - const size_t num_partitions, const size_t scale_by, - absl::FunctionRef<int(size_t)> open_node_cpulist) { - // Node 0 will always map to partition 0; record it here in case the system - // doesn't support NUMA or the user opts out of our awareness of it - in - // either case we'll record nothing in the loop below. - partition_to_nodes[NodeToPartition(0, num_partitions)] |= 1 << 0; - - // If we only compiled in support for one partition then we're trivially - // done; NUMA awareness is unavailable. - if (num_partitions == 1) return false; - - // We rely on rseq to quickly obtain a CPU ID & lookup the appropriate - // partition in NumaTopology::GetCurrentPartition(). If rseq is unavailable, - // disable NUMA awareness. 
- if (!subtle::percpu::IsFast()) return false; - - // Honor default_want_numa_aware() to allow compile time configuration of - // whether to enable NUMA awareness by default, and allow the user to - // override that either way by setting TCMALLOC_NUMA_AWARE in the - // environment. - // - // In cases where we don't enable NUMA awareness we simply return. Since the - // cpu_to_scaled_partition & partition_to_nodes arrays are zero initialized - // we're trivially done - CPUs all map to partition 0, which contains only - // CPU 0 added above. - const char *e = - tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_NUMA_AWARE"); - if (e == nullptr) { - // Enable NUMA awareness iff default_want_numa_aware(). - if (!default_want_numa_aware()) return false; - } else if (!strcmp(e, "no-binding")) { - // Enable NUMA awareness with no memory binding behavior. - *bind_mode = NumaBindMode::kNone; - } else if (!strcmp(e, "advisory-binding") || !strcmp(e, "1")) { - // Enable NUMA awareness with advisory memory binding behavior. - *bind_mode = NumaBindMode::kAdvisory; - } else if (!strcmp(e, "strict-binding")) { - // Enable NUMA awareness with strict memory binding behavior. - *bind_mode = NumaBindMode::kStrict; - } else if (!strcmp(e, "0")) { - // Disable NUMA awareness. - return false; - } else { - Crash(kCrash, __FILE__, __LINE__, "bad TCMALLOC_NUMA_AWARE env var", e); - } - - // The cpu_to_scaled_partition array has a fixed size so that we can - // statically allocate it & avoid the need to check whether it has been - // allocated prior to lookups. It has CPU_SETSIZE entries which ought to be - // sufficient, but sanity check that indexing it by CPU number shouldn't - // exceed its bounds. 
- int num_cpus = absl::base_internal::NumCPUs(); - CHECK_CONDITION(num_cpus <= CPU_SETSIZE); - - // We could just always report that we're NUMA aware, but if a NUMA-aware - // binary runs on a system that doesn't include multiple NUMA nodes then our - // NUMA awareness will offer no benefit whilst incurring the cost of - // redundant work & stats. As such we only report that we're NUMA aware if - // there's actually NUMA to be aware of, which we track here. - bool numa_aware = false; - - for (size_t node = 0;; node++) { - // Detect NUMA nodes by opening their cpulist files from sysfs. - const int fd = open_node_cpulist(node); - if (fd == -1) { - // We expect to encounter ENOENT once node surpasses the actual number of - // nodes present in the system. Any other error is a problem. - CHECK_CONDITION(errno == ENOENT); - break; - } - - // Record this node in partition_to_nodes. - const size_t partition = NodeToPartition(node, num_partitions); - partition_to_nodes[partition] |= 1 << node; - - // cpu_to_scaled_partition_ entries are default initialized to zero, so - // skip redundantly parsing CPU lists for nodes that map to partition 0. - if (partition == 0) { - signal_safe_close(fd); - continue; - } - - // Parse the cpulist file to determine which CPUs are local to this node. - const cpu_set_t node_cpus = - ParseCpulist([&](char *const buf, const size_t count) { - return signal_safe_read(fd, buf, count, /*bytes_read=*/nullptr); - }); - - // Assign local CPUs to the appropriate partition. - for (size_t cpu = 0; cpu < CPU_SETSIZE; cpu++) { - if (CPU_ISSET(cpu, &node_cpus)) { - cpu_to_scaled_partition[cpu + kNumaCpuFudge] = partition * scale_by; - } - } - - // If we observed any CPUs for this node then we've now got CPUs assigned - // to a non-zero partition; report that we're NUMA aware. 
- if (CPU_COUNT(&node_cpus) != 0) { - numa_aware = true; - } - - signal_safe_close(fd); - } - - return numa_aware; -} - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/numa.h" + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <unistd.h> + +#include <array> +#include <cstring> + +#include "absl/base/attributes.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Returns true iff NUMA awareness should be enabled by default (i.e. in the +// absence of the TCMALLOC_NUMA_AWARE environment variable). This weak +// implementation may be overridden by the one in want_numa_aware.cc. 
+ABSL_ATTRIBUTE_WEAK bool default_want_numa_aware() { return false; } + +int OpenSysfsCpulist(size_t node) { + char path[PATH_MAX]; + snprintf(path, sizeof(path), "/sys/devices/system/node/node%zu/cpulist", + node); + return signal_safe_open(path, O_RDONLY | O_CLOEXEC); +} + +cpu_set_t ParseCpulist(absl::FunctionRef<ssize_t(char *, size_t)> read) { + cpu_set_t set; + CPU_ZERO(&set); + + std::array<char, 16> buf; + size_t carry_over = 0; + int cpu_from = -1; + + while (true) { + const ssize_t rc = read(buf.data() + carry_over, buf.size() - carry_over); + CHECK_CONDITION(rc >= 0); + + const absl::string_view current(buf.data(), carry_over + rc); + + // If we have no more data to parse & couldn't read any then we've reached + // the end of the input & are done. + if (current.empty() && rc == 0) { + break; + } + + size_t consumed; + const size_t dash = current.find('-'); + const size_t comma = current.find(','); + if (dash != absl::string_view::npos && dash < comma) { + CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, dash), &cpu_from)); + consumed = dash + 1; + } else if (comma != absl::string_view::npos || rc == 0) { + int cpu; + CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, comma), &cpu)); + if (comma == absl::string_view::npos) { + consumed = current.size(); + } else { + consumed = comma + 1; + } + if (cpu_from != -1) { + for (int c = cpu_from; c <= cpu; c++) { + CPU_SET(c, &set); + } + cpu_from = -1; + } else { + CPU_SET(cpu, &set); + } + } else { + consumed = 0; + } + + carry_over = current.size() - consumed; + memmove(buf.data(), buf.data() + consumed, carry_over); + } + + return set; +} + +bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], + uint64_t *const partition_to_nodes, + NumaBindMode *const bind_mode, + const size_t num_partitions, const size_t scale_by, + absl::FunctionRef<int(size_t)> open_node_cpulist) { + // Node 0 will always map to partition 0; record it here in case the system + // doesn't support NUMA or the user opts out 
of our awareness of it - in + // either case we'll record nothing in the loop below. + partition_to_nodes[NodeToPartition(0, num_partitions)] |= 1 << 0; + + // If we only compiled in support for one partition then we're trivially + // done; NUMA awareness is unavailable. + if (num_partitions == 1) return false; + + // We rely on rseq to quickly obtain a CPU ID & lookup the appropriate + // partition in NumaTopology::GetCurrentPartition(). If rseq is unavailable, + // disable NUMA awareness. + if (!subtle::percpu::IsFast()) return false; + + // Honor default_want_numa_aware() to allow compile time configuration of + // whether to enable NUMA awareness by default, and allow the user to + // override that either way by setting TCMALLOC_NUMA_AWARE in the + // environment. + // + // In cases where we don't enable NUMA awareness we simply return. Since the + // cpu_to_scaled_partition & partition_to_nodes arrays are zero initialized + // we're trivially done - CPUs all map to partition 0, which contains only + // CPU 0 added above. + const char *e = + tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_NUMA_AWARE"); + if (e == nullptr) { + // Enable NUMA awareness iff default_want_numa_aware(). + if (!default_want_numa_aware()) return false; + } else if (!strcmp(e, "no-binding")) { + // Enable NUMA awareness with no memory binding behavior. + *bind_mode = NumaBindMode::kNone; + } else if (!strcmp(e, "advisory-binding") || !strcmp(e, "1")) { + // Enable NUMA awareness with advisory memory binding behavior. + *bind_mode = NumaBindMode::kAdvisory; + } else if (!strcmp(e, "strict-binding")) { + // Enable NUMA awareness with strict memory binding behavior. + *bind_mode = NumaBindMode::kStrict; + } else if (!strcmp(e, "0")) { + // Disable NUMA awareness. 
+ return false; + } else { + Crash(kCrash, __FILE__, __LINE__, "bad TCMALLOC_NUMA_AWARE env var", e); + } + + // The cpu_to_scaled_partition array has a fixed size so that we can + // statically allocate it & avoid the need to check whether it has been + // allocated prior to lookups. It has CPU_SETSIZE entries which ought to be + // sufficient, but sanity check that indexing it by CPU number shouldn't + // exceed its bounds. + int num_cpus = absl::base_internal::NumCPUs(); + CHECK_CONDITION(num_cpus <= CPU_SETSIZE); + + // We could just always report that we're NUMA aware, but if a NUMA-aware + // binary runs on a system that doesn't include multiple NUMA nodes then our + // NUMA awareness will offer no benefit whilst incurring the cost of + // redundant work & stats. As such we only report that we're NUMA aware if + // there's actually NUMA to be aware of, which we track here. + bool numa_aware = false; + + for (size_t node = 0;; node++) { + // Detect NUMA nodes by opening their cpulist files from sysfs. + const int fd = open_node_cpulist(node); + if (fd == -1) { + // We expect to encounter ENOENT once node surpasses the actual number of + // nodes present in the system. Any other error is a problem. + CHECK_CONDITION(errno == ENOENT); + break; + } + + // Record this node in partition_to_nodes. + const size_t partition = NodeToPartition(node, num_partitions); + partition_to_nodes[partition] |= 1 << node; + + // cpu_to_scaled_partition_ entries are default initialized to zero, so + // skip redundantly parsing CPU lists for nodes that map to partition 0. + if (partition == 0) { + signal_safe_close(fd); + continue; + } + + // Parse the cpulist file to determine which CPUs are local to this node. + const cpu_set_t node_cpus = + ParseCpulist([&](char *const buf, const size_t count) { + return signal_safe_read(fd, buf, count, /*bytes_read=*/nullptr); + }); + + // Assign local CPUs to the appropriate partition. 
+ for (size_t cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (CPU_ISSET(cpu, &node_cpus)) { + cpu_to_scaled_partition[cpu + kNumaCpuFudge] = partition * scale_by; + } + } + + // If we observed any CPUs for this node then we've now got CPUs assigned + // to a non-zero partition; report that we're NUMA aware. + if (CPU_COUNT(&node_cpus) != 0) { + numa_aware = true; + } + + signal_safe_close(fd); + } + + return numa_aware; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa.h b/contrib/libs/tcmalloc/tcmalloc/internal/numa.h index bf04c65c21..d6e5b34d5a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/numa.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa.h @@ -1,227 +1,227 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TCMALLOC_INTERNAL_NUMA_H_ -#define TCMALLOC_INTERNAL_NUMA_H_ - -#include <sched.h> -#include <stddef.h> -#include <sys/types.h> - -#include "absl/functional/function_ref.h" -#include "tcmalloc/internal/config.h" -#include "tcmalloc/internal/percpu.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -// Indicates how TCMalloc should handle binding memory regions to nodes within -// particular NUMA partitions. -enum class NumaBindMode { - // Don't bind memory at all. 
Note that this does not make NUMA awareness - // pointless so long as the NUMA memory policy of threads performing - // allocations favors the local node. It does mean that we won't be certain - // that memory is local to any particular partition, it will just be likely. - kNone, - // Attempt to bind memory but don't treat failure as fatal. If binding fails - // then a warning will be logged & we'll then be in much the same state as - // kNone. - kAdvisory, - // Strictly bind memory to nodes within the partition we expect - any error - // in doing so is fatal & the program will crash. This allows a program to - // ensure that memory is definitely bound to the set of nodes we expect. - kStrict, -}; - -// We use the result of RseqCpuId() in GetCurrentPartition() to avoid branching -// in the fast path, but this means that the CPU number we look up in -// cpu_to_scaled_partition_ might equal kCpuIdUninitialized or -// kCpuIdUnsupported. We add this fudge factor to the value to compensate, -// ensuring that our accesses to the cpu_to_scaled_partition_ array are always -// in bounds. -static constexpr size_t kNumaCpuFudge = -subtle::percpu::kCpuIdUnsupported; - -// Provides information about the topology of a NUMA system. -// -// In general we cannot know at compile time how many NUMA nodes the system -// that we run upon will include, but we also cannot size our data structures -// arbitrarily at runtime in the name of efficiency. In order to resolve the -// conflict between these two constraints we define the concept of a NUMA -// 'partition' as being an arbitrary set of NUMA nodes, disjoint from all other -// partitions. At compile time we select a fixed number of partitions to -// support, and at runtime we map each NUMA node in the system to a partition. -// If the number of supported partitions is greater than or equal to the number -// of NUMA nodes in the system then partition & node are effectively identical. 
-// If however the system has more nodes than we do partitions then nodes -// assigned to the same partition will share size classes & thus memory. This -// may incur a performance hit, but allows us to at least run on any system. -template <size_t NumPartitions, size_t ScaleBy = 1> -class NumaTopology { - public: - // Trivially zero initialize data members. - constexpr NumaTopology() = default; - - // Initialize topology information. This must be called only once, before any - // of the functions below. - void Init(); - - // Like Init(), but allows a test to specify a different `open_node_cpulist` - // function in order to provide NUMA topology information that doesn't - // reflect the system we're running upon. - void InitForTest(absl::FunctionRef<int(size_t)> open_node_cpulist); - - // Returns true if NUMA awareness is available & enabled, otherwise false. - bool numa_aware() const { - // Explicitly checking NumPartitions here provides a compile time constant - // false in cases where NumPartitions==1, allowing NUMA awareness to be - // optimized away. - return (NumPartitions > 1) && numa_aware_; - } - - // Returns the number of NUMA partitions deemed 'active' - i.e. the number of - // partitions that other parts of TCMalloc need to concern themselves with. - // Checking this rather than using kNumaPartitions allows users to avoid work - // on non-zero partitions when NUMA awareness is disabled. - size_t active_partitions() const { return numa_aware() ? NumPartitions : 1; } - - // Return a value indicating how we should behave with regards to binding - // memory regions to NUMA nodes. - NumaBindMode bind_mode() const { return bind_mode_; } - - // Return the NUMA partition number to which the CPU we're currently - // executing upon belongs. Note that whilst the CPU->partition mapping is - // fixed, the return value of this function may change at arbitrary times as - // this thread migrates between CPUs. 
- size_t GetCurrentPartition() const; - - // Like GetCurrentPartition(), but returns a partition number multiplied by - // ScaleBy. - size_t GetCurrentScaledPartition() const; - - // Return the NUMA partition number to which `cpu` belongs. - // - // It is valid for cpu to equal subtle::percpu::kCpuIdUninitialized or - // subtle::percpu::kCpuIdUnsupported. In either case partition 0 will be - // returned. - size_t GetCpuPartition(int cpu) const; - - // Like GetCpuPartition(), but returns a partition number multiplied by - // ScaleBy. - size_t GetCpuScaledPartition(int cpu) const; - - // Return a bitmap in which set bits identify the nodes that belong to the - // specified NUMA `partition`. - uint64_t GetPartitionNodes(int partition) const; - - private: - // Maps from CPU number (plus kNumaCpuFudge) to NUMA partition. - size_t cpu_to_scaled_partition_[CPU_SETSIZE + kNumaCpuFudge] = {0}; - // Maps from NUMA partition to a bitmap of NUMA nodes within the partition. - uint64_t partition_to_nodes_[NumPartitions] = {0}; - // Indicates whether NUMA awareness is available & enabled. - bool numa_aware_ = false; - // Desired memory binding behavior. - NumaBindMode bind_mode_ = NumaBindMode::kAdvisory; -}; - -// Opens a /sys/devices/system/node/nodeX/cpulist file for read only access & -// returns the file descriptor. -int OpenSysfsCpulist(size_t node); - -// Parse a CPU list in the format used by -// /sys/devices/system/node/nodeX/cpulist files - that is, individual CPU -// numbers or ranges in the format <start>-<end> inclusive all joined by comma -// characters. -// -// The read function is expected to operate much like the read syscall. It -// should read up to `count` bytes into `buf` and return the number of bytes -// actually read. If an error occurs during reading it should return -1 with -// errno set to an appropriate error code. 
-cpu_set_t ParseCpulist( - absl::FunctionRef<ssize_t(char *buf, size_t count)> read); - -// Initialize the data members of a NumaTopology<> instance. -// -// This function must only be called once per NumaTopology<> instance, and -// relies upon the data members of that instance being default initialized. -// -// The `open_node_cpulist` function is typically OpenSysfsCpulist but tests may -// use a different implementation. -// -// Returns true if we're actually NUMA aware; i.e. if we have CPUs mapped to -// multiple partitions. -bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], - uint64_t *partition_to_nodes, NumaBindMode *bind_mode, - size_t num_partitions, size_t scale_by, - absl::FunctionRef<int(size_t)> open_node_cpulist); - -// Returns the NUMA partition to which `node` belongs. -inline size_t NodeToPartition(const size_t node, const size_t num_partitions) { - return node % num_partitions; -} - -template <size_t NumPartitions, size_t ScaleBy> -inline void NumaTopology<NumPartitions, ScaleBy>::Init() { - numa_aware_ = - InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_, - &bind_mode_, NumPartitions, ScaleBy, OpenSysfsCpulist); -} - -template <size_t NumPartitions, size_t ScaleBy> -inline void NumaTopology<NumPartitions, ScaleBy>::InitForTest( - absl::FunctionRef<int(size_t)> open_node_cpulist) { - numa_aware_ = - InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_, - &bind_mode_, NumPartitions, ScaleBy, open_node_cpulist); -} - -template <size_t NumPartitions, size_t ScaleBy> -inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCurrentPartition() - const { - if constexpr (NumPartitions == 1) return 0; - return GetCpuPartition(subtle::percpu::RseqCpuId()); -} - -template <size_t NumPartitions, size_t ScaleBy> -inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCurrentScaledPartition() - const { - if constexpr (NumPartitions == 1) return 0; - return GetCpuScaledPartition(subtle::percpu::RseqCpuId()); -} - -template 
<size_t NumPartitions, size_t ScaleBy> -inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCpuPartition( - const int cpu) const { - return GetCpuScaledPartition(cpu) / ScaleBy; -} - -template <size_t NumPartitions, size_t ScaleBy> -inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCpuScaledPartition( - const int cpu) const { - if constexpr (NumPartitions == 1) return 0; - return cpu_to_scaled_partition_[cpu + kNumaCpuFudge]; -} - -template <size_t NumPartitions, size_t ScaleBy> -inline uint64_t NumaTopology<NumPartitions, ScaleBy>::GetPartitionNodes( - const int partition) const { - return partition_to_nodes_[partition]; -} - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -#endif // TCMALLOC_INTERNAL_NUMA_H_ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_NUMA_H_ +#define TCMALLOC_INTERNAL_NUMA_H_ + +#include <sched.h> +#include <stddef.h> +#include <sys/types.h> + +#include "absl/functional/function_ref.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/percpu.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Indicates how TCMalloc should handle binding memory regions to nodes within +// particular NUMA partitions. +enum class NumaBindMode { + // Don't bind memory at all. 
Note that this does not make NUMA awareness + // pointless so long as the NUMA memory policy of threads performing + // allocations favors the local node. It does mean that we won't be certain + // that memory is local to any particular partition, it will just be likely. + kNone, + // Attempt to bind memory but don't treat failure as fatal. If binding fails + // then a warning will be logged & we'll then be in much the same state as + // kNone. + kAdvisory, + // Strictly bind memory to nodes within the partition we expect - any error + // in doing so is fatal & the program will crash. This allows a program to + // ensure that memory is definitely bound to the set of nodes we expect. + kStrict, +}; + +// We use the result of RseqCpuId() in GetCurrentPartition() to avoid branching +// in the fast path, but this means that the CPU number we look up in +// cpu_to_scaled_partition_ might equal kCpuIdUninitialized or +// kCpuIdUnsupported. We add this fudge factor to the value to compensate, +// ensuring that our accesses to the cpu_to_scaled_partition_ array are always +// in bounds. +static constexpr size_t kNumaCpuFudge = -subtle::percpu::kCpuIdUnsupported; + +// Provides information about the topology of a NUMA system. +// +// In general we cannot know at compile time how many NUMA nodes the system +// that we run upon will include, but we also cannot size our data structures +// arbitrarily at runtime in the name of efficiency. In order to resolve the +// conflict between these two constraints we define the concept of a NUMA +// 'partition' as being an arbitrary set of NUMA nodes, disjoint from all other +// partitions. At compile time we select a fixed number of partitions to +// support, and at runtime we map each NUMA node in the system to a partition. +// If the number of supported partitions is greater than or equal to the number +// of NUMA nodes in the system then partition & node are effectively identical. 
+// If however the system has more nodes than we do partitions then nodes +// assigned to the same partition will share size classes & thus memory. This +// may incur a performance hit, but allows us to at least run on any system. +template <size_t NumPartitions, size_t ScaleBy = 1> +class NumaTopology { + public: + // Trivially zero initialize data members. + constexpr NumaTopology() = default; + + // Initialize topology information. This must be called only once, before any + // of the functions below. + void Init(); + + // Like Init(), but allows a test to specify a different `open_node_cpulist` + // function in order to provide NUMA topology information that doesn't + // reflect the system we're running upon. + void InitForTest(absl::FunctionRef<int(size_t)> open_node_cpulist); + + // Returns true if NUMA awareness is available & enabled, otherwise false. + bool numa_aware() const { + // Explicitly checking NumPartitions here provides a compile time constant + // false in cases where NumPartitions==1, allowing NUMA awareness to be + // optimized away. + return (NumPartitions > 1) && numa_aware_; + } + + // Returns the number of NUMA partitions deemed 'active' - i.e. the number of + // partitions that other parts of TCMalloc need to concern themselves with. + // Checking this rather than using kNumaPartitions allows users to avoid work + // on non-zero partitions when NUMA awareness is disabled. + size_t active_partitions() const { return numa_aware() ? NumPartitions : 1; } + + // Return a value indicating how we should behave with regards to binding + // memory regions to NUMA nodes. + NumaBindMode bind_mode() const { return bind_mode_; } + + // Return the NUMA partition number to which the CPU we're currently + // executing upon belongs. Note that whilst the CPU->partition mapping is + // fixed, the return value of this function may change at arbitrary times as + // this thread migrates between CPUs. 
+ size_t GetCurrentPartition() const; + + // Like GetCurrentPartition(), but returns a partition number multiplied by + // ScaleBy. + size_t GetCurrentScaledPartition() const; + + // Return the NUMA partition number to which `cpu` belongs. + // + // It is valid for cpu to equal subtle::percpu::kCpuIdUninitialized or + // subtle::percpu::kCpuIdUnsupported. In either case partition 0 will be + // returned. + size_t GetCpuPartition(int cpu) const; + + // Like GetCpuPartition(), but returns a partition number multiplied by + // ScaleBy. + size_t GetCpuScaledPartition(int cpu) const; + + // Return a bitmap in which set bits identify the nodes that belong to the + // specified NUMA `partition`. + uint64_t GetPartitionNodes(int partition) const; + + private: + // Maps from CPU number (plus kNumaCpuFudge) to NUMA partition. + size_t cpu_to_scaled_partition_[CPU_SETSIZE + kNumaCpuFudge] = {0}; + // Maps from NUMA partition to a bitmap of NUMA nodes within the partition. + uint64_t partition_to_nodes_[NumPartitions] = {0}; + // Indicates whether NUMA awareness is available & enabled. + bool numa_aware_ = false; + // Desired memory binding behavior. + NumaBindMode bind_mode_ = NumaBindMode::kAdvisory; +}; + +// Opens a /sys/devices/system/node/nodeX/cpulist file for read only access & +// returns the file descriptor. +int OpenSysfsCpulist(size_t node); + +// Parse a CPU list in the format used by +// /sys/devices/system/node/nodeX/cpulist files - that is, individual CPU +// numbers or ranges in the format <start>-<end> inclusive all joined by comma +// characters. +// +// The read function is expected to operate much like the read syscall. It +// should read up to `count` bytes into `buf` and return the number of bytes +// actually read. If an error occurs during reading it should return -1 with +// errno set to an appropriate error code. 
+cpu_set_t ParseCpulist( + absl::FunctionRef<ssize_t(char *buf, size_t count)> read); + +// Initialize the data members of a NumaTopology<> instance. +// +// This function must only be called once per NumaTopology<> instance, and +// relies upon the data members of that instance being default initialized. +// +// The `open_node_cpulist` function is typically OpenSysfsCpulist but tests may +// use a different implementation. +// +// Returns true if we're actually NUMA aware; i.e. if we have CPUs mapped to +// multiple partitions. +bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], + uint64_t *partition_to_nodes, NumaBindMode *bind_mode, + size_t num_partitions, size_t scale_by, + absl::FunctionRef<int(size_t)> open_node_cpulist); + +// Returns the NUMA partition to which `node` belongs. +inline size_t NodeToPartition(const size_t node, const size_t num_partitions) { + return node % num_partitions; +} + +template <size_t NumPartitions, size_t ScaleBy> +inline void NumaTopology<NumPartitions, ScaleBy>::Init() { + numa_aware_ = + InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_, + &bind_mode_, NumPartitions, ScaleBy, OpenSysfsCpulist); +} + +template <size_t NumPartitions, size_t ScaleBy> +inline void NumaTopology<NumPartitions, ScaleBy>::InitForTest( + absl::FunctionRef<int(size_t)> open_node_cpulist) { + numa_aware_ = + InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_, + &bind_mode_, NumPartitions, ScaleBy, open_node_cpulist); +} + +template <size_t NumPartitions, size_t ScaleBy> +inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCurrentPartition() + const { + if constexpr (NumPartitions == 1) return 0; + return GetCpuPartition(subtle::percpu::RseqCpuId()); +} + +template <size_t NumPartitions, size_t ScaleBy> +inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCurrentScaledPartition() + const { + if constexpr (NumPartitions == 1) return 0; + return GetCpuScaledPartition(subtle::percpu::RseqCpuId()); +} + +template 
<size_t NumPartitions, size_t ScaleBy> +inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCpuPartition( + const int cpu) const { + return GetCpuScaledPartition(cpu) / ScaleBy; +} + +template <size_t NumPartitions, size_t ScaleBy> +inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCpuScaledPartition( + const int cpu) const { + if constexpr (NumPartitions == 1) return 0; + return cpu_to_scaled_partition_[cpu + kNumaCpuFudge]; +} + +template <size_t NumPartitions, size_t ScaleBy> +inline uint64_t NumaTopology<NumPartitions, ScaleBy>::GetPartitionNodes( + const int partition) const { + return partition_to_nodes_[partition]; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_NUMA_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc index bbd86a3f7d..29dbeffd71 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc @@ -1,284 +1,284 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "tcmalloc/internal/numa.h" - -#include <errno.h> -#include <linux/memfd.h> -#include <sched.h> -#include <stddef.h> -#include <stdio.h> -#include <string.h> -#include <syscall.h> -#include <unistd.h> - -#include <algorithm> -#include <new> -#include <string> -#include <utility> -#include <vector> - -#include "gtest/gtest.h" -#include "absl/random/random.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" -#include "absl/strings/string_view.h" -#include "absl/types/span.h" -#include "tcmalloc/internal/logging.h" - -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - -int memfd_create(const char *name, unsigned int flags) { -#ifdef __NR_memfd_create - return syscall(__NR_memfd_create, name, flags); -#else - errno = ENOSYS; - return -1; -#endif -} - -// A synthetic cpulist that can be read from a file descriptor. -class SyntheticCpuList { - public: - explicit SyntheticCpuList(const absl::string_view content) { - fd_ = memfd_create("cpulist", MFD_CLOEXEC); - CHECK_CONDITION(fd_ != -1); - - CHECK_CONDITION(write(fd_, content.data(), content.size()) == - content.size()); - CHECK_CONDITION(write(fd_, "\n", 1) == 1); - CHECK_CONDITION(lseek(fd_, 0, SEEK_SET) == 0); - } - - ~SyntheticCpuList() { close(fd_); } - - // Disallow copies, which would make require reference counting to know when - // we should close fd_. - SyntheticCpuList(const SyntheticCpuList &) = delete; - SyntheticCpuList &operator=(const SyntheticCpuList &) = delete; - - // Moves are fine - only one instance at a time holds the fd. - SyntheticCpuList(SyntheticCpuList &&other) - : fd_(std::exchange(other.fd_, -1)) {} - SyntheticCpuList &operator=(SyntheticCpuList &&other) { - new (this) SyntheticCpuList(std::move(other)); - return *this; - } - - int fd() const { return fd_; } - - private: - // The underlying memfd. 
- int fd_; -}; - -class NumaTopologyTest : public ::testing::Test { - protected: - void SetUp() override { - // We use memfd to create synthetic cpulist files, and can't run without - // it. Skip all affected tests if memfd is not supported (i.e. Linux < - // 3.17). - const int fd = memfd_create("test", MFD_CLOEXEC); - if (fd == -1 && errno == ENOSYS) { - GTEST_SKIP() << "Test requires memfd support"; - } - close(fd); - - // If rseq is unavailable the NumaTopology never enables NUMA awareness. - if (!subtle::percpu::IsFast()) { - GTEST_SKIP() << "Test requires rseq support"; - } - } -}; - -template <size_t NumPartitions> -NumaTopology<NumPartitions> CreateNumaTopology( - const absl::Span<const SyntheticCpuList> cpu_lists) { - NumaTopology<NumPartitions> nt; - nt.InitForTest([&](const size_t node) { - if (node >= cpu_lists.size()) { - errno = ENOENT; - return -1; - } - return cpu_lists[node].fd(); - }); - return nt; -} - -// Ensure that if we set NumPartitions=1 then NUMA awareness is disabled even -// in the presence of a system with multiple NUMA nodes. -TEST_F(NumaTopologyTest, NoCompileTimeNuma) { - std::vector<SyntheticCpuList> nodes; - nodes.emplace_back("0"); - nodes.emplace_back("1"); - - const auto nt = CreateNumaTopology<1>(nodes); - - EXPECT_EQ(nt.numa_aware(), false); - EXPECT_EQ(nt.active_partitions(), 1); -} - -// Ensure that if we run on a system with no NUMA support at all (i.e. no -// /sys/devices/system/node/nodeX/cpulist files) we correctly disable NUMA -// awareness. -TEST_F(NumaTopologyTest, NoRunTimeNuma) { - const auto nt = CreateNumaTopology<2>({}); - - EXPECT_EQ(nt.numa_aware(), false); - EXPECT_EQ(nt.active_partitions(), 1); -} - -// Ensure that if we run on a system with only 1 node then we disable NUMA -// awareness. 
-TEST_F(NumaTopologyTest, SingleNode) { - std::vector<SyntheticCpuList> nodes; - nodes.emplace_back("0-27"); - - const auto nt = CreateNumaTopology<4>(nodes); - - EXPECT_EQ(nt.numa_aware(), false); - EXPECT_EQ(nt.active_partitions(), 1); -} - -// Basic sanity test modelling a simple 2 node system. -TEST_F(NumaTopologyTest, TwoNode) { - std::vector<SyntheticCpuList> nodes; - nodes.emplace_back("0-5"); - nodes.emplace_back("6-11"); - - const auto nt = CreateNumaTopology<2>(nodes); - - EXPECT_EQ(nt.numa_aware(), true); - EXPECT_EQ(nt.active_partitions(), 2); - - for (int cpu = 0; cpu <= 5; cpu++) { - EXPECT_EQ(nt.GetCpuPartition(cpu), 0); - } - for (int cpu = 6; cpu <= 11; cpu++) { - EXPECT_EQ(nt.GetCpuPartition(cpu), 1); - } -} - -// Test that cpulists too long to fit into the 16 byte buffer used by -// InitNumaTopology() parse successfully. -TEST_F(NumaTopologyTest, LongCpuLists) { - std::vector<SyntheticCpuList> nodes; - - // Content from here onwards lies | - // beyond the 16 byte buffer. | - // v - nodes.emplace_back("0-1,2-3,4-5,6-7,8"); // Right after a comma - nodes.emplace_back("9,10,11,12,13,14,15-19"); // Right before a comma - nodes.emplace_back("20-21,22-23,24-25,26-29"); // Within range end - nodes.emplace_back("30-32,33,34,35,36-38,39"); // Within range start - nodes.emplace_back("40-43,44,45-49"); - - const auto nt = CreateNumaTopology<3>(nodes); - - EXPECT_EQ(nt.numa_aware(), true); - EXPECT_EQ(nt.active_partitions(), 3); - - for (int cpu = 0; cpu <= 8; cpu++) { - EXPECT_EQ(nt.GetCpuPartition(cpu), 0); - } - for (int cpu = 9; cpu <= 19; cpu++) { - EXPECT_EQ(nt.GetCpuPartition(cpu), 1); - } - for (int cpu = 20; cpu <= 29; cpu++) { - EXPECT_EQ(nt.GetCpuPartition(cpu), 2); - } - for (int cpu = 30; cpu <= 39; cpu++) { - EXPECT_EQ(nt.GetCpuPartition(cpu), 0); - } - for (int cpu = 40; cpu <= 49; cpu++) { - EXPECT_EQ(nt.GetCpuPartition(cpu), 1); - } -} - -// Ensure we can initialize using the host system's real NUMA topology -// information. 
-TEST_F(NumaTopologyTest, Host) { - NumaTopology<4> nt; - nt.Init(); - - // We don't actually know anything about the host, so there's not much more - // we can do beyond checking that we didn't crash. -} - -// Ensure that we can parse randomized cpulists correctly. -TEST(ParseCpulistTest, Random) { - absl::BitGen gen; - - static constexpr int kIterations = 100; - for (int i = 0; i < kIterations; i++) { - cpu_set_t reference; - CPU_ZERO(&reference); - - // Set a random number of CPUs within the reference set. - const double density = absl::Uniform(gen, 0.0, 1.0); - for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { - if (absl::Bernoulli(gen, density)) { - CPU_SET(cpu, &reference); - } - } - - // Serialize the reference set into a cpulist-style string. - std::vector<std::string> components; - for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { - if (!CPU_ISSET(cpu, &reference)) continue; - - const int start = cpu; - int next = cpu + 1; - while (next < CPU_SETSIZE && CPU_ISSET(next, &reference)) { - cpu = next; - next = cpu + 1; - } - - if (cpu == start) { - components.push_back(absl::StrCat(cpu)); - } else { - components.push_back(absl::StrCat(start, "-", cpu)); - } - } - const std::string serialized = absl::StrJoin(components, ","); - - // Now parse that string using our ParseCpulist function, randomizing the - // amount of data we provide to it from each read. - absl::string_view remaining(serialized); - const cpu_set_t parsed = - ParseCpulist([&](char *const buf, const size_t count) -> ssize_t { - // Calculate how much data we have left to provide. - const size_t max = std::min(count, remaining.size()); - - // If none, we have no choice but to provide nothing. - if (max == 0) return 0; - - // If we do have data, return a randomly sized subset of it to stress - // the logic around reading partial values. 
- const size_t copy = absl::Uniform(gen, static_cast<size_t>(1), max); - memcpy(buf, remaining.data(), copy); - remaining.remove_prefix(copy); - return copy; - }); - - // We ought to have parsed the same set of CPUs that we serialized. - EXPECT_TRUE(CPU_EQUAL(&parsed, &reference)); - } -} - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/numa.h" + +#include <errno.h> +#include <linux/memfd.h> +#include <sched.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> + +#include <algorithm> +#include <new> +#include <string> +#include <utility> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +int memfd_create(const char *name, unsigned int flags) { +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + +// A synthetic cpulist that can be read from a file descriptor. 
+class SyntheticCpuList { + public: + explicit SyntheticCpuList(const absl::string_view content) { + fd_ = memfd_create("cpulist", MFD_CLOEXEC); + CHECK_CONDITION(fd_ != -1); + + CHECK_CONDITION(write(fd_, content.data(), content.size()) == + content.size()); + CHECK_CONDITION(write(fd_, "\n", 1) == 1); + CHECK_CONDITION(lseek(fd_, 0, SEEK_SET) == 0); + } + + ~SyntheticCpuList() { close(fd_); } + + // Disallow copies, which would make require reference counting to know when + // we should close fd_. + SyntheticCpuList(const SyntheticCpuList &) = delete; + SyntheticCpuList &operator=(const SyntheticCpuList &) = delete; + + // Moves are fine - only one instance at a time holds the fd. + SyntheticCpuList(SyntheticCpuList &&other) + : fd_(std::exchange(other.fd_, -1)) {} + SyntheticCpuList &operator=(SyntheticCpuList &&other) { + new (this) SyntheticCpuList(std::move(other)); + return *this; + } + + int fd() const { return fd_; } + + private: + // The underlying memfd. + int fd_; +}; + +class NumaTopologyTest : public ::testing::Test { + protected: + void SetUp() override { + // We use memfd to create synthetic cpulist files, and can't run without + // it. Skip all affected tests if memfd is not supported (i.e. Linux < + // 3.17). + const int fd = memfd_create("test", MFD_CLOEXEC); + if (fd == -1 && errno == ENOSYS) { + GTEST_SKIP() << "Test requires memfd support"; + } + close(fd); + + // If rseq is unavailable the NumaTopology never enables NUMA awareness. 
+ if (!subtle::percpu::IsFast()) { + GTEST_SKIP() << "Test requires rseq support"; + } + } +}; + +template <size_t NumPartitions> +NumaTopology<NumPartitions> CreateNumaTopology( + const absl::Span<const SyntheticCpuList> cpu_lists) { + NumaTopology<NumPartitions> nt; + nt.InitForTest([&](const size_t node) { + if (node >= cpu_lists.size()) { + errno = ENOENT; + return -1; + } + return cpu_lists[node].fd(); + }); + return nt; +} + +// Ensure that if we set NumPartitions=1 then NUMA awareness is disabled even +// in the presence of a system with multiple NUMA nodes. +TEST_F(NumaTopologyTest, NoCompileTimeNuma) { + std::vector<SyntheticCpuList> nodes; + nodes.emplace_back("0"); + nodes.emplace_back("1"); + + const auto nt = CreateNumaTopology<1>(nodes); + + EXPECT_EQ(nt.numa_aware(), false); + EXPECT_EQ(nt.active_partitions(), 1); +} + +// Ensure that if we run on a system with no NUMA support at all (i.e. no +// /sys/devices/system/node/nodeX/cpulist files) we correctly disable NUMA +// awareness. +TEST_F(NumaTopologyTest, NoRunTimeNuma) { + const auto nt = CreateNumaTopology<2>({}); + + EXPECT_EQ(nt.numa_aware(), false); + EXPECT_EQ(nt.active_partitions(), 1); +} + +// Ensure that if we run on a system with only 1 node then we disable NUMA +// awareness. +TEST_F(NumaTopologyTest, SingleNode) { + std::vector<SyntheticCpuList> nodes; + nodes.emplace_back("0-27"); + + const auto nt = CreateNumaTopology<4>(nodes); + + EXPECT_EQ(nt.numa_aware(), false); + EXPECT_EQ(nt.active_partitions(), 1); +} + +// Basic sanity test modelling a simple 2 node system. 
+TEST_F(NumaTopologyTest, TwoNode) { + std::vector<SyntheticCpuList> nodes; + nodes.emplace_back("0-5"); + nodes.emplace_back("6-11"); + + const auto nt = CreateNumaTopology<2>(nodes); + + EXPECT_EQ(nt.numa_aware(), true); + EXPECT_EQ(nt.active_partitions(), 2); + + for (int cpu = 0; cpu <= 5; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 0); + } + for (int cpu = 6; cpu <= 11; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 1); + } +} + +// Test that cpulists too long to fit into the 16 byte buffer used by +// InitNumaTopology() parse successfully. +TEST_F(NumaTopologyTest, LongCpuLists) { + std::vector<SyntheticCpuList> nodes; + + // Content from here onwards lies | + // beyond the 16 byte buffer. | + // v + nodes.emplace_back("0-1,2-3,4-5,6-7,8"); // Right after a comma + nodes.emplace_back("9,10,11,12,13,14,15-19"); // Right before a comma + nodes.emplace_back("20-21,22-23,24-25,26-29"); // Within range end + nodes.emplace_back("30-32,33,34,35,36-38,39"); // Within range start + nodes.emplace_back("40-43,44,45-49"); + + const auto nt = CreateNumaTopology<3>(nodes); + + EXPECT_EQ(nt.numa_aware(), true); + EXPECT_EQ(nt.active_partitions(), 3); + + for (int cpu = 0; cpu <= 8; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 0); + } + for (int cpu = 9; cpu <= 19; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 1); + } + for (int cpu = 20; cpu <= 29; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 2); + } + for (int cpu = 30; cpu <= 39; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 0); + } + for (int cpu = 40; cpu <= 49; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 1); + } +} + +// Ensure we can initialize using the host system's real NUMA topology +// information. +TEST_F(NumaTopologyTest, Host) { + NumaTopology<4> nt; + nt.Init(); + + // We don't actually know anything about the host, so there's not much more + // we can do beyond checking that we didn't crash. +} + +// Ensure that we can parse randomized cpulists correctly. 
+TEST(ParseCpulistTest, Random) { + absl::BitGen gen; + + static constexpr int kIterations = 100; + for (int i = 0; i < kIterations; i++) { + cpu_set_t reference; + CPU_ZERO(&reference); + + // Set a random number of CPUs within the reference set. + const double density = absl::Uniform(gen, 0.0, 1.0); + for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (absl::Bernoulli(gen, density)) { + CPU_SET(cpu, &reference); + } + } + + // Serialize the reference set into a cpulist-style string. + std::vector<std::string> components; + for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (!CPU_ISSET(cpu, &reference)) continue; + + const int start = cpu; + int next = cpu + 1; + while (next < CPU_SETSIZE && CPU_ISSET(next, &reference)) { + cpu = next; + next = cpu + 1; + } + + if (cpu == start) { + components.push_back(absl::StrCat(cpu)); + } else { + components.push_back(absl::StrCat(start, "-", cpu)); + } + } + const std::string serialized = absl::StrJoin(components, ","); + + // Now parse that string using our ParseCpulist function, randomizing the + // amount of data we provide to it from each read. + absl::string_view remaining(serialized); + const cpu_set_t parsed = + ParseCpulist([&](char *const buf, const size_t count) -> ssize_t { + // Calculate how much data we have left to provide. + const size_t max = std::min(count, remaining.size()); + + // If none, we have no choice but to provide nothing. + if (max == 0) return 0; + + // If we do have data, return a randomly sized subset of it to stress + // the logic around reading partial values. + const size_t copy = absl::Uniform(gen, static_cast<size_t>(1), max); + memcpy(buf, remaining.data(), copy); + remaining.remove_prefix(copy); + return copy; + }); + + // We ought to have parsed the same set of CPUs that we serialized. 
+ EXPECT_TRUE(CPU_EQUAL(&parsed, &reference)); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h b/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h index 6380183a50..22878aacda 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h @@ -34,12 +34,12 @@ #endif #endif -// Annotations for functions that are not affected by nor affect observable -// state of the program. -#if ABSL_HAVE_ATTRIBUTE(const) -#define TCMALLOC_ATTRIBUTE_CONST __attribute__((const)) -#else -#define TCMALLOC_ATTRIBUTE_CONST -#endif - +// Annotations for functions that are not affected by nor affect observable +// state of the program. +#if ABSL_HAVE_ATTRIBUTE(const) +#define TCMALLOC_ATTRIBUTE_CONST __attribute__((const)) +#else +#define TCMALLOC_ATTRIBUTE_CONST +#endif + #endif // TCMALLOC_INTERNAL_OPTIMIZATION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h b/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h index f14798fe74..76161f89b8 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h @@ -26,8 +26,8 @@ ABSL_ATTRIBUTE_WEAK uint64_t TCMalloc_Internal_GetHeapSizeHardLimit(); ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetHPAASubrelease(); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_GetHugePageFillerSkipSubreleaseInterval(absl::Duration* v); -ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled(); -ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled(); ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled(); ABSL_ATTRIBUTE_WEAK double 
TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction(); @@ -37,10 +37,10 @@ ABSL_ATTRIBUTE_WEAK size_t TCMalloc_Internal_GetStats(char* buffer, ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHPAASubrelease(bool v); -ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetShufflePerCpuCachesEnabled( - bool v); -ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled( - bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetShufflePerCpuCachesEnabled( + bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled( + bool v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v); ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxTotalThreadCacheBytes( diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc index f8706f0f21..770367f05b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc @@ -18,7 +18,7 @@ #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> -#include <syscall.h> +#include <syscall.h> #include <unistd.h> #include <atomic> @@ -28,12 +28,12 @@ #include "absl/base/internal/sysinfo.h" #include "tcmalloc/internal/linux_syscall_support.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/optimization.h" #include "tcmalloc/internal/util.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace subtle { namespace percpu { @@ -81,22 +81,22 @@ enum PerCpuInitStatus { ABSL_CONST_INIT static PerCpuInitStatus init_status = kSlowMode; ABSL_CONST_INIT static absl::once_flag 
init_per_cpu_once; -#if TCMALLOC_PERCPU_USE_RSEQ -ABSL_CONST_INIT static std::atomic<bool> using_upstream_fence{false}; -#endif // TCMALLOC_PERCPU_USE_RSEQ - -// Is this thread's __rseq_abi struct currently registered with the kernel? -static bool ThreadRegistered() { return RseqCpuId() >= kCpuIdInitialized; } +#if TCMALLOC_PERCPU_USE_RSEQ +ABSL_CONST_INIT static std::atomic<bool> using_upstream_fence{false}; +#endif // TCMALLOC_PERCPU_USE_RSEQ +// Is this thread's __rseq_abi struct currently registered with the kernel? +static bool ThreadRegistered() { return RseqCpuId() >= kCpuIdInitialized; } + static bool InitThreadPerCpu() { - // If we're already registered, there's nothing further for us to do. - if (ThreadRegistered()) { + // If we're already registered, there's nothing further for us to do. + if (ThreadRegistered()) { return true; } #ifdef __NR_rseq - return 0 == syscall(__NR_rseq, &__rseq_abi, sizeof(__rseq_abi), 0, - TCMALLOC_PERCPU_RSEQ_SIGNATURE); + return 0 == syscall(__NR_rseq, &__rseq_abi, sizeof(__rseq_abi), 0, + TCMALLOC_PERCPU_RSEQ_SIGNATURE); #endif // __NR_rseq return false; } @@ -115,12 +115,12 @@ static void InitPerCpu() { init_status = kFastMode; #if TCMALLOC_PERCPU_USE_RSEQ - constexpr int kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8); - // It is safe to make the syscall below multiple times. - using_upstream_fence.store( - 0 == syscall(__NR_membarrier, - kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0), - std::memory_order_relaxed); + constexpr int kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8); + // It is safe to make the syscall below multiple times. 
+ using_upstream_fence.store( + 0 == syscall(__NR_membarrier, + kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0), + std::memory_order_relaxed); #endif // TCMALLOC_PERCPU_USE_RSEQ } } @@ -262,22 +262,22 @@ static void SlowFence(const cpu_set_t* cpus) { } } -#if TCMALLOC_PERCPU_USE_RSEQ -static void UpstreamRseqFenceCpu(int cpu) { - ABSL_RAW_CHECK(using_upstream_fence.load(std::memory_order_relaxed), - "upstream fence unavailable."); - - constexpr int kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7); - constexpr int kMEMBARRIER_CMD_FLAG_CPU = (1 << 0); - - int64_t res = syscall(__NR_membarrier, kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, - kMEMBARRIER_CMD_FLAG_CPU, cpu); - - ABSL_RAW_CHECK(res == 0 || res == -ENXIO /* missing CPU */, - "Upstream fence failed."); -} -#endif // TCMALLOC_PERCPU_USE_RSEQ - +#if TCMALLOC_PERCPU_USE_RSEQ +static void UpstreamRseqFenceCpu(int cpu) { + ABSL_RAW_CHECK(using_upstream_fence.load(std::memory_order_relaxed), + "upstream fence unavailable."); + + constexpr int kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7); + constexpr int kMEMBARRIER_CMD_FLAG_CPU = (1 << 0); + + int64_t res = syscall(__NR_membarrier, kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + kMEMBARRIER_CMD_FLAG_CPU, cpu); + + ABSL_RAW_CHECK(res == 0 || res == -ENXIO /* missing CPU */, + "Upstream fence failed."); +} +#endif // TCMALLOC_PERCPU_USE_RSEQ + // Interrupt every concurrently running sibling thread on any cpu in // "cpus", and guarantee our writes up til now are visible to every // other CPU. (cpus == NULL is equivalent to all CPUs.) @@ -295,17 +295,17 @@ void Fence() { // Other operations (or all in RSEQ mode) might just be running on another // CPU. Do something about that: use RSEQ::Fence() to just send interrupts // and restart any such operation. 
-#if TCMALLOC_PERCPU_USE_RSEQ - if (using_upstream_fence.load(std::memory_order_relaxed)) { - UpstreamRseqFenceCpu(-1); - return; - } -#endif // TCMALLOC_PERCPU_USE_RSEQ - +#if TCMALLOC_PERCPU_USE_RSEQ + if (using_upstream_fence.load(std::memory_order_relaxed)) { + UpstreamRseqFenceCpu(-1); + return; + } +#endif // TCMALLOC_PERCPU_USE_RSEQ + FenceInterruptCPUs(nullptr); } -void FenceCpu(int cpu, const size_t virtual_cpu_id_offset) { +void FenceCpu(int cpu, const size_t virtual_cpu_id_offset) { // Prevent compiler re-ordering of code below. In particular, the call to // GetCurrentCpu must not appear in assembly program order until after any // code that comes before FenceCpu in C++ program order. @@ -313,32 +313,32 @@ void FenceCpu(int cpu, const size_t virtual_cpu_id_offset) { // A useful fast path: nothing needs doing at all to order us with respect // to our own CPU. - if (GetCurrentVirtualCpu(virtual_cpu_id_offset) == cpu) { + if (GetCurrentVirtualCpu(virtual_cpu_id_offset) == cpu) { return; } - if (virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)) { - ASSUME(false); - + if (virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)) { + ASSUME(false); + // With virtual CPUs, we cannot identify the true physical core we need to // interrupt. 
-#if TCMALLOC_PERCPU_USE_RSEQ - if (using_upstream_fence.load(std::memory_order_relaxed)) { - UpstreamRseqFenceCpu(-1); - return; - } -#endif // TCMALLOC_PERCPU_USE_RSEQ +#if TCMALLOC_PERCPU_USE_RSEQ + if (using_upstream_fence.load(std::memory_order_relaxed)) { + UpstreamRseqFenceCpu(-1); + return; + } +#endif // TCMALLOC_PERCPU_USE_RSEQ FenceInterruptCPUs(nullptr); return; } -#if TCMALLOC_PERCPU_USE_RSEQ - if (using_upstream_fence.load(std::memory_order_relaxed)) { - UpstreamRseqFenceCpu(cpu); - return; - } -#endif // TCMALLOC_PERCPU_USE_RSEQ - +#if TCMALLOC_PERCPU_USE_RSEQ + if (using_upstream_fence.load(std::memory_order_relaxed)) { + UpstreamRseqFenceCpu(cpu); + return; + } +#endif // TCMALLOC_PERCPU_USE_RSEQ + cpu_set_t set; CPU_ZERO(&set); CPU_SET(cpu, &set); @@ -347,6 +347,6 @@ void FenceCpu(int cpu, const size_t virtual_cpu_id_offset) { } // namespace percpu } // namespace subtle -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h index ad2124e0d1..c5f26c0b92 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h @@ -71,9 +71,9 @@ #endif #endif // !defined(TCMALLOC_PERCPU_USE_RSEQ) -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace subtle { namespace percpu { @@ -89,23 +89,23 @@ extern "C" ABSL_PER_THREAD_TLS_KEYWORD volatile kernel_rseq __rseq_abi; static inline int RseqCpuId() { return __rseq_abi.cpu_id; } -static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) { +static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) { #ifdef __x86_64__ - ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id) || - virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)); 
+ ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id) || + virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)); return *reinterpret_cast<short *>(reinterpret_cast<uintptr_t>(&__rseq_abi) + - virtual_cpu_id_offset); + virtual_cpu_id_offset); #else - ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id)); + ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id)); return RseqCpuId(); #endif } #else // !TCMALLOC_PERCPU_USE_RSEQ static inline int RseqCpuId() { return kCpuIdUnsupported; } -static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) { - return kCpuIdUnsupported; -} +static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) { + return kCpuIdUnsupported; +} #endif typedef int (*OverflowHandler)(int cpu, size_t cl, void *item); @@ -114,39 +114,39 @@ typedef void *(*UnderflowHandler)(int cpu, size_t cl); // Functions below are implemented in the architecture-specific percpu_rseq_*.S // files. extern "C" { -int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p, - intptr_t old_val, intptr_t new_val); +int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p, + intptr_t old_val, intptr_t new_val); #ifndef __x86_64__ -int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift, - OverflowHandler f); -int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item, - OverflowHandler f); -void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f, - size_t shift); -void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl, - UnderflowHandler f); +int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift, + OverflowHandler f); +int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item, + OverflowHandler f); +void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f, + size_t shift); +void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl, + UnderflowHandler f); #endif // 
__x86_64__ // Push a batch for a slab which the Shift equal to // TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT -size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl, - void **batch, size_t len); +size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl, + void **batch, size_t len); // Pop a batch for a slab which the Shift equal to // TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT -size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl, - void **batch, size_t len); +size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl, + void **batch, size_t len); #ifdef __x86_64__ -int TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU(int target_cpu, intptr_t *p, - intptr_t old_val, - intptr_t new_val); -size_t TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(void *ptr, size_t cl, - void **batch, - size_t len); -size_t TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(void *ptr, size_t cl, - void **batch, size_t len); +int TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU(int target_cpu, intptr_t *p, + intptr_t old_val, + intptr_t new_val); +size_t TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(void *ptr, size_t cl, + void **batch, + size_t len); +size_t TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(void *ptr, size_t cl, + void **batch, size_t len); #endif } @@ -207,15 +207,15 @@ inline int GetCurrentCpu() { return cpu; } -inline int GetCurrentVirtualCpuUnsafe(const size_t virtual_cpu_id_offset) { - return VirtualRseqCpuId(virtual_cpu_id_offset); -} +inline int GetCurrentVirtualCpuUnsafe(const size_t virtual_cpu_id_offset) { + return VirtualRseqCpuId(virtual_cpu_id_offset); +} -inline int GetCurrentVirtualCpu(const size_t virtual_cpu_id_offset) { +inline int GetCurrentVirtualCpu(const size_t virtual_cpu_id_offset) { // We can't use the unsafe version unless we have the appropriate version of // the rseq extension. This also allows us a convenient escape hatch if the // kernel changes the way it uses special-purpose registers for CPU IDs. 
- int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); // We open-code the check for fast-cpu availability since we do not want to // force initialization in the first-call case. This so done so that we can @@ -307,18 +307,18 @@ inline void TSANMemoryBarrierOn(void *p) { // These methods may *only* be called if IsFast() has been called by the current // thread (and it returned true). inline int CompareAndSwapUnsafe(int target_cpu, std::atomic<intptr_t> *p, - intptr_t old_val, intptr_t new_val, - const size_t virtual_cpu_id_offset) { + intptr_t old_val, intptr_t new_val, + const size_t virtual_cpu_id_offset) { TSANMemoryBarrierOn(p); #if TCMALLOC_PERCPU_USE_RSEQ - switch (virtual_cpu_id_offset) { + switch (virtual_cpu_id_offset) { case offsetof(kernel_rseq, cpu_id): - return TcmallocSlab_Internal_PerCpuCmpxchg64( + return TcmallocSlab_Internal_PerCpuCmpxchg64( target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p), old_val, new_val); #ifdef __x86_64__ case offsetof(kernel_rseq, vcpu_id): - return TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU( + return TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU( target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p), old_val, new_val); #endif // __x86_64__ @@ -330,13 +330,13 @@ inline int CompareAndSwapUnsafe(int target_cpu, std::atomic<intptr_t> *p, #endif // !TCMALLOC_PERCPU_USE_RSEQ } -void FenceCpu(int cpu, const size_t virtual_cpu_id_offset); +void FenceCpu(int cpu, const size_t virtual_cpu_id_offset); } // namespace percpu } // namespace subtle -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // !__ASSEMBLER__ #endif // TCMALLOC_INTERNAL_PERCPU_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S index 3cdaf17835..8abe7c9a08 100644 --- 
a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S @@ -70,20 +70,20 @@ #define PINSECTION(label) #endif -// A function within a guarded memory region must start with a BTI C -// instruction. -// So per ABI that includes any externally visible code label. -// Using hint to make sure we can use this on targets that support BTI and -// targets that don't. It will behave as a no-op on targets that do not -// support BTI or outside a guarded memory region. -#ifdef __ARM_FEATURE_BTI_DEFAULT -#define BTI_C hint 34 -#define TAILCALL(x) mov x16, x; br x16 -#else -#define BTI_C -#define TAILCALL(x) br x -#endif - +// A function within a guarded memory region must start with a BTI C +// instruction. +// So per ABI that includes any externally visible code label. +// Using hint to make sure we can use this on targets that support BTI and +// targets that don't. It will behave as a no-op on targets that do not +// support BTI or outside a guarded memory region. +#ifdef __ARM_FEATURE_BTI_DEFAULT +#define BTI_C hint 34 +#define TAILCALL(x) mov x16, x; br x16 +#else +#define BTI_C +#define TAILCALL(x) br x +#endif + // This macro defines: // * the rseq_cs instance that we'll use for label's critical section. // * a trampoline to return to when we abort. This label_trampoline is @@ -115,7 +115,7 @@ .type label##_trampoline, @function; \ label##_trampoline: \ .cfi_startproc; \ - BTI_C; \ + BTI_C; \ b .L##label##_abort; \ .cfi_endproc; \ .size label##_trampoline, . - label##_trampoline; \ @@ -169,7 +169,7 @@ label##_trampoline: \ * we can not guarantee it will we must save and restore the registers used to * store the arguments of our functions. The function with most arguments has 5 * arguments, so we save x0-x4 and lr. - * TODO: Add PAC support because we are spiling LR. + * TODO: Add PAC support because we are spiling LR. 
*/ #define START_RSEQ(src) \ .L##src##_abort: \ @@ -199,7 +199,7 @@ label##_trampoline: \ /* start of atomic restartable sequences */ /* - * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p, + * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p, * long old_val, long new_val) * w0: target_cpu * x1: p @@ -207,30 +207,30 @@ label##_trampoline: \ * x3: new_val */ .p2align 6 /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PerCpuCmpxchg64 - .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function -TcmallocSlab_Internal_PerCpuCmpxchg64: + .globl TcmallocSlab_Internal_PerCpuCmpxchg64 + .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function +TcmallocSlab_Internal_PerCpuCmpxchg64: .cfi_startproc - BTI_C - START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64) + BTI_C + START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64) FETCH_CPU(w4) cmp w0, w4 /* check cpu vs current_cpu */ - bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit + bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit ldr x6, [x1] cmp x6, x2 /* verify *p == old */ - bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch + bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch str x3, [x1] -.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit: mov x0, x4 ret /* return current cpu, indicating mismatch OR success */ -.LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch: mov x0, #-1 /* mismatch versus "old" or "check", return -1 */ ret .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) +ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) -/* size_t TcmallocSlab_Internal_PushBatch_FixedShift( +/* size_t TcmallocSlab_Internal_PushBatch_FixedShift( * void *ptr (x0), * size_t cl (w1), * void** batch (x2), @@ -255,12 +255,12 @@ 
DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) * } */ .p2align 6 /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PushBatch_FixedShift - .type TcmallocSlab_Internal_PushBatch_FixedShift, @function -TcmallocSlab_Internal_PushBatch_FixedShift: + .globl TcmallocSlab_Internal_PushBatch_FixedShift + .type TcmallocSlab_Internal_PushBatch_FixedShift, @function +TcmallocSlab_Internal_PushBatch_FixedShift: .cfi_startproc - BTI_C - START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift) + BTI_C + START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift) FETCH_CPU(w8) lsl x8, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* multiply cpu by 256k */ add x8, x0, x8 @@ -268,7 +268,7 @@ TcmallocSlab_Internal_PushBatch_FixedShift: ldrh w9, [x4] /* r9 = current */ ldrh w10, [x4, #6] /* r10 = end */ cmp w9, w10 - bge .LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity + bge .LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity add x11, x2, x3, LSL #3 /* r11 = batch + len * 8 */ sub w10, w10, w9 /* r10 = free capacity */ cmp w3, w10 @@ -277,24 +277,24 @@ TcmallocSlab_Internal_PushBatch_FixedShift: add x13, x9, x10 /* r13 = current + amount we are pushing. */ add x9, x8, x9, LSL #3 /* r9 = current cpu slab stack */ add x14, x8, x13, LSL #3 /* r14 = new current address */ -.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: +.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: ldr x12, [x11, #-8]! 
/* r12 = [--r11] */ str x12, [x9], #8 /* [r9++] = r12 */ cmp x9, x14 /* if current cpu slab address == new current address */ - bne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop + bne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop strh w13, [x4] /* store new current index */ -.LTcmallocSlab_Internal_PushBatch_FixedShift_commit: +.LTcmallocSlab_Internal_PushBatch_FixedShift_commit: mov x0, x10 ret -.LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity: +.LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity: mov x0, #0 ret .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) -/* size_t TcmallocSlab_Internal_PopBatch_FixedShift( +/* size_t TcmallocSlab_Internal_PopBatch_FixedShift( * void *ptr (x0), * size_t cl (w1), * void** batch (x2), @@ -319,12 +319,12 @@ DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) * } */ .p2align 6 /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PopBatch_FixedShift - .type TcmallocSlab_Internal_PopBatch_FixedShift, @function -TcmallocSlab_Internal_PopBatch_FixedShift: + .globl TcmallocSlab_Internal_PopBatch_FixedShift + .type TcmallocSlab_Internal_PopBatch_FixedShift, @function +TcmallocSlab_Internal_PopBatch_FixedShift: .cfi_startproc - BTI_C - START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift) + BTI_C + START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift) FETCH_CPU(w8) lsl x8, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* multiply cpu by 256k */ add x8, x0, x8 @@ -332,7 +332,7 @@ TcmallocSlab_Internal_PopBatch_FixedShift: ldrh w9, [x4] /* current */ ldrh w10, [x4, #4] /* begin */ cmp w10, w9 - bhs .LTcmallocSlab_Internal_PopBatch_FixedShift_no_items + bhs .LTcmallocSlab_Internal_PopBatch_FixedShift_no_items sub w11, w9, w10 /* r11 = available items */ cmp w3, w11 csel w11, w3, w11, ls /* 
r11 = min(len, available items), amount we are @@ -341,27 +341,27 @@ TcmallocSlab_Internal_PopBatch_FixedShift: sub x9, x9, x11 /* update new current */ mov x12, x2 /* r12 = batch */ add x14, x2, x11, LSL #3 /* r14 = batch + amount we are popping*8 */ -.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: +.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: ldr x10, [x13, #-8]! /* r10 = [--r13] */ str x10, [x12], #8 /* [r12++] = r10 */ cmp x12, x14 /* if current batch == batch + amount we are popping */ - bne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop + bne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop strh w9, [x4] /* store new current */ -.LTcmallocSlab_Internal_PopBatch_FixedShift_commit: +.LTcmallocSlab_Internal_PopBatch_FixedShift_commit: mov x0, x11 ret -.LTcmallocSlab_Internal_PopBatch_FixedShift_no_items: +.LTcmallocSlab_Internal_PopBatch_FixedShift_no_items: mov x0, #0 ret .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift) +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift) - .globl TcmallocSlab_Internal_Push - .type TcmallocSlab_Internal_Push, @function -TcmallocSlab_Internal_Push: -.LTcmallocSlab_Internal_Push_entry: + .globl TcmallocSlab_Internal_Push + .type TcmallocSlab_Internal_Push, @function +TcmallocSlab_Internal_Push: +.LTcmallocSlab_Internal_Push_entry: .cfi_startproc // Arguments use: // * x0: (Argument: Slabs*) cpu_0_slab_ptr @@ -372,8 +372,8 @@ TcmallocSlab_Internal_Push: // Return value: current CPU // Available x5-x15 - BTI_C - START_RSEQ(TcmallocSlab_Internal_Push) + BTI_C + START_RSEQ(TcmallocSlab_Internal_Push) FETCH_CPU(w8) lsl x9, x8, x3 add x9, x0, x9 @@ -381,25 +381,25 @@ TcmallocSlab_Internal_Push: ldrh w12, [x10] /* current */ ldrh w11, [x10, #6] /* end */ cmp w11, w12 - ble .LTcmallocSlab_Internal_Push_no_capacity + ble .LTcmallocSlab_Internal_Push_no_capacity str x2, [x9, x12, LSL 
#3] add w12, w12, #1 strh w12, [x10] -.LTcmallocSlab_Internal_Push_commit: +.LTcmallocSlab_Internal_Push_commit: mov x0, x8 ret -.LTcmallocSlab_Internal_Push_no_capacity: +.LTcmallocSlab_Internal_Push_no_capacity: mov x0, x8 - TAILCALL(x4) -.LTcmallocSlab_Internal_Push_region3: + TAILCALL(x4) +.LTcmallocSlab_Internal_Push_region3: .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Push) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push) +ENCODE_SIZE(TcmallocSlab_Internal_Push) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push) - .globl TcmallocSlab_Internal_Push_FixedShift - .type TcmallocSlab_Internal_Push_FixedShift, @function -TcmallocSlab_Internal_Push_FixedShift: + .globl TcmallocSlab_Internal_Push_FixedShift + .type TcmallocSlab_Internal_Push_FixedShift, @function +TcmallocSlab_Internal_Push_FixedShift: .cfi_startproc // Arguments use: // * x0: (Argument: Slabs*) cpu_0_slab_ptr @@ -409,8 +409,8 @@ TcmallocSlab_Internal_Push_FixedShift: // Return value: current CPU // Available x4-x15 - BTI_C - START_RSEQ(TcmallocSlab_Internal_Push_FixedShift) + BTI_C + START_RSEQ(TcmallocSlab_Internal_Push_FixedShift) FETCH_CPU(w8) lsl x9, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT add x9, x0, x9 @@ -418,23 +418,23 @@ TcmallocSlab_Internal_Push_FixedShift: ldrh w12, [x10] /* current */ ldrh w11, [x10, #6] /* end */ cmp w11, w12 - ble .LTcmallocSlab_Internal_Push_FixedShift_no_capacity + ble .LTcmallocSlab_Internal_Push_FixedShift_no_capacity str x2, [x9, x12, LSL #3] add w12, w12, #1 strh w12, [x10] -.LTcmallocSlab_Internal_Push_FixedShift_commit: +.LTcmallocSlab_Internal_Push_FixedShift_commit: mov x0, x8 ret -.LTcmallocSlab_Internal_Push_FixedShift_no_capacity: +.LTcmallocSlab_Internal_Push_FixedShift_no_capacity: mov x0, x8 - TAILCALL(x3) + TAILCALL(x3) .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift) +ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift) 
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift) - .globl TcmallocSlab_Internal_Pop_FixedShift - .type TcmallocSlab_Internal_Pop_FixedShift, @function -TcmallocSlab_Internal_Pop_FixedShift: + .globl TcmallocSlab_Internal_Pop_FixedShift + .type TcmallocSlab_Internal_Pop_FixedShift, @function +TcmallocSlab_Internal_Pop_FixedShift: .cfi_startproc // Arguments use: // * x0: (Argument: Slabs*) cpu_0_slab_ptr @@ -443,8 +443,8 @@ TcmallocSlab_Internal_Pop_FixedShift: // Return value: current CPU // Available x3-x15 - BTI_C - START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift) + BTI_C + START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift) FETCH_CPU(w8) /* r8 = CPU */ lsl x9, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* r9 = CPU shifted */ @@ -453,23 +453,23 @@ TcmallocSlab_Internal_Pop_FixedShift: ldrh w12, [x10] /* r12 = current index */ ldrh w11, [x10, #4] /* r11 = begin index */ cmp w11, w12 /* if begin >= current */ - bge .LTcmallocSlab_Internal_Pop_FixedShift_no_items + bge .LTcmallocSlab_Internal_Pop_FixedShift_no_items sub w12, w12, #1 /* r12 = current-- */ ldr x3, [x9, x12, LSL #3] /* r3 = [start + current * 8] */ strh w12, [x10] /* store new current index */ -.LTcmallocSlab_Internal_Pop_FixedShift_commit: +.LTcmallocSlab_Internal_Pop_FixedShift_commit: mov x0, x3 /* return popped item */ ret -.LTcmallocSlab_Internal_Pop_FixedShift_no_items: +.LTcmallocSlab_Internal_Pop_FixedShift_no_items: mov x0, x8 /* call overflow handler with CPU ID */ - TAILCALL(x2) + TAILCALL(x2) .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift) +ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift) - .globl TcmallocSlab_Internal_Pop - .type TcmallocSlab_Internal_Pop, @function -TcmallocSlab_Internal_Pop: + .globl TcmallocSlab_Internal_Pop + .type TcmallocSlab_Internal_Pop, @function +TcmallocSlab_Internal_Pop: .cfi_startproc // Arguments use: // * x0: 
(Argument: Slabs*) cpu_0_slab_ptr @@ -479,8 +479,8 @@ TcmallocSlab_Internal_Pop: // Return value: Value // Available x4-x15 - BTI_C - START_RSEQ(TcmallocSlab_Internal_Pop) + BTI_C + START_RSEQ(TcmallocSlab_Internal_Pop) FETCH_CPU(w8) /* r8 = CPU ID */ lsl x9, x8, x3 /* x9 = CPU shifted by (r3) */ add x9, x0, x9 /* x9 = start of this CPU region */ @@ -488,37 +488,37 @@ TcmallocSlab_Internal_Pop: ldrh w12, [x10] /* r12 = current index */ ldrh w11, [x10, #4] /* x11 = begin index */ cmp w11, w12 /* if begin >= current */ - bge .LTcmallocSlab_Internal_Pop_no_items + bge .LTcmallocSlab_Internal_Pop_no_items sub w12, w12, #1 /* r12 = current-- */ ldr x4, [x9, x12, LSL #3] /* r4 = [start + current * 8] */ strh w12, [x10] /* update current index */ -.LTcmallocSlab_Internal_Pop_commit: +.LTcmallocSlab_Internal_Pop_commit: mov x0, x4 /* return popped item */ ret -.LTcmallocSlab_Internal_Pop_no_items: +.LTcmallocSlab_Internal_Pop_no_items: mov x0, x8 /* call overflow handler with CPU ID */ - TAILCALL(x2) + TAILCALL(x2) .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Pop) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop) +ENCODE_SIZE(TcmallocSlab_Internal_Pop) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop) .section .note.GNU-stack,"",@progbits - -/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ -#define GNU_PROPERTY(type, value) \ - .section .note.gnu.property, "a"; \ - .p2align 3; \ - .word 4; \ - .word 16; \ - .word 5; \ - .asciz "GNU"; \ - .word type; \ - .word 4; \ - .word value; \ - .word 0; - -/* Add GNU property note if built with branch protection. */ - -#if defined(__ARM_FEATURE_BTI_DEFAULT) -GNU_PROPERTY (0xc0000000, 1) -#endif + +/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ +#define GNU_PROPERTY(type, value) \ + .section .note.gnu.property, "a"; \ + .p2align 3; \ + .word 4; \ + .word 16; \ + .word 5; \ + .asciz "GNU"; \ + .word type; \ + .word 4; \ + .word value; \ + .word 0; + +/* Add GNU property note if built with branch protection. 
*/ + +#if defined(__ARM_FEATURE_BTI_DEFAULT) +GNU_PROPERTY (0xc0000000, 1) +#endif diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S index 234f28c2e7..4a63738446 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S @@ -223,13 +223,13 @@ label##_trampoline: \ #endif //////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_PerCpuCmpxchg64 +// TcmallocSlab_Internal_PerCpuCmpxchg64 //////////////////////////////////////////////////////////////////////// -.globl TcmallocSlab_Internal_PerCpuCmpxchg64 -.type TcmallocSlab_Internal_PerCpuCmpxchg64, @function -TcmallocSlab_Internal_PerCpuCmpxchg64: -.LTcmallocSlab_Internal_PerCpuCmpxchg64_entry: +.globl TcmallocSlab_Internal_PerCpuCmpxchg64 +.type TcmallocSlab_Internal_PerCpuCmpxchg64, @function +TcmallocSlab_Internal_PerCpuCmpxchg64: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_entry: .cfi_startproc // Register use: // @@ -241,7 +241,7 @@ TcmallocSlab_Internal_PerCpuCmpxchg64: // * r8: The current value of *p. // - START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64) + START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64) // Are we running on the target CPU? GET_CPU(%r7) @@ -257,7 +257,7 @@ TcmallocSlab_Internal_PerCpuCmpxchg64: // Store the new value, committing the operation. std %r6, 0(%r4) -.LTcmallocSlab_Internal_PerCpuCmpxchg64_critical_limit: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_critical_limit: // Return the target CPU, which is already in r3. 
blr @@ -272,20 +272,20 @@ TcmallocSlab_Internal_PerCpuCmpxchg64: li %r3, -1 blr -.LTcmallocSlab_Internal_PerCpuCmpxchg64_function_limit: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_function_limit: .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64); +ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64); //////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_Push +// TcmallocSlab_Internal_Push //////////////////////////////////////////////////////////////////////// -.globl TcmallocSlab_Internal_Push -.type TcmallocSlab_Internal_Push, @function -TcmallocSlab_Internal_Push: -.LTcmallocSlab_Internal_Push_entry: +.globl TcmallocSlab_Internal_Push +.type TcmallocSlab_Internal_Push, @function +TcmallocSlab_Internal_Push: +.LTcmallocSlab_Internal_Push_entry: .cfi_startproc // Arguments use: // * r3: (Argument: Slabs*) cpu_0_slab_ptr @@ -298,7 +298,7 @@ TcmallocSlab_Internal_Push: // Note that r12 may be overwritten in rseq_restart_address_internal so // cannot be relied upon across restartable sequence boundaries. 
- START_RSEQ(TcmallocSlab_Internal_Push) + START_RSEQ(TcmallocSlab_Internal_Push) GET_CPU(%r8) // r8 = current CPU, includes MASK operation sld %r9, %r8, %r6 // r9 = r8 << shift (r6) @@ -308,34 +308,34 @@ TcmallocSlab_Internal_Push: lhz %r12, 0(%r10) // r12 = current index lhz %r11, 6(%r10) // r11 = length cmpld %cr7, %r11, %r12 // compare current index with length - ble %cr7, .LTcmallocSlab_Internal_Push_no_capacity + ble %cr7, .LTcmallocSlab_Internal_Push_no_capacity rldicr %r11, %r12, 3, 60 // r11 = offset of current index addi %r12, %r12, 1 // current index += 1 stdx %r5, %r9, %r11 // store pointer p (r5) into current offset sth %r12, 0(%r10) // update current index -.LTcmallocSlab_Internal_Push_critical_limit: +.LTcmallocSlab_Internal_Push_critical_limit: mr %r3, %r8 // Return current CPU in r3 blr -.LTcmallocSlab_Internal_Push_no_capacity: +.LTcmallocSlab_Internal_Push_no_capacity: mr %r3, %r8 // Place current CPU in r3 // r7 already contains target function b .LPushOverflowTrampoline -.LTcmallocSlab_Internal_Push_function_limit: +.LTcmallocSlab_Internal_Push_function_limit: .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Push); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push); +ENCODE_SIZE(TcmallocSlab_Internal_Push); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push); //////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_Push_FixedShift +// TcmallocSlab_Internal_Push_FixedShift //////////////////////////////////////////////////////////////////////// -.globl TcmallocSlab_Internal_Push_FixedShift -.type TcmallocSlab_Internal_Push_FixedShift, @function -TcmallocSlab_Internal_Push_FixedShift: -.LTcmallocSlab_Internal_Push_FixedShift_entry: +.globl TcmallocSlab_Internal_Push_FixedShift +.type TcmallocSlab_Internal_Push_FixedShift, @function +TcmallocSlab_Internal_Push_FixedShift: +.LTcmallocSlab_Internal_Push_FixedShift_entry: .cfi_startproc // Arguments use: // * r3: (Argument: Slabs*) cpu_0_slab_ptr @@ -343,7 +343,7 @@ 
TcmallocSlab_Internal_Push_FixedShift: // * r5: (Argument: uintptr_t) p // * r6: (Argument: uintptr_t) f - START_RSEQ(TcmallocSlab_Internal_Push_FixedShift) + START_RSEQ(TcmallocSlab_Internal_Push_FixedShift) GET_CPU_UNMASKED(%r7) // r7 = unmasked CPU // Mask upper 52 bits of %r7 and shift left in single @@ -356,35 +356,35 @@ TcmallocSlab_Internal_Push_FixedShift: lhz %r10, 0(%r9) // r10 = current index lhz %r11, 6(%r9) // r11 = end index cmpld %cr7, %r11, %r10 // Check for space - ble %cr7, .LTcmallocSlab_Internal_Push_FixedShift_no_capacity + ble %cr7, .LTcmallocSlab_Internal_Push_FixedShift_no_capacity rldicr %r11, %r10, 3, 60 // r11 = offset of current index addi %r10, %r10, 1 // current index ++ stdx %r5, %r8, %r11 // store the item (from r5) sth %r10, 0(%r9) // store current index -.LTcmallocSlab_Internal_Push_FixedShift_critical_limit: +.LTcmallocSlab_Internal_Push_FixedShift_critical_limit: MASK_CPU(%r3, %r7) // Return and mask CPU into %r3 blr -.LTcmallocSlab_Internal_Push_FixedShift_no_capacity: +.LTcmallocSlab_Internal_Push_FixedShift_no_capacity: MASK_CPU(%r3, %r7) // Move and mask CPU into %r3 mr %r7, %r6 // Move target function into r7 b .LPushOverflowTrampoline -.LTcmallocSlab_Internal_Push_FixedShift_function_limit: +.LTcmallocSlab_Internal_Push_FixedShift_function_limit: .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift); +ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift); //////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_Pop +// TcmallocSlab_Internal_Pop //////////////////////////////////////////////////////////////////////// -.globl TcmallocSlab_Internal_Pop -.type TcmallocSlab_Internal_Pop, @function -TcmallocSlab_Internal_Pop: -.LTcmallocSlab_Internal_Pop_entry: +.globl TcmallocSlab_Internal_Pop +.type TcmallocSlab_Internal_Pop, @function 
+TcmallocSlab_Internal_Pop: +.LTcmallocSlab_Internal_Pop_entry: .cfi_startproc // Arguments use: // * r3: (Argument: Slabs*) cpu_0_slab_ptr @@ -394,7 +394,7 @@ TcmallocSlab_Internal_Pop: // Available r7 r8 r9 r10 r11 // r12 can be used as a temporary within rseq - START_RSEQ(TcmallocSlab_Internal_Pop) + START_RSEQ(TcmallocSlab_Internal_Pop) GET_CPU(%r7) // r7 = CPU, includes mask operation sld %r12, %r7, %r6 // r12 = CPU shifted by shift (r6) @@ -404,41 +404,41 @@ TcmallocSlab_Internal_Pop: lhz %r9, 0(%r8) // r9 = current index lhz %r10, 4(%r8) // r10 = begin cmpld %cr7, %r10, %r9 // Check that we have items to pop - bge %cr7, .LTcmallocSlab_Internal_Pop_no_item + bge %cr7, .LTcmallocSlab_Internal_Pop_no_item subi %r9, %r9, 1 // r9 = current index -- rldicr %r10, %r9, 3, 60 // r10 = offset to current item ldx %r11, %r12, %r10 // load the item from base + index sth %r9, 0(%r8) // store current index -.LTcmallocSlab_Internal_Pop_critical_limit: +.LTcmallocSlab_Internal_Pop_critical_limit: // Move the item into r3, now that it's safe to do so. 
mr %r3, %r11 blr -.LTcmallocSlab_Internal_Pop_no_item: +.LTcmallocSlab_Internal_Pop_no_item: mr %r3, %r7 // Place CPU into r3 b .LPopUnderflowTrampoline -.LTcmallocSlab_Internal_Pop_function_limit: +.LTcmallocSlab_Internal_Pop_function_limit: .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Pop); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop); +ENCODE_SIZE(TcmallocSlab_Internal_Pop); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop); //////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_Pop_FixedShift +// TcmallocSlab_Internal_Pop_FixedShift //////////////////////////////////////////////////////////////////////// -.globl TcmallocSlab_Internal_Pop_FixedShift -.type TcmallocSlab_Internal_Pop_FixedShift, @function -TcmallocSlab_Internal_Pop_FixedShift: -.LTcmallocSlab_Internal_Pop_FixedShift_entry: +.globl TcmallocSlab_Internal_Pop_FixedShift +.type TcmallocSlab_Internal_Pop_FixedShift, @function +TcmallocSlab_Internal_Pop_FixedShift: +.LTcmallocSlab_Internal_Pop_FixedShift_entry: .cfi_startproc // Arguments use: // * r3: (Argument: Slabs*) cpu_0_slab_ptr // * r4: (Argument: uintptr_t) cl // * r5: (Argument: uintptr_t) f - START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift) + START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift) GET_CPU_UNMASKED(%r6) // r6 = current CPU // Following instruction combines mask and shift @@ -450,34 +450,34 @@ TcmallocSlab_Internal_Pop_FixedShift: lhz %r9, 0(%r8) // r9 = current index lhz %r10, 4(%r8) // r10 = begin index cmpld %cr7, %r10, %r9 // Check that there are elements available - bge %cr7, .LTcmallocSlab_Internal_Pop_FixedShift_no_item + bge %cr7, .LTcmallocSlab_Internal_Pop_FixedShift_no_item subi %r9, %r9, 1 // current index -- rldicr %r10, %r9, 3, 60 // r10 = offset of current index ldx %r11, %r7, %r10 // r11 = load the item sth %r9, 0(%r8) // update current index -.LTcmallocSlab_Internal_Pop_FixedShift_critical_limit: +.LTcmallocSlab_Internal_Pop_FixedShift_critical_limit: // Move the item into 
r3, now that it's safe to do so. mr %r3, %r11 blr -.LTcmallocSlab_Internal_Pop_FixedShift_no_item: +.LTcmallocSlab_Internal_Pop_FixedShift_no_item: MASK_CPU(%r3, %r6) // Extract CPU from unmasked value in %r6 b .LPopUnderflowTrampoline -.LTcmallocSlab_Internal_Pop_FixedShift_function_limit: +.LTcmallocSlab_Internal_Pop_FixedShift_function_limit: .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift); +ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift); //////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_PushBatch_FixedShift +// TcmallocSlab_Internal_PushBatch_FixedShift //////////////////////////////////////////////////////////////////////// -.globl TcmallocSlab_Internal_PushBatch_FixedShift -.type TcmallocSlab_Internal_PushBatch_FixedShift, @function -TcmallocSlab_Internal_PushBatch_FixedShift: -.LTcmallocSlab_Internal_PushBatch_FixedShift_entry: +.globl TcmallocSlab_Internal_PushBatch_FixedShift +.type TcmallocSlab_Internal_PushBatch_FixedShift, @function +TcmallocSlab_Internal_PushBatch_FixedShift: +.LTcmallocSlab_Internal_PushBatch_FixedShift_entry: .cfi_startproc // Arguments use: // * r3: (Argument: Slabs*) cpu_0_slab_ptr @@ -485,7 +485,7 @@ TcmallocSlab_Internal_PushBatch_FixedShift: // * r5: (Argument: uintptr_t) batch // * r6: (Argument: uintptr_t) len - START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift) + START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift) GET_CPU_UNMASKED(%r7) clrlsldi %r8, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT @@ -496,13 +496,13 @@ TcmallocSlab_Internal_PushBatch_FixedShift: lhz %r11, 6(%r9) // r11 - end sldi %r7, %r6, 3 // r7 - len * 8 cmpld %cr7, %r11, %r10 // current < end? 
- ble %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit + ble %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit sub %r11, %r11, %r10 // r11 - available capacity // r11 = min(r11, r6) cmpld %cr7, %r6, %r11 - bge %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_min + bge %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_min mr %r11, %r6 -.LTcmallocSlab_Internal_PushBatch_FixedShift_min: +.LTcmallocSlab_Internal_PushBatch_FixedShift_min: add %r11, %r10, %r11 sldi %r11, %r11, 3 sldi %r10, %r10, 3 @@ -510,35 +510,35 @@ TcmallocSlab_Internal_PushBatch_FixedShift: // At this point: // r5 - batch, r7 - offset in the batch // r8 - cpu region, r10 - offset into the cpu region, r11 - limit of offset -.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: +.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: subi %r7, %r7, 8 ldx %r12, %r5, %r7 // load the item stdx %r12, %r8, %r10 // store the item addi %r10, %r10, 8 cmpld %cr7, %r10, %r11 - bne %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_loop + bne %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_loop rotrdi %r10, %r10, 3 sth %r10, 0(%r9) // update current -.LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit: +.LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit: // return r6 - r7 / 8 rotrdi %r7, %r7, 3 sub %r3, %r6, %r7 blr -.LTcmallocSlab_Internal_PushBatch_FixedShift_function_limit: +.LTcmallocSlab_Internal_PushBatch_FixedShift_function_limit: .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift); +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift); //////////////////////////////////////////////////////////////////////// -// TcmallocSlab_Internal_PopBatch_FixedShift +// TcmallocSlab_Internal_PopBatch_FixedShift //////////////////////////////////////////////////////////////////////// -.globl 
TcmallocSlab_Internal_PopBatch_FixedShift -.type TcmallocSlab_Internal_PopBatch_FixedShift, @function -TcmallocSlab_Internal_PopBatch_FixedShift: -.LTcmallocSlab_Internal_PopBatch_FixedShift_entry: +.globl TcmallocSlab_Internal_PopBatch_FixedShift +.type TcmallocSlab_Internal_PopBatch_FixedShift, @function +TcmallocSlab_Internal_PopBatch_FixedShift: +.LTcmallocSlab_Internal_PopBatch_FixedShift_entry: .cfi_startproc // Arguments use: // * r3: (Argument: Slabs*) cpu_0_slab_ptr @@ -546,7 +546,7 @@ TcmallocSlab_Internal_PopBatch_FixedShift: // * r5: (Argument: uintptr_t) batch // * r6: (Argument: uintptr_t) len - START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift) + START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift) GET_CPU_UNMASKED(%r7) clrlsldi %r7, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT @@ -557,13 +557,13 @@ TcmallocSlab_Internal_PopBatch_FixedShift: lhz %r10, 4(%r8) // r10 - begin li %r11, 0 // current position in batch cmpld %cr7, %r10, %r9 - bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit + bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit sub %r10, %r9, %r10 // r10 - available items // r10 = min(r10, r6) cmpld %cr7, %r6, %r10 - bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_min + bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_min mr %r10, %r6 -.LTcmallocSlab_Internal_PopBatch_FixedShift_min: +.LTcmallocSlab_Internal_PopBatch_FixedShift_min: sub %r10, %r9, %r10 sldi %r10, %r10, 3 sldi %r9, %r9, 3 @@ -571,24 +571,24 @@ TcmallocSlab_Internal_PopBatch_FixedShift: // At this point: // r5 - batch, r11 - offset in the batch // r7 - cpu region, r9 - offset into the cpu region, r10 - limit of offset -.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: +.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: subi %r9, %r9, 8 ldx %r12, %r7, %r9 // load the item stdx %r12, %r5, %r11 // store the item addi %r11, %r11, 8 cmpld %cr7, %r9, %r10 - bne %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_loop + bne %cr7, 
.LTcmallocSlab_Internal_PopBatch_FixedShift_loop rotrdi %r9, %r9, 3 sth %r9, 0(%r8) // update current -.LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit: +.LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit: rotrdi %r3, %r11, 3 blr -.LTcmallocSlab_Internal_PopBatch_FixedShift_function_limit: +.LTcmallocSlab_Internal_PopBatch_FixedShift_function_limit: .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift); -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift); +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift); // Input: r7 points to the function to tail call. r3...r6 are args for it. .LPushOverflowTrampoline: diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc index 1438d8c3d8..1616086b1f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc @@ -20,9 +20,9 @@ #if !TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace subtle { namespace percpu { @@ -31,44 +31,44 @@ static void Unsupported() { "RSEQ function called on unsupported platform."); } -int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p, - intptr_t old_val, intptr_t new_val) { +int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p, + intptr_t old_val, intptr_t new_val) { Unsupported(); return -1; } -int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift, - OverflowHandler f) { +int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift, + OverflowHandler f) { Unsupported(); return -1; } -int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item, - OverflowHandler f) { +int 
TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item, + OverflowHandler f) { Unsupported(); return -1; } -void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f, - size_t shift) { +void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f, + size_t shift) { Unsupported(); return nullptr; } -void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl, - UnderflowHandler f) { +void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl, + UnderflowHandler f) { Unsupported(); return nullptr; } -size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl, - void **batch, size_t len) { +size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl, + void **batch, size_t len) { Unsupported(); return 0; } -size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl, - void **batch, size_t len) { +size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl, + void **batch, size_t len) { Unsupported(); return 0; } @@ -80,8 +80,8 @@ int PerCpuReadCycleCounter(int64_t *cycles) { } // namespace percpu } // namespace subtle -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // !TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S index 866f4f90ca..fb9c311033 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S @@ -155,11 +155,11 @@ label##_trampoline: \ movl 4(%rax), dest; /* cpuid is 32-bits */ #define FETCH_VCPU(dest) \ movzwl 30(%rax), dest; /* vcpu_id is 16-bits */ -#define START_RSEQ(src) \ - .L##src##_abort: \ - call tcmalloc_internal_tls_fetch_pic@PLT; \ - leaq __rseq_cs_##src(%rip), %r11; \ - movq %r11, 8(%rax); \ +#define START_RSEQ(src) \ + .L##src##_abort: \ + 
call tcmalloc_internal_tls_fetch_pic@PLT; \ + leaq __rseq_cs_##src(%rip), %r11; \ + movq %r11, 8(%rax); \ .L##src##_start: /* @@ -167,9 +167,9 @@ label##_trampoline: \ * generates a thread-local address which will not change across a missed * restart. This must precede the construction of any preparatory state. */ - .local tcmalloc_internal_tls_fetch_pic - .type tcmalloc_internal_tls_fetch_pic, @function -tcmalloc_internal_tls_fetch_pic: + .local tcmalloc_internal_tls_fetch_pic + .type tcmalloc_internal_tls_fetch_pic, @function +tcmalloc_internal_tls_fetch_pic: .cfi_startproc push %rbp .cfi_def_cfa_offset 16 @@ -205,7 +205,7 @@ tcmalloc_internal_tls_fetch_pic: .cfi_def_cfa_offset 8 ret; /* &__rseq_abi in %rax */ .cfi_endproc -ENCODE_SIZE(tcmalloc_internal_tls_fetch_pic) +ENCODE_SIZE(tcmalloc_internal_tls_fetch_pic) #endif /* !defined(__PIC__) || defined(__PIE__) */ /* ---------------- end helper macros ---------------- */ @@ -221,52 +221,52 @@ ENCODE_SIZE(tcmalloc_internal_tls_fetch_pic) */ /* - * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p, + * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p, * long old_val, long new_val) */ .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PerCpuCmpxchg64 - .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function -TcmallocSlab_Internal_PerCpuCmpxchg64: + .globl TcmallocSlab_Internal_PerCpuCmpxchg64 + .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function +TcmallocSlab_Internal_PerCpuCmpxchg64: .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64); + START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64); FETCH_CPU(%eax); cmp %eax, %edi; /* check cpu vs current_cpu */ - jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit; + jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit; cmp %rdx, (%rsi); /* verify *p == old */ - jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch; + jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch; mov %rcx, (%rsi); 
-.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit: ret; /* return current cpu, indicating mismatch OR success */ -.LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch: mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */ ret; .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) +ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU - .type TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU, @function -TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU: + .globl TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU + .type TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU, @function +TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU: .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU); + START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU); FETCH_VCPU(%eax); cmp %eax, %edi; /* check cpu vs current_cpu */ - jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit; + jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit; cmp %rdx, (%rsi); /* verify *p == old */ - jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch; + jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch; mov %rcx, (%rsi); -.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit: ret; /* return current cpu, indicating mismatch OR success */ -.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch: mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */ ret; .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) 
+ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) -/* size_t TcmallocSlab_Internal_PushBatch_FixedShift( +/* size_t TcmallocSlab_Internal_PushBatch_FixedShift( * void *ptr (%rdi), * size_t cl (%rsi), * void** batch (%rdx), @@ -290,11 +290,11 @@ DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) * } */ .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PushBatch_FixedShift - .type TcmallocSlab_Internal_PushBatch_FixedShift, @function -TcmallocSlab_Internal_PushBatch_FixedShift: + .globl TcmallocSlab_Internal_PushBatch_FixedShift + .type TcmallocSlab_Internal_PushBatch_FixedShift, @function +TcmallocSlab_Internal_PushBatch_FixedShift: .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift); + START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift); FETCH_CPU(%r8d); shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; /* multiply cpu by 256k */ @@ -302,37 +302,37 @@ TcmallocSlab_Internal_PushBatch_FixedShift: movzwq (%r8, %rsi, 8), %r9; /* current */ movzwq 6(%r8, %rsi, 8), %r10; /* end */ cmpq %r10, %r9; - jae .LTcmallocSlab_Internal_PushBatch_FixedShift_full; + jae .LTcmallocSlab_Internal_PushBatch_FixedShift_full; movq %rcx, %r11; /* r11 = copy of len */ subq %r9, %r10; /* r10 = free capacity */ cmpq %rcx, %r10; cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */ addq %r9, %r10; -.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: +.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: decq %r11; movq (%rdx, %r11, 8), %rax; movq %rax, (%r8, %r9, 8); incq %r9; cmpq %r9, %r10; - jne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop + jne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop movw %r9w, (%r8, %rsi, 8); -.LTcmallocSlab_Internal_PushBatch_FixedShift_commit: +.LTcmallocSlab_Internal_PushBatch_FixedShift_commit: movq %rcx, %rax; subq %r11, %rax; ret; -.LTcmallocSlab_Internal_PushBatch_FixedShift_full: 
+.LTcmallocSlab_Internal_PushBatch_FixedShift_full: xor %rax, %rax; ret; .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PushBatch_FixedShift_VCPU - .type TcmallocSlab_Internal_PushBatch_FixedShift_VCPU, @function -TcmallocSlab_Internal_PushBatch_FixedShift_VCPU: + .globl TcmallocSlab_Internal_PushBatch_FixedShift_VCPU + .type TcmallocSlab_Internal_PushBatch_FixedShift_VCPU, @function +TcmallocSlab_Internal_PushBatch_FixedShift_VCPU: .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU); + START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU); FETCH_VCPU(%r8d); shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; /* multiply cpu by 256k */ @@ -340,32 +340,32 @@ TcmallocSlab_Internal_PushBatch_FixedShift_VCPU: movzwq (%r8, %rsi, 8), %r9; /* current */ movzwq 6(%r8, %rsi, 8), %r10; /* end */ cmpq %r10, %r9; - jae .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full; + jae .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full; movq %rcx, %r11; /* r11 = copy of len */ subq %r9, %r10; /* r10 = free capacity */ cmpq %rcx, %r10; cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */ addq %r9, %r10; -.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop: +.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop: decq %r11; movq (%rdx, %r11, 8), %rax; movq %rax, (%r8, %r9, 8); incq %r9; cmpq %r9, %r10; - jne .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop + jne .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop movw %r9w, (%r8, %rsi, 8); -.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_commit: +.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_commit: movq %rcx, %rax; subq %r11, %rax; ret; -.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full: 
+.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full: xor %rax, %rax; ret; .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) -/* size_t TcmallocSlab_Internal_PopBatch_FixedShift( +/* size_t TcmallocSlab_Internal_PopBatch_FixedShift( * void *ptr (%rdi), * size_t cl (%rsi), * void** batch (%rdx), @@ -389,11 +389,11 @@ DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) * } */ .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PopBatch_FixedShift - .type TcmallocSlab_Internal_PopBatch_FixedShift, @function -TcmallocSlab_Internal_PopBatch_FixedShift: + .globl TcmallocSlab_Internal_PopBatch_FixedShift + .type TcmallocSlab_Internal_PopBatch_FixedShift, @function +TcmallocSlab_Internal_PopBatch_FixedShift: .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift); + START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift); FETCH_CPU(%r8d); shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; /* multiply cpu by 256k */ @@ -401,35 +401,35 @@ TcmallocSlab_Internal_PopBatch_FixedShift: movzwq (%r8, %rsi, 8), %r9; /* current */ movzwq 4(%r8, %rsi, 8), %r10; /* begin */ cmp %r10, %r9; - jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_empty; + jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_empty; movq %r9, %r11; subq %r10, %r11; /* r11 = available items */ cmpq %rcx, %r11; cmovaq %rcx, %r11; /* r11 = min(len, available items) */ xorq %rax, %rax; -.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: +.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: decq %r9; movq (%r8, %r9, 8), %r10; movq %r10, (%rdx, %rax, 8); incq %rax; cmpq %rax, %r11; - jne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop + jne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop movw %r9w, (%r8, %rsi, 8); 
-.LTcmallocSlab_Internal_PopBatch_FixedShift_commit: +.LTcmallocSlab_Internal_PopBatch_FixedShift_commit: ret; -.LTcmallocSlab_Internal_PopBatch_FixedShift_empty: +.LTcmallocSlab_Internal_PopBatch_FixedShift_empty: xor %rax, %rax; ret; .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift) +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift) .p2align 6; /* aligns to 2^6 with NOP filling */ - .globl TcmallocSlab_Internal_PopBatch_FixedShift_VCPU - .type TcmallocSlab_Internal_PopBatch_FixedShift_VCPU, @function -TcmallocSlab_Internal_PopBatch_FixedShift_VCPU: + .globl TcmallocSlab_Internal_PopBatch_FixedShift_VCPU + .type TcmallocSlab_Internal_PopBatch_FixedShift_VCPU, @function +TcmallocSlab_Internal_PopBatch_FixedShift_VCPU: .cfi_startproc - START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU); + START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU); FETCH_VCPU(%r8d); shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; /* multiply cpu by 256k */ @@ -437,27 +437,27 @@ TcmallocSlab_Internal_PopBatch_FixedShift_VCPU: movzwq (%r8, %rsi, 8), %r9; /* current */ movzwq 4(%r8, %rsi, 8), %r10; /* begin */ cmp %r10, %r9; - jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty; + jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty; movq %r9, %r11; subq %r10, %r11; /* r11 = available items */ cmpq %rcx, %r11; cmovaq %rcx, %r11; /* r11 = min(len, available items) */ xorq %rax, %rax; -.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop: +.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop: decq %r9; movq (%r8, %r9, 8), %r10; movq %r10, (%rdx, %rax, 8); incq %rax; cmpq %rax, %r11; - jne .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop + jne .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop movw %r9w, (%r8, %rsi, 8); -.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_commit: 
+.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_commit: ret; -.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty: +.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty: xor %rax, %rax; ret; .cfi_endproc -ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU) -DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU) +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU) .section .note.GNU-stack,"",@progbits diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h index 91d15ba908..5264075f1b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h @@ -18,7 +18,7 @@ #include <atomic> #include <cstring> -#include "absl/base/casts.h" +#include "absl/base/casts.h" #include "absl/base/dynamic_annotations.h" #include "absl/base/internal/sysinfo.h" #include "tcmalloc/internal/mincore.h" @@ -46,9 +46,9 @@ #define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 0 #endif -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { struct PerCPUMetadataState { size_t virtual_size; @@ -66,24 +66,24 @@ namespace percpu { // Methods of this type must only be used in threads where it is known that the // percpu primitives are available and percpu::IsFast() has previously returned // 'true'. -template <size_t NumClasses> +template <size_t NumClasses> class TcmallocSlab { public: - constexpr TcmallocSlab() = default; + constexpr TcmallocSlab() = default; // Init must be called before any other methods. // <alloc> is memory allocation callback (e.g. malloc). // <capacity> callback returns max capacity for size class <cl>. 
// <lazy> indicates that per-CPU slabs should be populated on demand - // <shift> indicates the number of bits to shift the CPU ID in order to - // obtain the location of the per-CPU slab. If this parameter matches - // TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT as set in - // percpu_intenal.h then the assembly language versions of push/pop - // batch can be used; otherwise batch operations are emulated. + // <shift> indicates the number of bits to shift the CPU ID in order to + // obtain the location of the per-CPU slab. If this parameter matches + // TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT as set in + // percpu_intenal.h then the assembly language versions of push/pop + // batch can be used; otherwise batch operations are emulated. // // Initial capacity is 0 for all slabs. - void Init(void*(alloc)(size_t size), size_t (*capacity)(size_t cl), bool lazy, - size_t shift); + void Init(void*(alloc)(size_t size), size_t (*capacity)(size_t cl), bool lazy, + size_t shift); // Only may be called if Init(..., lazy = true) was used. void InitCPU(int cpu, size_t (*capacity)(size_t cl)); @@ -129,18 +129,18 @@ class TcmallocSlab { // REQUIRES: len > 0. size_t PopBatch(size_t cl, void** batch, size_t len); - // Decrements the cpu/cl slab's capacity to no less than max(capacity-len, 0) - // and returns the actual decrement applied. It attempts to shrink any - // unused capacity (i.e end-current) in cpu/cl's slab; if it does not have - // enough unused items, it pops up to <len> items from cpu/cl slab and then - // shrinks the freed capacity. - // - // May be called from another processor, not just the <cpu>. - // REQUIRES: len > 0. - typedef void (*ShrinkHandler)(void* arg, size_t cl, void** batch, size_t n); - size_t ShrinkOtherCache(int cpu, size_t cl, size_t len, void* shrink_ctx, - ShrinkHandler f); - + // Decrements the cpu/cl slab's capacity to no less than max(capacity-len, 0) + // and returns the actual decrement applied. 
It attempts to shrink any + // unused capacity (i.e end-current) in cpu/cl's slab; if it does not have + // enough unused items, it pops up to <len> items from cpu/cl slab and then + // shrinks the freed capacity. + // + // May be called from another processor, not just the <cpu>. + // REQUIRES: len > 0. + typedef void (*ShrinkHandler)(void* arg, size_t cl, void** batch, size_t n); + size_t ShrinkOtherCache(int cpu, size_t cl, size_t len, void* shrink_ctx, + ShrinkHandler f); + // Remove all items (of all classes) from <cpu>'s slab; reset capacity for all // classes to zero. Then, for each sizeclass, invoke // DrainHandler(drain_ctx, cl, <items from slab>, <previous slab capacity>); @@ -159,13 +159,13 @@ class TcmallocSlab { // headers (Header struct). The remaining memory contain slab arrays. struct Slabs { std::atomic<int64_t> header[NumClasses]; - void* mem[]; + void* mem[]; }; - inline int GetCurrentVirtualCpuUnsafe() { - return VirtualRseqCpuId(virtual_cpu_id_offset_); - } - + inline int GetCurrentVirtualCpuUnsafe() { + return VirtualRseqCpuId(virtual_cpu_id_offset_); + } + private: // Slab header (packed, atomically updated 64-bit). struct Header { @@ -175,13 +175,13 @@ class TcmallocSlab { // Copy of end. Updated by Shrink/Grow, but is not overwritten by Drain. uint16_t end_copy; // Lock updates only begin and end with a 32-bit write. - union { - struct { - uint16_t begin; - uint16_t end; - }; - uint32_t lock_update; - }; + union { + struct { + uint16_t begin; + uint16_t end; + }; + uint32_t lock_update; + }; // Lock is used by Drain to stop concurrent mutations of the Header. // Lock sets begin to 0xffff and end to 0, which makes Push and Pop fail @@ -194,36 +194,36 @@ class TcmallocSlab { static_assert(sizeof(Header) == sizeof(std::atomic<int64_t>), "bad Header size"); - Slabs* slabs_ = nullptr; - size_t shift_ = 0; - // This is in units of bytes. 
- size_t virtual_cpu_id_offset_ = offsetof(kernel_rseq, cpu_id); + Slabs* slabs_ = nullptr; + size_t shift_ = 0; + // This is in units of bytes. + size_t virtual_cpu_id_offset_ = offsetof(kernel_rseq, cpu_id); Slabs* CpuMemoryStart(int cpu) const; std::atomic<int64_t>* GetHeader(int cpu, size_t cl) const; static Header LoadHeader(std::atomic<int64_t>* hdrp); static void StoreHeader(std::atomic<int64_t>* hdrp, Header hdr); static int CompareAndSwapHeader(int cpu, std::atomic<int64_t>* hdrp, - Header old, Header hdr, - size_t virtual_cpu_id_offset); + Header old, Header hdr, + size_t virtual_cpu_id_offset); }; -template <size_t NumClasses> -inline size_t TcmallocSlab<NumClasses>::Length(int cpu, size_t cl) const { +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::Length(int cpu, size_t cl) const { Header hdr = LoadHeader(GetHeader(cpu, cl)); return hdr.IsLocked() ? 0 : hdr.current - hdr.begin; } -template <size_t NumClasses> -inline size_t TcmallocSlab<NumClasses>::Capacity(int cpu, size_t cl) const { +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::Capacity(int cpu, size_t cl) const { Header hdr = LoadHeader(GetHeader(cpu, cl)); return hdr.IsLocked() ? 
0 : hdr.end - hdr.begin; } -template <size_t NumClasses> -inline size_t TcmallocSlab<NumClasses>::Grow(int cpu, size_t cl, size_t len, - size_t max_cap) { - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::Grow(int cpu, size_t cl, size_t len, + size_t max_cap) { + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); for (;;) { Header old = LoadHeader(hdrp); @@ -234,8 +234,8 @@ inline size_t TcmallocSlab<NumClasses>::Grow(int cpu, size_t cl, size_t len, Header hdr = old; hdr.end += n; hdr.end_copy += n; - const int ret = - CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset); + const int ret = + CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset); if (ret == cpu) { return n; } else if (ret >= 0) { @@ -244,9 +244,9 @@ inline size_t TcmallocSlab<NumClasses>::Grow(int cpu, size_t cl, size_t len, } } -template <size_t NumClasses> -inline size_t TcmallocSlab<NumClasses>::Shrink(int cpu, size_t cl, size_t len) { - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::Shrink(int cpu, size_t cl, size_t len) { + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); for (;;) { Header old = LoadHeader(hdrp); @@ -257,8 +257,8 @@ inline size_t TcmallocSlab<NumClasses>::Shrink(int cpu, size_t cl, size_t len) { Header hdr = old; hdr.end -= n; hdr.end_copy -= n; - const int ret = - CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset); + const int ret = + CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset); if (ret == cpu) { return n; } else if (ret >= 0) { @@ -268,10 +268,10 @@ inline size_t TcmallocSlab<NumClasses>::Shrink(int cpu, size_t cl, size_t len) { } #if defined(__x86_64__) -template <size_t NumClasses> -static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int 
TcmallocSlab_Internal_Push( - typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, void* item, - const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) { +template <size_t NumClasses> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( + typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, void* item, + const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) { #if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO asm goto( #else @@ -282,10 +282,10 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( // relocations, but could be read-only for non-PIE builds. ".pushsection __rseq_cs, \"aw?\"\n" ".balign 32\n" - ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n" - ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n" - "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n" + ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n" + ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n" + "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n" ".long 0x0\n" ".long 0x0\n" ".quad 4f\n" @@ -298,20 +298,20 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" "1:\n" ".balign 8;" - ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n" // Force this section to be retained. It is for debugging, but is // otherwise not referenced. 
".popsection\n" ".pushsection .text.unlikely, \"ax?\"\n" ".byte 0x0f, 0x1f, 0x05\n" ".long %c[rseq_sig]\n" - ".local TcmallocSlab_Internal_Push_trampoline_%=\n" - ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n" - "TcmallocSlab_Internal_Push_trampoline_%=:\n" + ".local TcmallocSlab_Internal_Push_trampoline_%=\n" + ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n" + "TcmallocSlab_Internal_Push_trampoline_%=:\n" "2:\n" "jmp 3f\n" - ".size TcmallocSlab_Internal_Push_trampoline_%=, . - " - "TcmallocSlab_Internal_Push_trampoline_%=;\n" + ".size TcmallocSlab_Internal_Push_trampoline_%=, . - " + "TcmallocSlab_Internal_Push_trampoline_%=;\n" ".popsection\n" // Prepare // @@ -325,14 +325,14 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( // r10: Scratch // r11: Current "3:\n" - "lea __rseq_cs_TcmallocSlab_Internal_Push_%=(%%rip), %%r10\n" + "lea __rseq_cs_TcmallocSlab_Internal_Push_%=(%%rip), %%r10\n" "mov %%r10, %c[rseq_cs_offset](%[rseq_abi])\n" // Start "4:\n" // scratch = __rseq_abi.cpu_id; "movzwl (%[rseq_abi], %[rseq_cpu_offset]), %%r10d\n" // scratch = slabs + scratch - "shlq %b[shift], %%r10\n" + "shlq %b[shift], %%r10\n" "add %[slabs], %%r10\n" // r11 = slabs->current; "movzwq (%%r10, %[cl], 8), %%r11\n" @@ -356,8 +356,8 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( #endif : [rseq_abi] "r"(&__rseq_abi), [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), - [rseq_cpu_offset] "r"(virtual_cpu_id_offset), - [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift), + [rseq_cpu_offset] "r"(virtual_cpu_id_offset), + [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift), [slabs] "r"(slabs), [cl] "r"(cl), [item] "r"(item) : "cc", "memory", "r10", "r11" #if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO @@ -374,168 +374,168 @@ overflow_label: // As of 3/2020, LLVM's asm goto (even with output constraints) only provides // values for the fallthrough path. 
The values on the taken branches are // undefined. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); return f(cpu, cl, item); } #endif // defined(__x86_64__) -#if defined(__aarch64__) - -template <size_t NumClasses> -static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( - typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, void* item, - const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) { - void* region_start; - uint64_t cpu_id; - void* end_ptr; - uintptr_t current; - uintptr_t end; - // Multiply cl by the bytesize of each header - size_t cl_lsl3 = cl * 8; -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO - asm goto( -#else - bool overflow; - asm volatile( -#endif - // TODO(b/141629158): __rseq_cs only needs to be writeable to allow for - // relocations, but could be read-only for non-PIE builds. - ".pushsection __rseq_cs, \"aw?\"\n" - ".balign 32\n" - ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n" - ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n" - "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n" - ".long 0x0\n" - ".long 0x0\n" - ".quad 4f\n" - ".quad 5f - 4f\n" - ".quad 2f\n" - ".popsection\n" -#if !defined(__clang_major__) || __clang_major__ >= 9 - ".reloc 0, R_AARCH64_NONE, 1f\n" -#endif - ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" - "1:\n" - ".balign 8;" - ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - // Force this section to be retained. It is for debugging, but is - // otherwise not referenced. 
- ".popsection\n" - ".pushsection .text.unlikely, \"ax?\"\n" - ".long %c[rseq_sig]\n" - ".local TcmallocSlab_Internal_Push_trampoline_%=\n" - ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n" - "TcmallocSlab_Internal_Push_trampoline_%=:\n" - "2:\n" - "b 3f\n" - ".popsection\n" - // Prepare - // - // TODO(b/151503411): Pending widespread availability of LLVM's asm - // goto with output contraints - // (https://github.com/llvm/llvm-project/commit/23c2a5ce33f0), we can - // return the register allocations to the compiler rather than using - // explicit clobbers. Prior to this, blocks which use asm goto cannot - // also specify outputs. - "3:\n" - // Use current as scratch here to hold address of this function's - // critical section - "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Push_%=\n" - "add %[current], %[current], " - ":lo12:__rseq_cs_TcmallocSlab_Internal_Push_%=\n" - "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n" - // Start - "4:\n" - // cpu_id = __rseq_abi.cpu_id; - "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n" - // region_start = Start of cpu region - "lsl %[region_start], %[cpu_id], %[shift]\n" - "add %[region_start], %[region_start], %[slabs]\n" - // end_ptr = &(slab_headers[0]->end) - "add %[end_ptr], %[region_start], #6\n" - // current = slab_headers[cl]->current (current index) - "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n" - // end = slab_headers[cl]->end (end index) - "ldrh %w[end], [%[end_ptr], %[cl_lsl3]]\n" - // if (ABSL_PREDICT_FALSE(current >= end)) { goto overflow; } - "cmp %[end], %[current]\n" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO - "b.le %l[overflow_label]\n" -#else - "b.le 5f\n" - // Important! code below this must not affect any flags (i.e.: ccae) - // If so, the above code needs to explicitly set a ccae return value. 
-#endif - "str %[item], [%[region_start], %[current], LSL #3]\n" - "add %w[current], %w[current], #1\n" - "strh %w[current], [%[region_start], %[cl_lsl3]]\n" - // Commit - "5:\n" - : [end_ptr] "=&r"(end_ptr), [cpu_id] "=&r"(cpu_id), - [current] "=&r"(current), [end] "=&r"(end), - [region_start] "=&r"(region_start) - -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO - , - [overflow] "=@ccae"(overflow) -#endif - : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs), - [cl_lsl3] "r"(cl_lsl3), [item] "r"(item), [rseq_abi] "r"(&__rseq_abi), - [shift] "r"(shift), - // Constants - [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), - [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE) - : "cc", "memory" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO - : overflow_label -#endif - ); -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO - if (ABSL_PREDICT_FALSE(overflow)) { - goto overflow_label; - } -#endif - return 0; -overflow_label: -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - // As of 3/2020, LLVM's asm goto (even with output constraints) only provides - // values for the fallthrough path. The values on the taken branches are - // undefined. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); -#else - // With asm goto--without output constraints--the value of scratch is - // well-defined by the compiler and our implementation. As an optimization on - // this case, we can avoid looking up cpu_id again, by undoing the - // transformation of cpu_id to the value of scratch. 
- int cpu = cpu_id; -#endif - return f(cpu, cl, item); -} -#endif // defined (__aarch64__) - -template <size_t NumClasses> -inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool TcmallocSlab<NumClasses>::Push( +#if defined(__aarch64__) + +template <size_t NumClasses> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( + typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, void* item, + const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) { + void* region_start; + uint64_t cpu_id; + void* end_ptr; + uintptr_t current; + uintptr_t end; + // Multiply cl by the bytesize of each header + size_t cl_lsl3 = cl * 8; +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + asm goto( +#else + bool overflow; + asm volatile( +#endif + // TODO(b/141629158): __rseq_cs only needs to be writeable to allow for + // relocations, but could be read-only for non-PIE builds. + ".pushsection __rseq_cs, \"aw?\"\n" + ".balign 32\n" + ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n" + ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n" + "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n" + ".long 0x0\n" + ".long 0x0\n" + ".quad 4f\n" + ".quad 5f - 4f\n" + ".quad 2f\n" + ".popsection\n" +#if !defined(__clang_major__) || __clang_major__ >= 9 + ".reloc 0, R_AARCH64_NONE, 1f\n" +#endif + ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" + "1:\n" + ".balign 8;" + ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + // Force this section to be retained. It is for debugging, but is + // otherwise not referenced. 
+ ".popsection\n" + ".pushsection .text.unlikely, \"ax?\"\n" + ".long %c[rseq_sig]\n" + ".local TcmallocSlab_Internal_Push_trampoline_%=\n" + ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n" + "TcmallocSlab_Internal_Push_trampoline_%=:\n" + "2:\n" + "b 3f\n" + ".popsection\n" + // Prepare + // + // TODO(b/151503411): Pending widespread availability of LLVM's asm + // goto with output contraints + // (https://github.com/llvm/llvm-project/commit/23c2a5ce33f0), we can + // return the register allocations to the compiler rather than using + // explicit clobbers. Prior to this, blocks which use asm goto cannot + // also specify outputs. + "3:\n" + // Use current as scratch here to hold address of this function's + // critical section + "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + "add %[current], %[current], " + ":lo12:__rseq_cs_TcmallocSlab_Internal_Push_%=\n" + "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n" + // Start + "4:\n" + // cpu_id = __rseq_abi.cpu_id; + "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n" + // region_start = Start of cpu region + "lsl %[region_start], %[cpu_id], %[shift]\n" + "add %[region_start], %[region_start], %[slabs]\n" + // end_ptr = &(slab_headers[0]->end) + "add %[end_ptr], %[region_start], #6\n" + // current = slab_headers[cl]->current (current index) + "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n" + // end = slab_headers[cl]->end (end index) + "ldrh %w[end], [%[end_ptr], %[cl_lsl3]]\n" + // if (ABSL_PREDICT_FALSE(current >= end)) { goto overflow; } + "cmp %[end], %[current]\n" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + "b.le %l[overflow_label]\n" +#else + "b.le 5f\n" + // Important! code below this must not affect any flags (i.e.: ccae) + // If so, the above code needs to explicitly set a ccae return value. 
+#endif + "str %[item], [%[region_start], %[current], LSL #3]\n" + "add %w[current], %w[current], #1\n" + "strh %w[current], [%[region_start], %[cl_lsl3]]\n" + // Commit + "5:\n" + : [end_ptr] "=&r"(end_ptr), [cpu_id] "=&r"(cpu_id), + [current] "=&r"(current), [end] "=&r"(end), + [region_start] "=&r"(region_start) + +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + , + [overflow] "=@ccae"(overflow) +#endif + : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs), + [cl_lsl3] "r"(cl_lsl3), [item] "r"(item), [rseq_abi] "r"(&__rseq_abi), + [shift] "r"(shift), + // Constants + [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), + [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE) + : "cc", "memory" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + : overflow_label +#endif + ); +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + if (ABSL_PREDICT_FALSE(overflow)) { + goto overflow_label; + } +#endif + return 0; +overflow_label: +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + // As of 3/2020, LLVM's asm goto (even with output constraints) only provides + // values for the fallthrough path. The values on the taken branches are + // undefined. + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); +#else + // With asm goto--without output constraints--the value of scratch is + // well-defined by the compiler and our implementation. As an optimization on + // this case, we can avoid looking up cpu_id again, by undoing the + // transformation of cpu_id to the value of scratch. 
+ int cpu = cpu_id; +#endif + return f(cpu, cl, item); +} +#endif // defined (__aarch64__) + +template <size_t NumClasses> +inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool TcmallocSlab<NumClasses>::Push( size_t cl, void* item, OverflowHandler f) { ASSERT(item != nullptr); -#if defined(__x86_64__) || defined(__aarch64__) - return TcmallocSlab_Internal_Push<NumClasses>(slabs_, cl, item, shift_, f, - virtual_cpu_id_offset_) >= 0; +#if defined(__x86_64__) || defined(__aarch64__) + return TcmallocSlab_Internal_Push<NumClasses>(slabs_, cl, item, shift_, f, + virtual_cpu_id_offset_) >= 0; #else - if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { - return TcmallocSlab_Internal_Push_FixedShift(slabs_, cl, item, f) >= 0; + if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + return TcmallocSlab_Internal_Push_FixedShift(slabs_, cl, item, f) >= 0; } else { - return TcmallocSlab_Internal_Push(slabs_, cl, item, shift_, f) >= 0; + return TcmallocSlab_Internal_Push(slabs_, cl, item, shift_, f) >= 0; } #endif } #if defined(__x86_64__) -template <size_t NumClasses> -static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( - typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, - UnderflowHandler f, const size_t shift, - const size_t virtual_cpu_id_offset) { +template <size_t NumClasses> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( + typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, + UnderflowHandler f, const size_t shift, + const size_t virtual_cpu_id_offset) { void* result; void* scratch; uintptr_t current; @@ -550,10 +550,10 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( // for relocations, but could be read-only for non-PIE builds. 
".pushsection __rseq_cs, \"aw?\"\n" ".balign 32\n" - ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n" - ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n" - "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n" + ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n" + ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n" + "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n" ".long 0x0\n" ".long 0x0\n" ".quad 4f\n" @@ -566,31 +566,31 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" "1:\n" ".balign 8;" - ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" // Force this section to be retained. It is for debugging, but is // otherwise not referenced. ".popsection\n" ".pushsection .text.unlikely, \"ax?\"\n" ".byte 0x0f, 0x1f, 0x05\n" ".long %c[rseq_sig]\n" - ".local TcmallocSlab_Internal_Pop_trampoline_%=\n" - ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n" - "TcmallocSlab_Internal_Pop_trampoline_%=:\n" + ".local TcmallocSlab_Internal_Pop_trampoline_%=\n" + ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n" + "TcmallocSlab_Internal_Pop_trampoline_%=:\n" "2:\n" "jmp 3f\n" - ".size TcmallocSlab_Internal_Pop_trampoline_%=, . - " - "TcmallocSlab_Internal_Pop_trampoline_%=;\n" + ".size TcmallocSlab_Internal_Pop_trampoline_%=, . 
- " + "TcmallocSlab_Internal_Pop_trampoline_%=;\n" ".popsection\n" // Prepare "3:\n" - "lea __rseq_cs_TcmallocSlab_Internal_Pop_%=(%%rip), %[scratch];\n" + "lea __rseq_cs_TcmallocSlab_Internal_Pop_%=(%%rip), %[scratch];\n" "mov %[scratch], %c[rseq_cs_offset](%[rseq_abi])\n" // Start "4:\n" // scratch = __rseq_abi.cpu_id; "movzwl (%[rseq_abi], %[rseq_cpu_offset]), %k[scratch]\n" // scratch = slabs + scratch - "shlq %b[shift], %[scratch]\n" + "shlq %b[shift], %[scratch]\n" "add %[slabs], %[scratch]\n" // current = scratch->header[cl].current; "movzwq (%[scratch], %[cl], 8), %[current]\n" @@ -623,8 +623,8 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( [scratch] "=&r"(scratch), [current] "=&r"(current) : [rseq_abi] "r"(&__rseq_abi), [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), - [rseq_cpu_offset] "r"(virtual_cpu_id_offset), - [rseq_sig] "n"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift), + [rseq_cpu_offset] "r"(virtual_cpu_id_offset), + [rseq_sig] "n"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift), [slabs] "r"(slabs), [cl] "r"(cl) : "cc", "memory" #if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT @@ -643,166 +643,166 @@ underflow_path: // As of 3/2020, LLVM's asm goto (even with output constraints) only provides // values for the fallthrough path. The values on the taken branches are // undefined. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); #else // With asm goto--without output constraints--the value of scratch is // well-defined by the compiler and our implementation. As an optimization on // this case, we can avoid looking up cpu_id again, by undoing the // transformation of cpu_id to the value of scratch. 
- int cpu = - (reinterpret_cast<char*>(scratch) - reinterpret_cast<char*>(slabs)) >> - shift; + int cpu = + (reinterpret_cast<char*>(scratch) - reinterpret_cast<char*>(slabs)) >> + shift; #endif return f(cpu, cl); } #endif // defined(__x86_64__) -#if defined(__aarch64__) -template <size_t NumClasses> -static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( - typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, - UnderflowHandler f, const size_t shift, - const size_t virtual_cpu_id_offset) { - void* result; - void* region_start; - uint64_t cpu_id; - void* begin_ptr; - uintptr_t current; - uintptr_t new_current; - uintptr_t begin; - // Multiply cl by the bytesize of each header - size_t cl_lsl3 = cl * 8; -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - asm goto -#else - bool underflow; - asm -#endif - ( - // TODO(b/141629158): __rseq_cs only needs to be writeable to allow - // for relocations, but could be read-only for non-PIE builds. - ".pushsection __rseq_cs, \"aw?\"\n" - ".balign 32\n" - ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n" - ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n" - "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n" - ".long 0x0\n" - ".long 0x0\n" - ".quad 4f\n" - ".quad 5f - 4f\n" - ".quad 2f\n" - ".popsection\n" -#if !defined(__clang_major__) || __clang_major__ >= 9 - ".reloc 0, R_AARCH64_NONE, 1f\n" -#endif - ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" - "1:\n" - ".balign 8;" - ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - // Force this section to be retained. It is for debugging, but is - // otherwise not referenced. 
- ".popsection\n" - ".pushsection .text.unlikely, \"ax?\"\n" - ".long %c[rseq_sig]\n" - ".local TcmallocSlab_Internal_Pop_trampoline_%=\n" - ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n" - "TcmallocSlab_Internal_Pop_trampoline_%=:\n" - "2:\n" - "b 3f\n" - ".popsection\n" - // Prepare - "3:\n" - // Use current as scratch here to hold address of this function's - // critical section - "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - "add %[current], %[current], " - ":lo12:__rseq_cs_TcmallocSlab_Internal_Pop_%=\n" - "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n" - // Start - "4:\n" - // cpu_id = __rseq_abi.cpu_id; - "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n" - // region_start = Start of cpu region - "lsl %[region_start], %[cpu_id], %[shift]\n" - "add %[region_start], %[region_start], %[slabs]\n" - // begin_ptr = &(slab_headers[0]->begin) - "add %[begin_ptr], %[region_start], #4\n" - // current = slab_headers[cl]->current (current index) - "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n" - // begin = slab_headers[cl]->begin (begin index) - "ldrh %w[begin], [%[begin_ptr], %[cl_lsl3]]\n" - // if (ABSL_PREDICT_FALSE(begin >= current)) { goto overflow; } - "cmp %w[begin], %w[current]\n" - "sub %w[new_current], %w[current], #1\n" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - "b.ge %l[underflow_path]\n" -#else - "b.ge 5f\n" - // Important! code below this must not affect any flags (i.e.: ccbe) - // If so, the above code needs to explicitly set a ccbe return value. 
-#endif - // current-- - "ldr %[result], [%[region_start], %[new_current], LSL #3]\n" - "strh %w[new_current], [%[region_start], %[cl_lsl3]]\n" - // Commit - "5:\n" - : -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - [underflow] "=@ccbe"(underflow), -#endif - [result] "=&r"(result), - // Temps - [cpu_id] "=&r"(cpu_id), [region_start] "=&r"(region_start), - [begin] "=&r"(begin), [current] "=&r"(current), - [new_current] "=&r"(new_current), [begin_ptr] "=&r"(begin_ptr) - // Real inputs - : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs), - [cl_lsl3] "r"(cl_lsl3), [rseq_abi] "r"(&__rseq_abi), - [shift] "r"(shift), - // constants - [rseq_cs_offset] "in"(offsetof(kernel_rseq, rseq_cs)), - [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE) - : "cc", "memory" -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - : underflow_path -#endif - ); -#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - if (ABSL_PREDICT_FALSE(underflow)) { - goto underflow_path; - } -#endif - - return result; -underflow_path: -#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT - // As of 3/2020, LLVM's asm goto (even with output constraints) only provides - // values for the fallthrough path. The values on the taken branches are - // undefined. - int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); -#else - // With asm goto--without output constraints--the value of scratch is - // well-defined by the compiler and our implementation. As an optimization on - // this case, we can avoid looking up cpu_id again, by undoing the - // transformation of cpu_id to the value of scratch. 
- int cpu = cpu_id; -#endif - return f(cpu, cl); -} -#endif // defined(__aarch64__) - -template <size_t NumClasses> -inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab<NumClasses>::Pop( +#if defined(__aarch64__) +template <size_t NumClasses> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( + typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, + UnderflowHandler f, const size_t shift, + const size_t virtual_cpu_id_offset) { + void* result; + void* region_start; + uint64_t cpu_id; + void* begin_ptr; + uintptr_t current; + uintptr_t new_current; + uintptr_t begin; + // Multiply cl by the bytesize of each header + size_t cl_lsl3 = cl * 8; +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + asm goto +#else + bool underflow; + asm +#endif + ( + // TODO(b/141629158): __rseq_cs only needs to be writeable to allow + // for relocations, but could be read-only for non-PIE builds. + ".pushsection __rseq_cs, \"aw?\"\n" + ".balign 32\n" + ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n" + ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n" + "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n" + ".long 0x0\n" + ".long 0x0\n" + ".quad 4f\n" + ".quad 5f - 4f\n" + ".quad 2f\n" + ".popsection\n" +#if !defined(__clang_major__) || __clang_major__ >= 9 + ".reloc 0, R_AARCH64_NONE, 1f\n" +#endif + ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" + "1:\n" + ".balign 8;" + ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + // Force this section to be retained. It is for debugging, but is + // otherwise not referenced. 
+ ".popsection\n" + ".pushsection .text.unlikely, \"ax?\"\n" + ".long %c[rseq_sig]\n" + ".local TcmallocSlab_Internal_Pop_trampoline_%=\n" + ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n" + "TcmallocSlab_Internal_Pop_trampoline_%=:\n" + "2:\n" + "b 3f\n" + ".popsection\n" + // Prepare + "3:\n" + // Use current as scratch here to hold address of this function's + // critical section + "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + "add %[current], %[current], " + ":lo12:__rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n" + // Start + "4:\n" + // cpu_id = __rseq_abi.cpu_id; + "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n" + // region_start = Start of cpu region + "lsl %[region_start], %[cpu_id], %[shift]\n" + "add %[region_start], %[region_start], %[slabs]\n" + // begin_ptr = &(slab_headers[0]->begin) + "add %[begin_ptr], %[region_start], #4\n" + // current = slab_headers[cl]->current (current index) + "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n" + // begin = slab_headers[cl]->begin (begin index) + "ldrh %w[begin], [%[begin_ptr], %[cl_lsl3]]\n" + // if (ABSL_PREDICT_FALSE(begin >= current)) { goto overflow; } + "cmp %w[begin], %w[current]\n" + "sub %w[new_current], %w[current], #1\n" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "b.ge %l[underflow_path]\n" +#else + "b.ge 5f\n" + // Important! code below this must not affect any flags (i.e.: ccbe) + // If so, the above code needs to explicitly set a ccbe return value. 
+#endif + // current-- + "ldr %[result], [%[region_start], %[new_current], LSL #3]\n" + "strh %w[new_current], [%[region_start], %[cl_lsl3]]\n" + // Commit + "5:\n" + : +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [underflow] "=@ccbe"(underflow), +#endif + [result] "=&r"(result), + // Temps + [cpu_id] "=&r"(cpu_id), [region_start] "=&r"(region_start), + [begin] "=&r"(begin), [current] "=&r"(current), + [new_current] "=&r"(new_current), [begin_ptr] "=&r"(begin_ptr) + // Real inputs + : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs), + [cl_lsl3] "r"(cl_lsl3), [rseq_abi] "r"(&__rseq_abi), + [shift] "r"(shift), + // constants + [rseq_cs_offset] "in"(offsetof(kernel_rseq, rseq_cs)), + [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE) + : "cc", "memory" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + : underflow_path +#endif + ); +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + if (ABSL_PREDICT_FALSE(underflow)) { + goto underflow_path; + } +#endif + + return result; +underflow_path: +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + // As of 3/2020, LLVM's asm goto (even with output constraints) only provides + // values for the fallthrough path. The values on the taken branches are + // undefined. + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); +#else + // With asm goto--without output constraints--the value of scratch is + // well-defined by the compiler and our implementation. As an optimization on + // this case, we can avoid looking up cpu_id again, by undoing the + // transformation of cpu_id to the value of scratch. 
+ int cpu = cpu_id; +#endif + return f(cpu, cl); +} +#endif // defined(__aarch64__) + +template <size_t NumClasses> +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab<NumClasses>::Pop( size_t cl, UnderflowHandler f) { -#if defined(__x86_64__) || defined(__aarch64__) - return TcmallocSlab_Internal_Pop<NumClasses>(slabs_, cl, f, shift_, - virtual_cpu_id_offset_); +#if defined(__x86_64__) || defined(__aarch64__) + return TcmallocSlab_Internal_Pop<NumClasses>(slabs_, cl, f, shift_, + virtual_cpu_id_offset_); #else - if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { - return TcmallocSlab_Internal_Pop_FixedShift(slabs_, cl, f); + if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + return TcmallocSlab_Internal_Pop_FixedShift(slabs_, cl, f); } else { - return TcmallocSlab_Internal_Pop(slabs_, cl, f, shift_); + return TcmallocSlab_Internal_Pop(slabs_, cl, f, shift_); } #endif } @@ -811,24 +811,24 @@ static inline void* NoopUnderflow(int cpu, size_t cl) { return nullptr; } static inline int NoopOverflow(int cpu, size_t cl, void* item) { return -1; } -template <size_t NumClasses> -inline size_t TcmallocSlab<NumClasses>::PushBatch(size_t cl, void** batch, - size_t len) { +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::PushBatch(size_t cl, void** batch, + size_t len) { ASSERT(len != 0); - if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { #if TCMALLOC_PERCPU_USE_RSEQ - // TODO(b/159923407): TcmallocSlab_Internal_PushBatch_FixedShift needs to be - // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid - // needing to dispatch on two separate versions of the same function with - // only minor differences between them. 
- switch (virtual_cpu_id_offset_) { + // TODO(b/159923407): TcmallocSlab_Internal_PushBatch_FixedShift needs to be + // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid + // needing to dispatch on two separate versions of the same function with + // only minor differences between them. + switch (virtual_cpu_id_offset_) { case offsetof(kernel_rseq, cpu_id): - return TcmallocSlab_Internal_PushBatch_FixedShift(slabs_, cl, batch, - len); + return TcmallocSlab_Internal_PushBatch_FixedShift(slabs_, cl, batch, + len); #ifdef __x86_64__ case offsetof(kernel_rseq, vcpu_id): - return TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(slabs_, cl, - batch, len); + return TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(slabs_, cl, + batch, len); #endif // __x86_64__ default: __builtin_unreachable(); @@ -846,25 +846,25 @@ inline size_t TcmallocSlab<NumClasses>::PushBatch(size_t cl, void** batch, } } -template <size_t NumClasses> -inline size_t TcmallocSlab<NumClasses>::PopBatch(size_t cl, void** batch, - size_t len) { +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::PopBatch(size_t cl, void** batch, + size_t len) { ASSERT(len != 0); size_t n = 0; - if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { #if TCMALLOC_PERCPU_USE_RSEQ - // TODO(b/159923407): TcmallocSlab_Internal_PopBatch_FixedShift needs to be - // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid - // needing to dispatch on two separate versions of the same function with - // only minor differences between them. - switch (virtual_cpu_id_offset_) { + // TODO(b/159923407): TcmallocSlab_Internal_PopBatch_FixedShift needs to be + // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid + // needing to dispatch on two separate versions of the same function with + // only minor differences between them. 
+ switch (virtual_cpu_id_offset_) { case offsetof(kernel_rseq, cpu_id): - n = TcmallocSlab_Internal_PopBatch_FixedShift(slabs_, cl, batch, len); + n = TcmallocSlab_Internal_PopBatch_FixedShift(slabs_, cl, batch, len); break; #ifdef __x86_64__ case offsetof(kernel_rseq, vcpu_id): - n = TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(slabs_, cl, batch, - len); + n = TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(slabs_, cl, batch, + len); break; #endif // __x86_64__ default: @@ -886,77 +886,77 @@ inline size_t TcmallocSlab<NumClasses>::PopBatch(size_t cl, void** batch, return n; } -template <size_t NumClasses> -inline typename TcmallocSlab<NumClasses>::Slabs* -TcmallocSlab<NumClasses>::CpuMemoryStart(int cpu) const { - char* const bytes = reinterpret_cast<char*>(slabs_); - return reinterpret_cast<Slabs*>(&bytes[cpu << shift_]); +template <size_t NumClasses> +inline typename TcmallocSlab<NumClasses>::Slabs* +TcmallocSlab<NumClasses>::CpuMemoryStart(int cpu) const { + char* const bytes = reinterpret_cast<char*>(slabs_); + return reinterpret_cast<Slabs*>(&bytes[cpu << shift_]); } -template <size_t NumClasses> -inline std::atomic<int64_t>* TcmallocSlab<NumClasses>::GetHeader( +template <size_t NumClasses> +inline std::atomic<int64_t>* TcmallocSlab<NumClasses>::GetHeader( int cpu, size_t cl) const { return &CpuMemoryStart(cpu)->header[cl]; } -template <size_t NumClasses> -inline typename TcmallocSlab<NumClasses>::Header -TcmallocSlab<NumClasses>::LoadHeader(std::atomic<int64_t>* hdrp) { - return absl::bit_cast<Header>(hdrp->load(std::memory_order_relaxed)); +template <size_t NumClasses> +inline typename TcmallocSlab<NumClasses>::Header +TcmallocSlab<NumClasses>::LoadHeader(std::atomic<int64_t>* hdrp) { + return absl::bit_cast<Header>(hdrp->load(std::memory_order_relaxed)); } -template <size_t NumClasses> -inline void TcmallocSlab<NumClasses>::StoreHeader(std::atomic<int64_t>* hdrp, - Header hdr) { - hdrp->store(absl::bit_cast<int64_t>(hdr), std::memory_order_relaxed); 
+template <size_t NumClasses> +inline void TcmallocSlab<NumClasses>::StoreHeader(std::atomic<int64_t>* hdrp, + Header hdr) { + hdrp->store(absl::bit_cast<int64_t>(hdr), std::memory_order_relaxed); } -template <size_t NumClasses> -inline int TcmallocSlab<NumClasses>::CompareAndSwapHeader( - int cpu, std::atomic<int64_t>* hdrp, Header old, Header hdr, - const size_t virtual_cpu_id_offset) { -#if __SIZEOF_POINTER__ == 8 - const int64_t old_raw = absl::bit_cast<int64_t>(old); - const int64_t new_raw = absl::bit_cast<int64_t>(hdr); +template <size_t NumClasses> +inline int TcmallocSlab<NumClasses>::CompareAndSwapHeader( + int cpu, std::atomic<int64_t>* hdrp, Header old, Header hdr, + const size_t virtual_cpu_id_offset) { +#if __SIZEOF_POINTER__ == 8 + const int64_t old_raw = absl::bit_cast<int64_t>(old); + const int64_t new_raw = absl::bit_cast<int64_t>(hdr); return CompareAndSwapUnsafe(cpu, hdrp, static_cast<intptr_t>(old_raw), - static_cast<intptr_t>(new_raw), - virtual_cpu_id_offset); + static_cast<intptr_t>(new_raw), + virtual_cpu_id_offset); #else Crash(kCrash, __FILE__, __LINE__, "This architecture is not supported."); #endif } -template <size_t NumClasses> -inline bool TcmallocSlab<NumClasses>::Header::IsLocked() const { +template <size_t NumClasses> +inline bool TcmallocSlab<NumClasses>::Header::IsLocked() const { return begin == 0xffffu; } -template <size_t NumClasses> -inline void TcmallocSlab<NumClasses>::Header::Lock() { +template <size_t NumClasses> +inline void TcmallocSlab<NumClasses>::Header::Lock() { // Write 0xffff to begin and 0 to end. This blocks new Push'es and Pop's. // Note: we write only 4 bytes. The first 4 bytes are left intact. // See Drain method for details. tl;dr: C++ does not allow us to legally // express this without undefined behavior. 
- std::atomic<int32_t>* p = - reinterpret_cast<std::atomic<int32_t>*>(&lock_update); + std::atomic<int32_t>* p = + reinterpret_cast<std::atomic<int32_t>*>(&lock_update); Header hdr; hdr.begin = 0xffffu; hdr.end = 0; - p->store(absl::bit_cast<int32_t>(hdr.lock_update), std::memory_order_relaxed); + p->store(absl::bit_cast<int32_t>(hdr.lock_update), std::memory_order_relaxed); } -template <size_t NumClasses> -void TcmallocSlab<NumClasses>::Init(void*(alloc)(size_t size), - size_t (*capacity)(size_t cl), bool lazy, - size_t shift) { -#ifdef __x86_64__ - if (UsingFlatVirtualCpus()) { - virtual_cpu_id_offset_ = offsetof(kernel_rseq, vcpu_id); - } -#endif // __x86_64__ - - shift_ = shift; - size_t mem_size = absl::base_internal::NumCPUs() * (1ul << shift); +template <size_t NumClasses> +void TcmallocSlab<NumClasses>::Init(void*(alloc)(size_t size), + size_t (*capacity)(size_t cl), bool lazy, + size_t shift) { +#ifdef __x86_64__ + if (UsingFlatVirtualCpus()) { + virtual_cpu_id_offset_ = offsetof(kernel_rseq, vcpu_id); + } +#endif // __x86_64__ + + shift_ = shift; + size_t mem_size = absl::base_internal::NumCPUs() * (1ul << shift); void* backing = alloc(mem_size); // MSan does not see writes in assembly. 
ANNOTATE_MEMORY_IS_INITIALIZED(backing, mem_size); @@ -967,7 +967,7 @@ void TcmallocSlab<NumClasses>::Init(void*(alloc)(size_t size), size_t bytes_used = 0; for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { bytes_used += sizeof(std::atomic<int64_t>) * NumClasses; - void** elems = CpuMemoryStart(cpu)->mem; + void** elems = CpuMemoryStart(cpu)->mem; for (size_t cl = 0; cl < NumClasses; ++cl) { size_t cap = capacity(cl); @@ -1009,7 +1009,7 @@ void TcmallocSlab<NumClasses>::Init(void*(alloc)(size_t size), elems += cap; CHECK_CONDITION(reinterpret_cast<char*>(elems) - reinterpret_cast<char*>(CpuMemoryStart(cpu)) <= - (1 << shift_)); + (1 << shift_)); } } // Check for less than 90% usage of the reserved memory @@ -1019,10 +1019,10 @@ void TcmallocSlab<NumClasses>::Init(void*(alloc)(size_t size), } } -template <size_t NumClasses> -void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; - +template <size_t NumClasses> +void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; + // TODO(ckennelly): Consolidate this logic with Drain. // Phase 1: verify no header is locked for (size_t cl = 0; cl < NumClasses; ++cl) { @@ -1040,7 +1040,7 @@ void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { // of different sizes. reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock(); } - FenceCpu(cpu, virtual_cpu_id_offset); + FenceCpu(cpu, virtual_cpu_id_offset); done = true; for (size_t cl = 0; cl < NumClasses; ++cl) { Header hdr = LoadHeader(GetHeader(cpu, cl)); @@ -1054,7 +1054,7 @@ void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { // Phase 3: Initialize prefetch target and compute the offsets for the // boundaries of each size class' cache. 
- void** elems = CpuMemoryStart(cpu)->mem; + void** elems = CpuMemoryStart(cpu)->mem; uint16_t begin[NumClasses]; for (size_t cl = 0; cl < NumClasses; ++cl) { size_t cap = capacity(cl); @@ -1076,7 +1076,7 @@ void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { elems += cap; CHECK_CONDITION(reinterpret_cast<char*>(elems) - reinterpret_cast<char*>(CpuMemoryStart(cpu)) <= - (1 << shift_)); + (1 << shift_)); } // Phase 4: Store current. No restartable sequence will proceed @@ -1087,7 +1087,7 @@ void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { hdr.current = begin[cl]; StoreHeader(hdrp, hdr); } - FenceCpu(cpu, virtual_cpu_id_offset); + FenceCpu(cpu, virtual_cpu_id_offset); // Phase 5: Allow access to this cache. for (size_t cl = 0; cl < NumClasses; ++cl) { @@ -1100,84 +1100,84 @@ void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { } } -template <size_t NumClasses> -void TcmallocSlab<NumClasses>::Destroy(void(free)(void*)) { +template <size_t NumClasses> +void TcmallocSlab<NumClasses>::Destroy(void(free)(void*)) { free(slabs_); slabs_ = nullptr; } -template <size_t NumClasses> -size_t TcmallocSlab<NumClasses>::ShrinkOtherCache(int cpu, size_t cl, - size_t len, void* ctx, - ShrinkHandler f) { - ASSERT(cpu >= 0); - ASSERT(cpu < absl::base_internal::NumCPUs()); - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; - - // Phase 1: Collect begin as it will be overwritten by the lock. - std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); - Header hdr = LoadHeader(hdrp); - CHECK_CONDITION(!hdr.IsLocked()); - const uint16_t begin = hdr.begin; - - // Phase 2: stop concurrent mutations. - for (bool done = false; !done;) { - reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock(); - FenceCpu(cpu, virtual_cpu_id_offset); - done = true; - - hdr = LoadHeader(GetHeader(cpu, cl)); - if (!hdr.IsLocked()) { - // Header was overwritten by Grow/Shrink. Retry. 
- done = false; - } - } - - // Phase 3: If we do not have len number of items to shrink, we try - // to pop items from the list first to create enough capacity that can be - // shrunk. If we pop items, we also execute callbacks. - // - // We can't write all 4 fields at once with a single write, because Pop does - // several non-atomic loads of the fields. Consider that a concurrent Pop - // loads old current (still pointing somewhere in the middle of the region); - // then we update all fields with a single write; then Pop loads the updated - // begin which allows it to proceed; then it decrements current below begin. - // - // So we instead first just update current--our locked begin/end guarantee - // no Push/Pop will make progress. Once we Fence below, we know no Push/Pop - // is using the old current, and can safely update begin/end to be an empty - // slab. - - const uint16_t unused = hdr.end_copy - hdr.current; - if (unused < len) { - const uint16_t expected_pop = len - unused; - const uint16_t actual_pop = - std::min<uint16_t>(expected_pop, hdr.current - begin); - void** batch = - reinterpret_cast<void**>(GetHeader(cpu, 0) + hdr.current - actual_pop); - f(ctx, cl, batch, actual_pop); - hdr.current -= actual_pop; - StoreHeader(hdrp, hdr); - FenceCpu(cpu, virtual_cpu_id_offset); - } - - // Phase 4: Shrink the capacity. Use a copy of begin and end_copy to - // restore the header, shrink it, and return the length by which the - // region was shrunk. 
- hdr.begin = begin; - const uint16_t to_shrink = - std::min<uint16_t>(len, hdr.end_copy - hdr.current); - hdr.end_copy -= to_shrink; - hdr.end = hdr.end_copy; - StoreHeader(hdrp, hdr); - return to_shrink; -} - -template <size_t NumClasses> -void TcmallocSlab<NumClasses>::Drain(int cpu, void* ctx, DrainHandler f) { +template <size_t NumClasses> +size_t TcmallocSlab<NumClasses>::ShrinkOtherCache(int cpu, size_t cl, + size_t len, void* ctx, + ShrinkHandler f) { + ASSERT(cpu >= 0); + ASSERT(cpu < absl::base_internal::NumCPUs()); + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; + + // Phase 1: Collect begin as it will be overwritten by the lock. + std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); + Header hdr = LoadHeader(hdrp); + CHECK_CONDITION(!hdr.IsLocked()); + const uint16_t begin = hdr.begin; + + // Phase 2: stop concurrent mutations. + for (bool done = false; !done;) { + reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock(); + FenceCpu(cpu, virtual_cpu_id_offset); + done = true; + + hdr = LoadHeader(GetHeader(cpu, cl)); + if (!hdr.IsLocked()) { + // Header was overwritten by Grow/Shrink. Retry. + done = false; + } + } + + // Phase 3: If we do not have len number of items to shrink, we try + // to pop items from the list first to create enough capacity that can be + // shrunk. If we pop items, we also execute callbacks. + // + // We can't write all 4 fields at once with a single write, because Pop does + // several non-atomic loads of the fields. Consider that a concurrent Pop + // loads old current (still pointing somewhere in the middle of the region); + // then we update all fields with a single write; then Pop loads the updated + // begin which allows it to proceed; then it decrements current below begin. + // + // So we instead first just update current--our locked begin/end guarantee + // no Push/Pop will make progress. 
Once we Fence below, we know no Push/Pop + // is using the old current, and can safely update begin/end to be an empty + // slab. + + const uint16_t unused = hdr.end_copy - hdr.current; + if (unused < len) { + const uint16_t expected_pop = len - unused; + const uint16_t actual_pop = + std::min<uint16_t>(expected_pop, hdr.current - begin); + void** batch = + reinterpret_cast<void**>(GetHeader(cpu, 0) + hdr.current - actual_pop); + f(ctx, cl, batch, actual_pop); + hdr.current -= actual_pop; + StoreHeader(hdrp, hdr); + FenceCpu(cpu, virtual_cpu_id_offset); + } + + // Phase 4: Shrink the capacity. Use a copy of begin and end_copy to + // restore the header, shrink it, and return the length by which the + // region was shrunk. + hdr.begin = begin; + const uint16_t to_shrink = + std::min<uint16_t>(len, hdr.end_copy - hdr.current); + hdr.end_copy -= to_shrink; + hdr.end = hdr.end_copy; + StoreHeader(hdrp, hdr); + return to_shrink; +} + +template <size_t NumClasses> +void TcmallocSlab<NumClasses>::Drain(int cpu, void* ctx, DrainHandler f) { CHECK_CONDITION(cpu >= 0); CHECK_CONDITION(cpu < absl::base_internal::NumCPUs()); - const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; // Push/Pop/Grow/Shrink can be executed concurrently with Drain. // That's not an expected case, but it must be handled for correctness. @@ -1207,7 +1207,7 @@ void TcmallocSlab<NumClasses>::Drain(int cpu, void* ctx, DrainHandler f) { // of different sizes. reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock(); } - FenceCpu(cpu, virtual_cpu_id_offset); + FenceCpu(cpu, virtual_cpu_id_offset); done = true; for (size_t cl = 0; cl < NumClasses; ++cl) { Header hdr = LoadHeader(GetHeader(cpu, cl)); @@ -1250,7 +1250,7 @@ void TcmallocSlab<NumClasses>::Drain(int cpu, void* ctx, DrainHandler f) { // Phase 5: fence and reset the remaining fields to beginning of the region. // This allows concurrent mutations again. 
- FenceCpu(cpu, virtual_cpu_id_offset); + FenceCpu(cpu, virtual_cpu_id_offset); for (size_t cl = 0; cl < NumClasses; ++cl) { std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); Header hdr; @@ -1262,18 +1262,18 @@ void TcmallocSlab<NumClasses>::Drain(int cpu, void* ctx, DrainHandler f) { } } -template <size_t NumClasses> -PerCPUMetadataState TcmallocSlab<NumClasses>::MetadataMemoryUsage() const { +template <size_t NumClasses> +PerCPUMetadataState TcmallocSlab<NumClasses>::MetadataMemoryUsage() const { PerCPUMetadataState result; - result.virtual_size = absl::base_internal::NumCPUs() * (1ul << shift_); + result.virtual_size = absl::base_internal::NumCPUs() * (1ul << shift_); result.resident_size = MInCore::residence(slabs_, result.virtual_size); return result; } } // namespace percpu } // namespace subtle -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc index 39f07fbe67..d10ca98dd0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc @@ -36,15 +36,15 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "absl/types/span.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include "tcmalloc/internal/config.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/util.h" #include "tcmalloc/malloc_extension.h" -#include "tcmalloc/testing/testutil.h" +#include "tcmalloc/testing/testutil.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace subtle { namespace percpu { namespace { @@ -96,8 +96,8 @@ void RunOnSingleCpu(std::function<bool(int)> test) { constexpr size_t kStressSlabs = 4; constexpr size_t 
kStressCapacity = 4; -constexpr size_t kShift = 18; -typedef class TcmallocSlab<kStressSlabs> TcmallocSlab; +constexpr size_t kShift = 18; +typedef class TcmallocSlab<kStressSlabs> TcmallocSlab; enum class SlabInit { kEager, @@ -110,12 +110,12 @@ class TcmallocSlabTest : public testing::TestWithParam<SlabInit> { slab_test_ = &slab_; metadata_bytes_ = 0; -// Ignore false-positive warning in GCC. For more information, see: -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96003 -#pragma GCC diagnostic ignored "-Wnonnull" +// Ignore false-positive warning in GCC. For more information, see: +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96003 +#pragma GCC diagnostic ignored "-Wnonnull" slab_.Init( &ByteCountingMalloc, [](size_t cl) { return kCapacity; }, - GetParam() == SlabInit::kLazy, kShift); + GetParam() == SlabInit::kLazy, kShift); for (int i = 0; i < kCapacity; ++i) { object_ptrs_[i] = &objects_[i]; @@ -267,14 +267,14 @@ TEST_P(TcmallocSlabTest, Unit) { for (auto cpu : AllowedCpus()) { SCOPED_TRACE(cpu); - // Temporarily fake being on the given CPU. - ScopedFakeCpuId fake_cpu_id(cpu); - -#if !defined(__ppc__) + // Temporarily fake being on the given CPU. + ScopedFakeCpuId fake_cpu_id(cpu); + +#if !defined(__ppc__) if (UsingFlatVirtualCpus()) { -#if TCMALLOC_PERCPU_USE_RSEQ +#if TCMALLOC_PERCPU_USE_RSEQ __rseq_abi.vcpu_id = cpu ^ 1; -#endif +#endif cpu = cpu ^ 1; } #endif @@ -288,7 +288,7 @@ TEST_P(TcmallocSlabTest, Unit) { // This is imperfect but the window between operations below is small. We // can make this more precise around individual operations if we see // measurable flakiness as a result. - if (fake_cpu_id.Tampered()) break; + if (fake_cpu_id.Tampered()) break; #endif // Check new slab state. 
@@ -296,7 +296,7 @@ TEST_P(TcmallocSlabTest, Unit) { ASSERT_EQ(slab_.Capacity(cpu, cl), 0); if (!initialized[cpu]) { -#pragma GCC diagnostic ignored "-Wnonnull" +#pragma GCC diagnostic ignored "-Wnonnull" void* ptr = slab_.Pop(cl, [](int cpu, size_t cl) { slab_test_->InitCPU(cpu, [](size_t cl) { return kCapacity; }); @@ -506,7 +506,7 @@ static void StressThread(size_t thread_id, TcmallocSlab* slab, absl::BitGen rnd(absl::SeedSeq({thread_id})); while (!*stop) { size_t cl = absl::Uniform<int32_t>(rnd, 0, kStressSlabs); - const int what = absl::Uniform<int32_t>(rnd, 0, 91); + const int what = absl::Uniform<int32_t>(rnd, 0, 91); if (what < 10) { if (!block->empty()) { if (slab->Push(cl, block->back(), &Handler::Overflow)) { @@ -554,14 +554,14 @@ static void StressThread(size_t thread_id, TcmallocSlab* slab, } } if (n != 0) { - size_t res = slab->Grow(slab->GetCurrentVirtualCpuUnsafe(), cl, n, - kStressCapacity); + size_t res = slab->Grow(slab->GetCurrentVirtualCpuUnsafe(), cl, n, + kStressCapacity); EXPECT_LE(res, n); capacity->fetch_add(n - res); } } else if (what < 60) { size_t n = - slab->Shrink(slab->GetCurrentVirtualCpuUnsafe(), cl, + slab->Shrink(slab->GetCurrentVirtualCpuUnsafe(), cl, absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1); capacity->fetch_add(n); } else if (what < 70) { @@ -572,37 +572,37 @@ static void StressThread(size_t thread_id, TcmallocSlab* slab, size_t cap = slab->Capacity( absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()), cl); EXPECT_LE(cap, kStressCapacity); - } else if (what < 90) { - struct Context { - std::vector<void*>* block; - std::atomic<size_t>* capacity; - }; - Context ctx = {block, capacity}; - int cpu = absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()); - if (mutexes->at(cpu).TryLock()) { - size_t to_shrink = absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1; - size_t total_shrunk = slab->ShrinkOtherCache( - cpu, cl, to_shrink, &ctx, - [](void* arg, size_t cl, void** batch, size_t n) { - Context* 
ctx = static_cast<Context*>(arg); - EXPECT_LT(cl, kStressSlabs); - EXPECT_LE(n, kStressCapacity); - for (size_t i = 0; i < n; ++i) { - EXPECT_NE(batch[i], nullptr); - ctx->block->push_back(batch[i]); - } - }); - EXPECT_LE(total_shrunk, to_shrink); - EXPECT_LE(0, total_shrunk); - capacity->fetch_add(total_shrunk); - mutexes->at(cpu).Unlock(); - } + } else if (what < 90) { + struct Context { + std::vector<void*>* block; + std::atomic<size_t>* capacity; + }; + Context ctx = {block, capacity}; + int cpu = absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()); + if (mutexes->at(cpu).TryLock()) { + size_t to_shrink = absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1; + size_t total_shrunk = slab->ShrinkOtherCache( + cpu, cl, to_shrink, &ctx, + [](void* arg, size_t cl, void** batch, size_t n) { + Context* ctx = static_cast<Context*>(arg); + EXPECT_LT(cl, kStressSlabs); + EXPECT_LE(n, kStressCapacity); + for (size_t i = 0; i < n; ++i) { + EXPECT_NE(batch[i], nullptr); + ctx->block->push_back(batch[i]); + } + }); + EXPECT_LE(total_shrunk, to_shrink); + EXPECT_LE(0, total_shrunk); + capacity->fetch_add(total_shrunk); + mutexes->at(cpu).Unlock(); + } } else { struct Context { std::vector<void*>* block; std::atomic<size_t>* capacity; }; - Context ctx = {block, capacity}; + Context ctx = {block, capacity}; int cpu = absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()); if (mutexes->at(cpu).TryLock()) { slab->Drain( @@ -646,8 +646,8 @@ TEST(TcmallocSlab, Stress) { TcmallocSlab slab; slab.Init( allocator, - [](size_t cl) { return cl < kStressSlabs ? kStressCapacity : 0; }, false, - kShift); + [](size_t cl) { return cl < kStressSlabs ? 
kStressCapacity : 0; }, false, + kShift); std::vector<std::thread> threads; const int n_threads = 2 * absl::base_internal::NumCPUs(); @@ -799,12 +799,12 @@ static void BM_PushPop(benchmark::State& state) { RunOnSingleCpu([&](int this_cpu) { const int kBatchSize = 32; TcmallocSlab slab; - -#pragma GCC diagnostic ignored "-Wnonnull" + +#pragma GCC diagnostic ignored "-Wnonnull" slab.Init( - allocator, [](size_t cl) -> size_t { return kBatchSize; }, false, - kShift); - + allocator, [](size_t cl) -> size_t { return kBatchSize; }, false, + kShift); + CHECK_CONDITION(slab.Grow(this_cpu, 0, kBatchSize, kBatchSize) == kBatchSize); void* batch[kBatchSize]; @@ -831,8 +831,8 @@ static void BM_PushPopBatch(benchmark::State& state) { const int kBatchSize = 32; TcmallocSlab slab; slab.Init( - allocator, [](size_t cl) -> size_t { return kBatchSize; }, false, - kShift); + allocator, [](size_t cl) -> size_t { return kBatchSize; }, false, + kShift); CHECK_CONDITION(slab.Grow(this_cpu, 0, kBatchSize, kBatchSize) == kBatchSize); void* batch[kBatchSize]; @@ -851,5 +851,5 @@ BENCHMARK(BM_PushPopBatch); } // namespace } // namespace percpu } // namespace subtle -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc index 5a5586cfff..415b8302ab 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc @@ -23,9 +23,9 @@ #include "absl/strings/str_format.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/util.h" +#include "tcmalloc/internal/util.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -69,7 +69,7 @@ void ProcMapsIterator::Init(pid_t pid, Buffer* buffer) { // No error logging since this can be called from the crash dump // handler at awkward moments. 
Users should call Valid() before // using. - TCMALLOC_RETRY_ON_TEMP_FAILURE(fd_ = open(ibuf_, O_RDONLY)); + TCMALLOC_RETRY_ON_TEMP_FAILURE(fd_ = open(ibuf_, O_RDONLY)); #else fd_ = -1; // so Valid() is always false #endif @@ -107,8 +107,8 @@ bool ProcMapsIterator::NextExt(uint64_t* start, uint64_t* end, char** flags, int nread = 0; // fill up buffer with text while (etext_ < ebuf_) { - TCMALLOC_RETRY_ON_TEMP_FAILURE(nread = - read(fd_, etext_, ebuf_ - etext_)); + TCMALLOC_RETRY_ON_TEMP_FAILURE(nread = + read(fd_, etext_, ebuf_ - etext_)); if (nread > 0) etext_ += nread; else @@ -168,4 +168,4 @@ bool ProcMapsIterator::NextExt(uint64_t* start, uint64_t* end, char** flags, } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h index c5c763a1e8..81e54bae3b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h @@ -19,9 +19,9 @@ #include <stdint.h> #include <sys/types.h> -#include "tcmalloc/internal/config.h" - -GOOGLE_MALLOC_SECTION_BEGIN +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -65,6 +65,6 @@ class ProcMapsIterator { } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_PROC_MAPS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h index 25b863934f..86ba100d2b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h @@ -23,13 +23,13 @@ #include <limits> #include <type_traits> -#include "absl/numeric/bits.h" +#include "absl/numeric/bits.h" #include "tcmalloc/internal/logging.h" #include 
"tcmalloc/internal/optimization.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Keeps a bitmap of some fixed size (N bits). template <size_t N> @@ -46,16 +46,16 @@ class Bitmap { // Returns the number of set bits [index, ..., index + n - 1]. size_t CountBits(size_t index, size_t n) const; - // Returns whether the bitmap is entirely zero or not. - bool IsZero() const; - + // Returns whether the bitmap is entirely zero or not. + bool IsZero() const; + // Equivalent to SetBit on bits [index, index + 1, ... index + n - 1]. void SetRange(size_t index, size_t n); void ClearRange(size_t index, size_t n); - // Clears the lowest set bit. Special case is faster than more flexible code. - void ClearLowestBit(); - + // Clears the lowest set bit. Special case is faster than more flexible code. + void ClearLowestBit(); + // If there is at least one free range at or after <start>, // put it in *index, *length and return true; else return false. bool NextFreeRange(size_t start, size_t *index, size_t *length) const; @@ -280,8 +280,8 @@ inline size_t Bitmap<N>::CountWordBits(size_t i, size_t from, size_t to) const { ASSERT(0 < n && n <= kWordSize); const size_t mask = (all_ones >> (kWordSize - n)) << from; - ASSUME(i < kWords); - return absl::popcount(bits_[i] & mask); + ASSUME(i < kWords); + return absl::popcount(bits_[i] & mask); } // Set the bits [from, to) in the i-th word to Value. 
@@ -305,34 +305,34 @@ inline void Bitmap<N>::SetWordBits(size_t i, size_t from, size_t to) { template <size_t N> inline bool Bitmap<N>::GetBit(size_t i) const { - ASSERT(i < N); + ASSERT(i < N); size_t word = i / kWordSize; size_t offset = i % kWordSize; - ASSUME(word < kWords); + ASSUME(word < kWords); return bits_[word] & (size_t{1} << offset); } template <size_t N> inline void Bitmap<N>::SetBit(size_t i) { - ASSERT(i < N); + ASSERT(i < N); size_t word = i / kWordSize; size_t offset = i % kWordSize; - ASSUME(word < kWords); + ASSUME(word < kWords); bits_[word] |= (size_t{1} << offset); } template <size_t N> inline void Bitmap<N>::ClearBit(size_t i) { - ASSERT(i < N); + ASSERT(i < N); size_t word = i / kWordSize; size_t offset = i % kWordSize; - ASSUME(word < kWords); + ASSUME(word < kWords); bits_[word] &= ~(size_t{1} << offset); } template <size_t N> inline size_t Bitmap<N>::CountBits(size_t index, size_t n) const { - ASSUME(index + n <= N); + ASSUME(index + n <= N); size_t count = 0; if (n == 0) { return count; @@ -354,16 +354,16 @@ inline size_t Bitmap<N>::CountBits(size_t index, size_t n) const { } template <size_t N> -inline bool Bitmap<N>::IsZero() const { - for (int i = 0; i < kWords; ++i) { - if (bits_[i] != 0) { - return false; - } - } - return true; -} - -template <size_t N> +inline bool Bitmap<N>::IsZero() const { + for (int i = 0; i < kWords; ++i) { + if (bits_[i] != 0) { + return false; + } + } + return true; +} + +template <size_t N> inline void Bitmap<N>::SetRange(size_t index, size_t n) { SetRangeValue<true>(index, n); } @@ -374,16 +374,16 @@ inline void Bitmap<N>::ClearRange(size_t index, size_t n) { } template <size_t N> -inline void Bitmap<N>::ClearLowestBit() { - for (int i = 0; i < kWords; ++i) { - if (bits_[i] != 0) { - bits_[i] &= bits_[i] - 1; - break; - } - } -} - -template <size_t N> +inline void Bitmap<N>::ClearLowestBit() { + for (int i = 0; i < kWords; ++i) { + if (bits_[i] != 0) { + bits_[i] &= bits_[i] - 1; + break; + } + } +} + 
+template <size_t N> template <bool Value> inline void Bitmap<N>::SetRangeValue(size_t index, size_t n) { ASSERT(index + n <= N); @@ -444,10 +444,10 @@ inline void Bitmap<N>::Clear() { template <size_t N> template <bool Goal> inline size_t Bitmap<N>::FindValue(size_t index) const { - ASSERT(index < N); + ASSERT(index < N); size_t offset = index % kWordSize; size_t word = index / kWordSize; - ASSUME(word < kWords); + ASSUME(word < kWords); size_t here = bits_[word]; if (!Goal) here = ~here; size_t mask = ~static_cast<size_t>(0) << offset; @@ -462,8 +462,8 @@ inline size_t Bitmap<N>::FindValue(size_t index) const { } word *= kWordSize; - ASSUME(here != 0); - size_t ret = absl::countr_zero(here) + word; + ASSUME(here != 0); + size_t ret = absl::countr_zero(here) + word; if (kDeadBits > 0) { if (ret > N) ret = N; } @@ -473,10 +473,10 @@ inline size_t Bitmap<N>::FindValue(size_t index) const { template <size_t N> template <bool Goal> inline ssize_t Bitmap<N>::FindValueBackwards(size_t index) const { - ASSERT(index < N); + ASSERT(index < N); size_t offset = index % kWordSize; ssize_t word = index / kWordSize; - ASSUME(word < kWords); + ASSUME(word < kWords); size_t here = bits_[word]; if (!Goal) here = ~here; size_t mask = (static_cast<size_t>(2) << offset) - 1; @@ -491,13 +491,13 @@ inline ssize_t Bitmap<N>::FindValueBackwards(size_t index) const { } word *= kWordSize; - ASSUME(here != 0); - size_t ret = absl::bit_width(here) - 1 + word; + ASSUME(here != 0); + size_t ret = absl::bit_width(here) - 1 + word; return ret; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_RANGE_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc index 278fc9ef1e..04dfc46c55 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc +++ 
b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc @@ -22,9 +22,9 @@ #include "benchmark/benchmark.h" #include "tcmalloc/internal/range_tracker.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { template <size_t N> @@ -382,6 +382,6 @@ BENCHMARK_TEMPLATE(BM_ScanChunks, 256); BENCHMARK_TEMPLATE(BM_ScanChunks, 256 * 32); } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc index 4f9202e221..8557c8e3b2 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc @@ -26,7 +26,7 @@ #include "absl/random/random.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { using testing::ElementsAre; @@ -98,29 +98,29 @@ TEST_F(BitmapTest, GetBitEmpty) { } } -TEST_F(BitmapTest, CheckIsZero) { - Bitmap<253> map; - EXPECT_EQ(map.IsZero(), true); - for (size_t i = 0; i < map.size(); ++i) { - map.Clear(); - EXPECT_EQ(map.IsZero(), true); - map.SetBit(i); - EXPECT_EQ(map.IsZero(), false); - } -} - -TEST_F(BitmapTest, CheckClearLowestBit) { - Bitmap<253> map; - for (size_t i = 0; i < map.size(); ++i) { - map.SetBit(i); - } - for (size_t i = 0; i < map.size(); ++i) { - size_t index = map.FindSet(0); - EXPECT_EQ(index, i); - map.ClearLowestBit(); - } -} - +TEST_F(BitmapTest, CheckIsZero) { + Bitmap<253> map; + EXPECT_EQ(map.IsZero(), true); + for (size_t i = 0; i < map.size(); ++i) { + map.Clear(); + EXPECT_EQ(map.IsZero(), true); + map.SetBit(i); + EXPECT_EQ(map.IsZero(), false); + } +} + +TEST_F(BitmapTest, CheckClearLowestBit) { + Bitmap<253> map; + for (size_t i = 0; i < map.size(); ++i) { + 
map.SetBit(i); + } + for (size_t i = 0; i < map.size(); ++i) { + size_t index = map.FindSet(0); + EXPECT_EQ(index, i); + map.ClearLowestBit(); + } +} + TEST_F(BitmapTest, GetBitOneSet) { const size_t N = 251; for (size_t s = 0; s < N; s++) { @@ -290,5 +290,5 @@ TEST_F(RangeTrackerTest, Trivial) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h index f1b6d3375f..053f9863e9 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h @@ -23,16 +23,16 @@ #include "absl/base/internal/cycleclock.h" #include "absl/functional/function_ref.h" -#include "absl/numeric/bits.h" -#include "absl/numeric/int128.h" +#include "absl/numeric/bits.h" +#include "absl/numeric/int128.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/clock.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Aggregates a series of reported values of type S in a set of entries of type // T, one entry per epoch. This class factors out common functionality of @@ -44,16 +44,16 @@ class TimeSeriesTracker { enum SkipEntriesSetting { kSkipEmptyEntries, kDoNotSkipEmptyEntries }; explicit constexpr TimeSeriesTracker(Clock clock, absl::Duration w) - : window_(w), epoch_length_(window_ / kEpochs), clock_(clock) { - // See comment in GetCurrentEpoch(). 
- auto d = static_cast<uint64_t>(absl::ToDoubleSeconds(epoch_length_) * - clock.freq()); - div_precision_ = 63 + absl::bit_width(d); - epoch_ticks_m_ = - static_cast<uint64_t>( - (static_cast<absl::uint128>(1) << div_precision_) / d) + - 1; - } + : window_(w), epoch_length_(window_ / kEpochs), clock_(clock) { + // See comment in GetCurrentEpoch(). + auto d = static_cast<uint64_t>(absl::ToDoubleSeconds(epoch_length_) * + clock.freq()); + div_precision_ = 63 + absl::bit_width(d); + epoch_ticks_m_ = + static_cast<uint64_t>( + (static_cast<absl::uint128>(1) << div_precision_) / d) + + 1; + } bool Report(S val); @@ -66,7 +66,7 @@ class TimeSeriesTracker { // Iterates over the last num_epochs data points (if -1, iterate to the // oldest entry). Offsets are relative to the end of the buffer. void IterBackwards(absl::FunctionRef<void(size_t, int64_t, const T&)> f, - int64_t num_epochs = -1) const; + int64_t num_epochs = -1) const; // This retrieves a particular data point (if offset is outside the valid // range, the default data point will be returned). @@ -82,21 +82,21 @@ class TimeSeriesTracker { bool UpdateClock(); // Returns the current epoch based on the clock. - int64_t GetCurrentEpoch() { - // This is equivalent to - // `clock_.now() / (absl::ToDoubleSeconds(epoch_length_) * clock_.freq())`. - // We basically follow the technique from - // https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html, - // except that we use one fewer bit of precision than necessary to always - // get the correct answer if the numerator were a 64-bit unsigned number. In - // this case, because clock_.now() returns a signed 64-bit number (i.e. max - // is <2^63), it shouldn't cause a problem. This way, we don't need to - // handle overflow so it's simpler. See also: - // https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/. 
- return static_cast<int64_t>(static_cast<absl::uint128>(epoch_ticks_m_) * - clock_.now() >> - div_precision_); - } + int64_t GetCurrentEpoch() { + // This is equivalent to + // `clock_.now() / (absl::ToDoubleSeconds(epoch_length_) * clock_.freq())`. + // We basically follow the technique from + // https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html, + // except that we use one fewer bit of precision than necessary to always + // get the correct answer if the numerator were a 64-bit unsigned number. In + // this case, because clock_.now() returns a signed 64-bit number (i.e. max + // is <2^63), it shouldn't cause a problem. This way, we don't need to + // handle overflow so it's simpler. See also: + // https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/. + return static_cast<int64_t>(static_cast<absl::uint128>(epoch_ticks_m_) * + clock_.now() >> + div_precision_); + } const absl::Duration window_; const absl::Duration epoch_length_; @@ -104,10 +104,10 @@ class TimeSeriesTracker { T entries_[kEpochs]{}; size_t last_epoch_{0}; size_t current_epoch_{0}; - // This is the magic constant from - // https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html. - uint64_t epoch_ticks_m_; - uint8_t div_precision_; + // This is the magic constant from + // https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html. + uint64_t epoch_ticks_m_; + uint8_t div_precision_; Clock clock_; }; @@ -158,7 +158,7 @@ void TimeSeriesTracker<T, S, kEpochs>::Iter( template <class T, class S, size_t kEpochs> void TimeSeriesTracker<T, S, kEpochs>::IterBackwards( absl::FunctionRef<void(size_t, int64_t, const T&)> f, - int64_t num_epochs) const { + int64_t num_epochs) const { // -1 means that we are outputting all epochs. num_epochs = (num_epochs == -1) ? 
kEpochs : num_epochs; size_t j = current_epoch_; @@ -188,8 +188,8 @@ bool TimeSeriesTracker<T, S, kEpochs>::Report(S val) { return updated_clock; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_TIMESERIES_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc index 1f75306161..c5f647cf1e 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc @@ -20,7 +20,7 @@ using ::testing::ElementsAre; namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class TimeSeriesTrackerTest : public testing::Test { @@ -42,7 +42,7 @@ class TimeSeriesTrackerTest : public testing::Test { static constexpr absl::Duration kDuration = absl::Seconds(2); - TimeSeriesTracker<TestEntry, int, 8> tracker_{ + TimeSeriesTracker<TestEntry, int, 8> tracker_{ Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kDuration}; private: @@ -59,10 +59,10 @@ int64_t TimeSeriesTrackerTest::clock_{0}; // Test that frequency conversion in the cycle clock works correctly TEST(TimeSeriesTest, CycleClock) { - TimeSeriesTracker<TimeSeriesTrackerTest::TestEntry, int, 100> tracker{ - Clock{absl::base_internal::CycleClock::Now, - absl::base_internal::CycleClock::Frequency}, - absl::Seconds(10)}; // 100ms epochs + TimeSeriesTracker<TimeSeriesTrackerTest::TestEntry, int, 100> tracker{ + Clock{absl::base_internal::CycleClock::Now, + absl::base_internal::CycleClock::Frequency}, + absl::Seconds(10)}; // 100ms epochs tracker.Report(1); absl::SleepFor(absl::Milliseconds(100)); @@ -187,5 +187,5 @@ TEST_F(TimeSeriesTrackerTest, Works) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git 
a/contrib/libs/tcmalloc/tcmalloc/internal/util.cc b/contrib/libs/tcmalloc/tcmalloc/internal/util.cc index ef705b02e3..ff36e00985 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/util.cc +++ b/contrib/libs/tcmalloc/tcmalloc/internal/util.cc @@ -27,7 +27,7 @@ #include "absl/time/time.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -192,4 +192,4 @@ bool ScopedAffinityMask::Tampered() { } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/util.h b/contrib/libs/tcmalloc/tcmalloc/internal/util.h index b43e322257..23139b09b9 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal/util.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal/util.h @@ -29,20 +29,20 @@ #include "absl/base/internal/sysinfo.h" #include "absl/time/time.h" #include "absl/types/span.h" -#include "tcmalloc/internal/config.h" - -#define TCMALLOC_RETRY_ON_TEMP_FAILURE(expression) \ - (__extension__({ \ - long int _temp_failure_retry_result; \ - do _temp_failure_retry_result = (long int)(expression); \ - while (_temp_failure_retry_result == -1L && errno == EINTR); \ - _temp_failure_retry_result; \ - })) - +#include "tcmalloc/internal/config.h" + +#define TCMALLOC_RETRY_ON_TEMP_FAILURE(expression) \ + (__extension__({ \ + long int _temp_failure_retry_result; \ + do _temp_failure_retry_result = (long int)(expression); \ + while (_temp_failure_retry_result == -1L && errno == EINTR); \ + _temp_failure_retry_result; \ + })) + // Useful internal utility functions. These calls are async-signal safe // provided the signal handler saves errno at entry and restores it before // return. 
-GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace tcmalloc_internal { @@ -133,6 +133,6 @@ class ScopedAffinityMask { } // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_INTERNAL_UTIL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h index 66027418ed..83f9b91242 100644 --- a/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h +++ b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h @@ -54,8 +54,8 @@ class ProfileAccessor { extern "C" { -ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_ForceCpuCacheActivation(); - +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_ForceCpuCacheActivation(); + ABSL_ATTRIBUTE_WEAK tcmalloc::AddressRegionFactory* MallocExtension_Internal_GetRegionFactory(); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetRegionFactory( @@ -75,17 +75,17 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit( ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty( const char* name_data, size_t name_size, size_t* value); ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive(); -ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches(); ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize(); -ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval( - absl::Duration* ret); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval( + absl::Duration* ret); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetProperties( std::map<std::string, tcmalloc::MallocExtension::Property>* ret); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetStats(std::string* ret); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxPerCpuCacheSize( int32_t value); 
-ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetSkipSubreleaseInterval( - absl::Duration value); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetSkipSubreleaseInterval( + absl::Duration value); ABSL_ATTRIBUTE_WEAK size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ReleaseMemoryToSystem( size_t bytes); @@ -116,10 +116,10 @@ ABSL_ATTRIBUTE_WEAK int64_t MallocExtension_Internal_GetMaxTotalThreadCacheBytes(); ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( int64_t value); - -ABSL_ATTRIBUTE_WEAK void -MallocExtension_EnableForkSupport(); - + +ABSL_ATTRIBUTE_WEAK void +MallocExtension_EnableForkSupport(); + ABSL_ATTRIBUTE_WEAK void MallocExtension_SetSampleUserDataCallbacks( tcmalloc::MallocExtension::CreateSampleUserDataCallback create, diff --git a/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc index 5395252719..11c05c08da 100644 --- a/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc +++ b/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc @@ -14,11 +14,11 @@ #include "tcmalloc/common.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { - +namespace tcmalloc_internal { + // <fixed> is fixed per-size-class overhead due to end-of-span fragmentation // and other factors. 
For instance, if we have a 96 byte size class, and use a // single 8KiB page, then we will hold 85 objects per span, and have 32 bytes @@ -68,10 +68,10 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 240, 1, 32}, // 0.98% { 256, 1, 32}, // 0.59% { 272, 1, 32}, // 0.98% - { 296, 1, 32}, // 3.10% + { 296, 1, 32}, // 3.10% { 312, 1, 32}, // 1.58% { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% + { 352, 1, 32}, // 1.78% { 368, 1, 32}, // 1.78% { 408, 1, 32}, // 0.98% { 448, 1, 32}, // 2.18% @@ -105,7 +105,7 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 9472, 5, 6}, // 8.23% { 10240, 4, 6}, // 6.82% { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% + { 13568, 5, 4}, // 0.75% { 14336, 7, 4}, // 0.08% { 16384, 2, 4}, // 0.29% { 20480, 5, 3}, // 0.12% @@ -119,13 +119,13 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 73728, 9, 2}, // 0.07% { 81920, 10, 2}, // 0.06% { 98304, 12, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% + { 114688, 14, 2}, // 0.04% { 131072, 16, 2}, // 0.04% { 147456, 18, 2}, // 0.03% { 163840, 20, 2}, // 0.03% { 180224, 22, 2}, // 0.03% { 204800, 25, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% { 262144, 32, 2}, // 0.02% }; #elif TCMALLOC_PAGE_SHIFT == 15 @@ -156,16 +156,16 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 176, 1, 32}, // 0.24% { 192, 1, 32}, // 0.54% { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% { 256, 1, 32}, // 0.15% { 280, 1, 32}, // 0.17% { 304, 1, 32}, // 0.89% - { 328, 1, 32}, // 1.06% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% + { 328, 1, 32}, // 1.06% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% { 416, 1, 32}, // 1.13% - { 448, 1, 32}, // 0.34% + { 448, 1, 32}, // 0.34% { 488, 1, 32}, // 0.37% { 512, 1, 32}, // 0.15% { 576, 1, 32}, // 1.74% @@ -176,8 
+176,8 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 1024, 1, 32}, // 0.15% { 1152, 1, 32}, // 1.74% { 1280, 1, 32}, // 2.55% - { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% { 2048, 1, 32}, // 0.15% { 2176, 1, 30}, // 0.54% { 2304, 1, 28}, // 1.74% @@ -189,7 +189,7 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 4608, 1, 14}, // 1.74% { 5376, 1, 12}, // 1.74% { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% + { 7168, 2, 9}, // 1.66% { 8192, 1, 8}, // 0.15% { 9344, 2, 7}, // 0.27% { 10880, 1, 6}, // 0.54% @@ -200,7 +200,7 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 24576, 3, 2}, // 0.05% { 28032, 6, 2}, // 0.22% { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% + { 38144, 5, 2}, // 7.41% { 40960, 4, 2}, // 6.71% { 49152, 3, 2}, // 0.05% { 57344, 7, 2}, // 0.02% @@ -234,32 +234,32 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 80, 1, 32}, // 0.04% { 88, 1, 32}, // 0.05% { 96, 1, 32}, // 0.04% - { 104, 1, 32}, // 0.04% + { 104, 1, 32}, // 0.04% { 112, 1, 32}, // 0.04% { 128, 1, 32}, // 0.02% { 144, 1, 32}, // 0.04% { 160, 1, 32}, // 0.04% { 176, 1, 32}, // 0.05% { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% { 240, 1, 32}, // 0.04% { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% { 360, 1, 32}, // 0.04% - { 408, 1, 32}, // 0.10% - { 456, 1, 32}, // 0.17% + { 408, 1, 32}, // 0.10% + { 456, 1, 32}, // 0.17% { 512, 1, 32}, // 0.02% { 576, 1, 32}, // 0.04% { 640, 1, 32}, // 0.17% { 704, 1, 32}, // 0.12% - { 768, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% + { 896, 1, 32}, // 0.21% { 1024, 1, 32}, // 0.02% { 1152, 1, 32}, // 0.26% { 1280, 1, 32}, // 0.41% - { 1536, 1, 32}, // 0.41% + { 1536, 1, 32}, 
// 0.41% { 1664, 1, 32}, // 0.36% { 1792, 1, 32}, // 0.21% { 1920, 1, 32}, // 0.41% @@ -267,24 +267,24 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 2176, 1, 30}, // 0.41% { 2304, 1, 28}, // 0.71% { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% + { 2560, 1, 25}, // 0.41% { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% + { 2816, 1, 23}, // 0.12% { 2944, 1, 22}, // 0.07% { 3072, 1, 21}, // 0.41% { 3328, 1, 19}, // 1.00% { 3584, 1, 18}, // 0.21% { 3840, 1, 17}, // 0.41% { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% + { 4736, 1, 13}, // 0.66% { 5504, 1, 11}, // 1.35% { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% { 7168, 1, 9}, // 1.61% { 7680, 1, 8}, // 0.41% { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% + { 8704, 1, 7}, // 0.41% { 9344, 1, 7}, // 0.21% { 10880, 1, 6}, // 0.41% { 11904, 1, 5}, // 0.12% @@ -332,11 +332,11 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 88, 1, 32}, // 2.37% { 96, 1, 32}, // 2.78% { 104, 1, 32}, // 2.17% - { 120, 1, 32}, // 1.57% + { 120, 1, 32}, // 1.57% { 128, 1, 32}, // 1.17% { 144, 1, 32}, // 2.78% { 160, 1, 32}, // 3.60% - { 184, 1, 32}, // 2.37% + { 184, 1, 32}, // 2.37% { 208, 1, 32}, // 4.86% { 240, 1, 32}, // 1.57% { 256, 1, 32}, // 1.17% @@ -347,22 +347,22 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 408, 1, 32}, // 1.57% { 512, 1, 32}, // 1.17% { 576, 2, 32}, // 2.18% - { 704, 2, 32}, // 6.40% + { 704, 2, 32}, // 6.40% { 768, 2, 32}, // 7.29% { 896, 2, 32}, // 2.18% { 1024, 2, 32}, // 0.59% { 1152, 3, 32}, // 7.08% { 1280, 3, 32}, // 7.08% { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% + { 1792, 4, 32}, // 1.88% { 2048, 4, 32}, // 0.29% { 2304, 4, 28}, // 1.88% { 2688, 4, 24}, // 1.88% - { 3456, 6, 18}, // 1.79% + { 3456, 6, 18}, // 1.79% { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% + { 5376, 4, 
12}, // 1.88% { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% + { 7168, 7, 9}, // 0.17% { 8192, 4, 8}, // 0.29% }; #else @@ -452,12 +452,12 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 90112, 11, 2}, // 0.05% { 98304, 12, 2}, // 0.05% { 106496, 13, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% + { 114688, 14, 2}, // 0.04% { 131072, 16, 2}, // 0.04% { 139264, 17, 2}, // 0.03% { 155648, 19, 2}, // 0.03% - { 172032, 21, 2}, // 0.03% - { 188416, 23, 2}, // 0.03% + { 172032, 21, 2}, // 0.03% + { 188416, 23, 2}, // 0.03% { 204800, 25, 2}, // 0.02% { 221184, 27, 2}, // 0.02% { 237568, 29, 2}, // 0.02% @@ -491,10 +491,10 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 272, 1, 32}, // 0.54% { 288, 1, 32}, // 0.84% { 304, 1, 32}, // 0.89% - { 320, 1, 32}, // 0.54% + { 320, 1, 32}, // 0.54% { 336, 1, 32}, // 0.69% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% { 416, 1, 32}, // 1.13% { 448, 1, 32}, // 0.34% { 480, 1, 32}, // 0.54% @@ -510,7 +510,7 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 1280, 1, 32}, // 2.55% { 1408, 1, 32}, // 1.33% { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% { 2048, 1, 32}, // 0.15% { 2176, 1, 30}, // 0.54% { 2304, 1, 28}, // 1.74% @@ -570,11 +570,11 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 160, 1, 32}, // 0.04% { 176, 1, 32}, // 0.05% { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% { 240, 1, 32}, // 0.04% { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% { 368, 1, 32}, // 0.07% { 416, 1, 32}, // 0.04% { 464, 1, 32}, // 0.19% @@ -582,7 +582,7 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 576, 1, 32}, // 0.04% { 640, 1, 32}, // 0.17% { 704, 1, 32}, // 0.12% - 
{ 768, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% { 832, 1, 32}, // 0.04% { 896, 1, 32}, // 0.21% { 1024, 1, 32}, // 0.02% @@ -597,30 +597,30 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 2176, 1, 30}, // 0.41% { 2304, 1, 28}, // 0.71% { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% + { 2560, 1, 25}, // 0.41% { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% + { 2816, 1, 23}, // 0.12% { 2944, 1, 22}, // 0.07% { 3072, 1, 21}, // 0.41% - { 3200, 1, 20}, // 1.15% + { 3200, 1, 20}, // 1.15% { 3328, 1, 19}, // 1.00% { 3584, 1, 18}, // 0.21% { 3840, 1, 17}, // 0.41% { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% + { 4736, 1, 13}, // 0.66% { 5504, 1, 11}, // 1.35% { 6144, 1, 10}, // 1.61% { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% + { 6784, 1, 9}, // 1.71% { 7168, 1, 9}, // 1.61% { 7680, 1, 8}, // 0.41% { 8192, 1, 8}, // 0.02% { 8704, 1, 7}, // 0.41% { 9344, 1, 7}, // 0.21% - { 10368, 1, 6}, // 1.15% - { 11392, 1, 5}, // 0.07% - { 12416, 1, 5}, // 0.56% - { 13696, 1, 4}, // 0.76% + { 10368, 1, 6}, // 1.15% + { 11392, 1, 5}, // 0.07% + { 12416, 1, 5}, // 0.56% + { 13696, 1, 4}, // 0.76% { 14464, 1, 4}, // 0.71% { 16384, 1, 4}, // 0.02% { 17408, 1, 3}, // 0.41% @@ -695,7 +695,7 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount { 3200, 4, 20}, // 2.70% { 3584, 7, 18}, // 0.17% { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% + { 5376, 4, 12}, // 1.88% { 6144, 3, 10}, // 0.39% { 7168, 7, 9}, // 0.17% { 8192, 4, 8}, // 0.29% @@ -706,6 +706,6 @@ const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount #endif // clang-format on -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override.h b/contrib/libs/tcmalloc/tcmalloc/libc_override.h index 89f8e4e5c8..97c282895d 100644 --- 
a/contrib/libs/tcmalloc/tcmalloc/libc_override.h +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override.h @@ -30,8 +30,8 @@ #if defined(__GLIBC__) #include "tcmalloc/libc_override_glibc.h" - -#else + +#else #include "tcmalloc/libc_override_redefine.h" #endif diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h index 709bcb727f..f66dd7f05b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h @@ -102,10 +102,10 @@ int posix_memalign(void** r, size_t a, size_t s) noexcept void malloc_stats(void) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocStats); int mallopt(int cmd, int value) noexcept TCMALLOC_ALIAS(TCMallocInternalMallOpt); -#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO struct mallinfo mallinfo(void) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocInfo); -#endif +#endif size_t malloc_size(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocSize); size_t malloc_usable_size(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocSize); diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h index b1655461c3..19be8bc470 100644 --- a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h @@ -44,57 +44,57 @@ void operator delete(void* ptr, const std::nothrow_t& nt) noexcept { void operator delete[](void* ptr, const std::nothrow_t& nt) noexcept { return TCMallocInternalDeleteArrayNothrow(ptr, nt); } - + extern "C" { -void* malloc(size_t s) { return TCMallocInternalMalloc(s); } -void* calloc(size_t n, size_t s) { return TCMallocInternalCalloc(n, s); } -void* realloc(void* p, size_t s) { return TCMallocInternalRealloc(p, s); } -void free(void* p) { TCMallocInternalFree(p); } -void* memalign(size_t a, size_t s) { return TCMallocInternalMemalign(a, s); 
} -int posix_memalign(void** r, size_t a, size_t s) { - return TCMallocInternalPosixMemalign(r, a, s); -} -size_t malloc_usable_size(void* p) { return TCMallocInternalMallocSize(p); } - -// tcmalloc extension +void* malloc(size_t s) { return TCMallocInternalMalloc(s); } +void* calloc(size_t n, size_t s) { return TCMallocInternalCalloc(n, s); } +void* realloc(void* p, size_t s) { return TCMallocInternalRealloc(p, s); } +void free(void* p) { TCMallocInternalFree(p); } +void* memalign(size_t a, size_t s) { return TCMallocInternalMemalign(a, s); } +int posix_memalign(void** r, size_t a, size_t s) { + return TCMallocInternalPosixMemalign(r, a, s); +} +size_t malloc_usable_size(void* p) { return TCMallocInternalMallocSize(p); } + +// tcmalloc extension void sdallocx(void* p, size_t s, int flags) noexcept { TCMallocInternalSdallocx(p, s, flags); } - -#if defined(__GLIBC__) || defined(__NEWLIB__) -// SunOS extension -void cfree(void* p) { TCMallocInternalCfree(p); } -#endif - -#if defined(OS_MACOSX) || defined(__BIONIC__) || defined(__GLIBC__) || \ - defined(__NEWLIB__) || defined(__UCLIBC__) -// Obsolete memalign -void* valloc(size_t s) { return TCMallocInternalValloc(s); } -#endif - -#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) -// Obsolete memalign -void* pvalloc(size_t s) { return TCMallocInternalPvalloc(s); } -#endif - -#if defined(__GLIBC__) || defined(__NEWLIB__) || defined(__UCLIBC__) -void malloc_stats(void) { TCMallocInternalMallocStats(); } -#endif - -#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ - defined(__UCLIBC__) -int mallopt(int cmd, int v) { return TCMallocInternalMallOpt(cmd, v); } -#endif - -#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO -struct mallinfo mallinfo(void) { + +#if defined(__GLIBC__) || defined(__NEWLIB__) +// SunOS extension +void cfree(void* p) { TCMallocInternalCfree(p); } +#endif + +#if defined(OS_MACOSX) || defined(__BIONIC__) || defined(__GLIBC__) || \ + defined(__NEWLIB__) || 
defined(__UCLIBC__) +// Obsolete memalign +void* valloc(size_t s) { return TCMallocInternalValloc(s); } +#endif + +#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) +// Obsolete memalign +void* pvalloc(size_t s) { return TCMallocInternalPvalloc(s); } +#endif + +#if defined(__GLIBC__) || defined(__NEWLIB__) || defined(__UCLIBC__) +void malloc_stats(void) { TCMallocInternalMallocStats(); } +#endif + +#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ + defined(__UCLIBC__) +int mallopt(int cmd, int v) { return TCMallocInternalMallOpt(cmd, v); } +#endif + +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +struct mallinfo mallinfo(void) { return TCMallocInternalMallocInfo(); } #endif - -#if defined(__GLIBC__) -size_t malloc_size(void* p) { return TCMallocInternalMallocSize(p); } -#endif + +#if defined(__GLIBC__) +size_t malloc_size(void* p) { return TCMallocInternalMallocSize(p); } +#endif } // extern "C" #endif // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc index ad3205fcdc..4ccf813bff 100644 --- a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc @@ -26,7 +26,7 @@ #include "absl/base/attributes.h" #include "absl/base/internal/low_level_alloc.h" #include "absl/memory/memory.h" -#include "absl/time/time.h" +#include "absl/time/time.h" #include "tcmalloc/internal/parameter_accessors.h" #include "tcmalloc/internal_malloc_extension.h" @@ -287,16 +287,16 @@ bool MallocExtension::PerCpuCachesActive() { #endif } -void MallocExtension::DeactivatePerCpuCaches() { -#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) { - return; - } - - MallocExtension_Internal_DeactivatePerCpuCaches(); -#endif -} - +void MallocExtension::DeactivatePerCpuCaches() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if 
(MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) { + return; + } + + MallocExtension_Internal_DeactivatePerCpuCaches(); +#endif +} + int32_t MallocExtension::GetMaxPerCpuCacheSize() { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) { @@ -345,32 +345,32 @@ void MallocExtension::SetMaxTotalThreadCacheBytes(int64_t value) { #endif } -absl::Duration MallocExtension::GetSkipSubreleaseInterval() { -#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (MallocExtension_Internal_GetSkipSubreleaseInterval == nullptr) { - return absl::ZeroDuration(); - } - - absl::Duration value; - MallocExtension_Internal_GetSkipSubreleaseInterval(&value); - return value; -#else - return absl::ZeroDuration(); -#endif -} - -void MallocExtension::SetSkipSubreleaseInterval(absl::Duration value) { -#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (MallocExtension_Internal_SetSkipSubreleaseInterval == nullptr) { - return; - } - - MallocExtension_Internal_SetSkipSubreleaseInterval(value); -#else - (void)value; -#endif -} - +absl::Duration MallocExtension::GetSkipSubreleaseInterval() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetSkipSubreleaseInterval == nullptr) { + return absl::ZeroDuration(); + } + + absl::Duration value; + MallocExtension_Internal_GetSkipSubreleaseInterval(&value); + return value; +#else + return absl::ZeroDuration(); +#endif +} + +void MallocExtension::SetSkipSubreleaseInterval(absl::Duration value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetSkipSubreleaseInterval == nullptr) { + return; + } + + MallocExtension_Internal_SetSkipSubreleaseInterval(value); +#else + (void)value; +#endif +} + absl::optional<size_t> MallocExtension::GetNumericProperty( absl::string_view property) { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS @@ -429,20 +429,20 @@ size_t MallocExtension::ReleaseCpuMemory(int cpu) { void 
MallocExtension::ProcessBackgroundActions() { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (NeedsProcessBackgroundActions()) { + if (NeedsProcessBackgroundActions()) { MallocExtension_Internal_ProcessBackgroundActions(); } #endif } -bool MallocExtension::NeedsProcessBackgroundActions() { -#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - return &MallocExtension_Internal_ProcessBackgroundActions != nullptr; -#else - return false; -#endif -} - +bool MallocExtension::NeedsProcessBackgroundActions() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + return &MallocExtension_Internal_ProcessBackgroundActions != nullptr; +#else + return false; +#endif +} + MallocExtension::BytesPerSecond MallocExtension::GetBackgroundReleaseRate() { #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS if (&MallocExtension_Internal_GetBackgroundReleaseRate != nullptr) { @@ -460,14 +460,14 @@ void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) { #endif } -void MallocExtension::EnableForkSupport() { -#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS - if (&MallocExtension_EnableForkSupport != nullptr) { - MallocExtension_EnableForkSupport(); - } -#endif -} - +void MallocExtension::EnableForkSupport() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_EnableForkSupport != nullptr) { + MallocExtension_EnableForkSupport(); + } +#endif +} + void MallocExtension::SetSampleUserDataCallbacks( CreateSampleUserDataCallback create, CopySampleUserDataCallback copy, diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h index fcbd347ca1..a55be1850a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h @@ -35,11 +35,11 @@ #include "absl/base/attributes.h" #include "absl/base/macros.h" -#include "absl/base/policy_checks.h" +#include "absl/base/policy_checks.h" #include "absl/base/port.h" #include "absl/functional/function_ref.h" 
#include "absl/strings/string_view.h" -#include "absl/time/time.h" +#include "absl/time/time.h" #include "absl/types/optional.h" #include "absl/types/span.h" @@ -299,9 +299,9 @@ class MallocExtension final { // Note: limit=SIZE_T_MAX implies no limit. size_t limit = std::numeric_limits<size_t>::max(); bool hard = false; - - // Explicitly declare the ctor to put it in the google_malloc section. - MemoryLimit() = default; + + // Explicitly declare the ctor to put it in the google_malloc section. + MemoryLimit() = default; }; static MemoryLimit GetMemoryLimit(); @@ -315,13 +315,13 @@ class MallocExtension final { // Gets the guarded sampling rate. Returns a value < 0 if unknown. static int64_t GetGuardedSamplingRate(); - // Sets the guarded sampling rate for sampled allocations. TCMalloc samples - // approximately every rate bytes allocated, subject to implementation - // limitations in GWP-ASan. - // - // Guarded samples provide probablistic protections against buffer underflow, - // overflow, and use-after-free when GWP-ASan is active (via calling - // ActivateGuardedSampling). + // Sets the guarded sampling rate for sampled allocations. TCMalloc samples + // approximately every rate bytes allocated, subject to implementation + // limitations in GWP-ASan. + // + // Guarded samples provide probablistic protections against buffer underflow, + // overflow, and use-after-free when GWP-ASan is active (via calling + // ActivateGuardedSampling). static void SetGuardedSamplingRate(int64_t rate); // Switches TCMalloc to guard sampled allocations for underflow, overflow, and @@ -331,11 +331,11 @@ class MallocExtension final { // Gets whether TCMalloc is using per-CPU caches. static bool PerCpuCachesActive(); - // Extension for unified agent. - // - // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321 - static void DeactivatePerCpuCaches(); - + // Extension for unified agent. 
+ // + // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321 + static void DeactivatePerCpuCaches(); + // Gets the current maximum cache size per CPU cache. static int32_t GetMaxPerCpuCacheSize(); // Sets the maximum cache size per CPU cache. This is a per-core limit. @@ -346,11 +346,11 @@ class MallocExtension final { // Sets the maximum thread cache size. This is a whole-process limit. static void SetMaxTotalThreadCacheBytes(int64_t value); - // Gets the delayed subrelease interval (0 if delayed subrelease is disabled) - static absl::Duration GetSkipSubreleaseInterval(); - // Sets the delayed subrelease interval (0 to disable delayed subrelease) - static void SetSkipSubreleaseInterval(absl::Duration value); - + // Gets the delayed subrelease interval (0 if delayed subrelease is disabled) + static absl::Duration GetSkipSubreleaseInterval(); + // Sets the delayed subrelease interval (0 to disable delayed subrelease) + static void SetSkipSubreleaseInterval(absl::Duration value); + // Returns the estimated number of bytes that will be allocated for a request // of "size" bytes. This is an estimate: an allocation of "size" bytes may // reserve more bytes, but will never reserve fewer. @@ -454,11 +454,11 @@ class MallocExtension final { // When linked against TCMalloc, this method does not return. static void ProcessBackgroundActions(); - // Return true if ProcessBackgroundActions should be called on this platform. - // Not all platforms need/support background actions. As of 2021 this - // includes Apple and Emscripten. - static bool NeedsProcessBackgroundActions(); - + // Return true if ProcessBackgroundActions should be called on this platform. + // Not all platforms need/support background actions. As of 2021 this + // includes Apple and Emscripten. + static bool NeedsProcessBackgroundActions(); + // Specifies a rate in bytes per second. // // The enum is used to provide strong-typing for the value. 
@@ -470,10 +470,10 @@ class MallocExtension final { // Specifies the release rate from the page heap. ProcessBackgroundActions // must be called for this to be operative. static void SetBackgroundReleaseRate(BytesPerSecond rate); - - // Enables fork support. - // Allocator will continue to function correctly in the child, after calling fork(). - static void EnableForkSupport(); + + // Enables fork support. + // Allocator will continue to function correctly in the child, after calling fork(). + static void EnableForkSupport(); using CreateSampleUserDataCallback = void*(); using CopySampleUserDataCallback = void*(void*); @@ -550,7 +550,7 @@ tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow( // Aligned size returning new is only supported for libc++ because of issues // with libstdcxx.so linkage. See http://b/110969867 for background. -#if defined(__cpp_aligned_new) +#if defined(__cpp_aligned_new) // Identical to `tcmalloc_size_returning_operator_new` except that the returned // memory is aligned according to the `alignment` argument. @@ -559,7 +559,7 @@ tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned( tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned_nothrow( size_t size, std::align_val_t alignment) noexcept; -#endif // __cpp_aligned_new +#endif // __cpp_aligned_new } // extern "C" @@ -578,9 +578,9 @@ namespace tcmalloc_internal { // while allowing the library to compile and link. class AllocationProfilingTokenBase { public: - // Explicitly declare the ctor to put it in the google_malloc section. - AllocationProfilingTokenBase() = default; - + // Explicitly declare the ctor to put it in the google_malloc section. + AllocationProfilingTokenBase() = default; + virtual ~AllocationProfilingTokenBase() = default; // Finish recording started during construction of this object. 
diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc index 5088806ff8..ce5fb0501a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc @@ -18,7 +18,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/time/time.h" +#include "absl/time/time.h" namespace tcmalloc { namespace { @@ -39,17 +39,17 @@ TEST(MallocExtension, BackgroundReleaseRate) { 0); } -TEST(MallocExtension, SkipSubreleaseInterval) { - - // Mutate via MallocExtension. - MallocExtension::SetSkipSubreleaseInterval(absl::Seconds(10)); - EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::Seconds(10)); - - // Disable skip subrelease - MallocExtension::SetSkipSubreleaseInterval(absl::ZeroDuration()); - EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::ZeroDuration()); -} - +TEST(MallocExtension, SkipSubreleaseInterval) { + + // Mutate via MallocExtension. + MallocExtension::SetSkipSubreleaseInterval(absl::Seconds(10)); + EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::Seconds(10)); + + // Disable skip subrelease + MallocExtension::SetSkipSubreleaseInterval(absl::ZeroDuration()); + EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::ZeroDuration()); +} + TEST(MallocExtension, Properties) { // Verify that every property under GetProperties also works with // GetNumericProperty. 
diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc index 13308b947a..600cbbf18f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc +++ b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc @@ -18,19 +18,19 @@ #include "tcmalloc/internal/logging.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { void MinimalFakeCentralFreeList::AllocateBatch(void** batch, int n) { for (int i = 0; i < n; ++i) batch[i] = &batch[i]; } -void MinimalFakeCentralFreeList::FreeBatch(absl::Span<void*> batch) { - for (void* x : batch) CHECK_CONDITION(x != nullptr); +void MinimalFakeCentralFreeList::FreeBatch(absl::Span<void*> batch) { + for (void* x : batch) CHECK_CONDITION(x != nullptr); } -void MinimalFakeCentralFreeList::InsertRange(absl::Span<void*> batch) { +void MinimalFakeCentralFreeList::InsertRange(absl::Span<void*> batch) { absl::base_internal::SpinLockHolder h(&lock_); - FreeBatch(batch); + FreeBatch(batch); } int MinimalFakeCentralFreeList::RemoveRange(void** batch, int n) { @@ -45,14 +45,14 @@ void FakeCentralFreeList::AllocateBatch(void** batch, int n) { } } -void FakeCentralFreeList::FreeBatch(absl::Span<void*> batch) { - for (void* x : batch) { - ::operator delete(x); +void FakeCentralFreeList::FreeBatch(absl::Span<void*> batch) { + for (void* x : batch) { + ::operator delete(x); } } -void FakeCentralFreeList::InsertRange(absl::Span<void*> batch) { - FreeBatch(batch); +void FakeCentralFreeList::InsertRange(absl::Span<void*> batch) { + FreeBatch(batch); } int FakeCentralFreeList::RemoveRange(void** batch, int n) { @@ -60,5 +60,5 @@ int FakeCentralFreeList::RemoveRange(void** batch, int n) { return n; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h index c2a56c0c60..0eb3c8dbfc 
100644 --- a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h +++ b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h @@ -19,18 +19,18 @@ #include "gmock/gmock.h" #include "absl/base/internal/spinlock.h" -#include "absl/types/span.h" +#include "absl/types/span.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { class FakeCentralFreeListBase { public: - FakeCentralFreeListBase() {} + FakeCentralFreeListBase() {} FakeCentralFreeListBase(const FakeCentralFreeListBase&) = delete; FakeCentralFreeListBase& operator=(const FakeCentralFreeListBase&) = delete; - static constexpr void Init(size_t) {} + static constexpr void Init(size_t) {} }; // CentralFreeList implementation that backs onto the system's malloc. @@ -39,11 +39,11 @@ class FakeCentralFreeListBase { // is important. class FakeCentralFreeList : public FakeCentralFreeListBase { public: - void InsertRange(absl::Span<void*> batch); + void InsertRange(absl::Span<void*> batch); int RemoveRange(void** batch, int N); void AllocateBatch(void** batch, int n); - void FreeBatch(absl::Span<void*> batch); + void FreeBatch(absl::Span<void*> batch); }; // CentralFreeList implementation that does minimal work but no correctness @@ -52,11 +52,11 @@ class FakeCentralFreeList : public FakeCentralFreeListBase { // Useful for benchmarks where you want to avoid unrelated expensive operations. 
class MinimalFakeCentralFreeList : public FakeCentralFreeListBase { public: - void InsertRange(absl::Span<void*> batch); + void InsertRange(absl::Span<void*> batch); int RemoveRange(void** batch, int N); void AllocateBatch(void** batch, int n); - void FreeBatch(absl::Span<void*> batch); + void FreeBatch(absl::Span<void*> batch); private: absl::base_internal::SpinLock lock_; @@ -69,21 +69,21 @@ class MinimalFakeCentralFreeList : public FakeCentralFreeListBase { class RawMockCentralFreeList : public FakeCentralFreeList { public: RawMockCentralFreeList() : FakeCentralFreeList() { - ON_CALL(*this, InsertRange).WillByDefault([this](absl::Span<void*> batch) { - return static_cast<FakeCentralFreeList*>(this)->InsertRange(batch); + ON_CALL(*this, InsertRange).WillByDefault([this](absl::Span<void*> batch) { + return static_cast<FakeCentralFreeList*>(this)->InsertRange(batch); }); ON_CALL(*this, RemoveRange).WillByDefault([this](void** batch, int n) { return static_cast<FakeCentralFreeList*>(this)->RemoveRange(batch, n); }); } - MOCK_METHOD(void, InsertRange, (absl::Span<void*> batch)); + MOCK_METHOD(void, InsertRange, (absl::Span<void*> batch)); MOCK_METHOD(int, RemoveRange, (void** batch, int N)); }; using MockCentralFreeList = testing::NiceMock<RawMockCentralFreeList>; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc #endif // TCMALLOC_MOCK_CENTRAL_FREELIST_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc index b8b2bcf131..b8216d22e2 100644 --- a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc +++ b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc @@ -15,10 +15,10 @@ #include "tcmalloc/mock_transfer_cache.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { int FakeTransferCacheManager::DetermineSizeClassToEvict() { return 3; } bool FakeTransferCacheManager::ShrinkCache(int) { return true; } -} // namespace 
tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h index 5b5192f6dc..72d3d79802 100644 --- a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h +++ b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h @@ -17,8 +17,8 @@ #include <stddef.h> -#include <algorithm> -#include <memory> +#include <algorithm> +#include <memory> #include <random> #include "gmock/gmock.h" @@ -26,14 +26,14 @@ #include "absl/random/random.h" #include "tcmalloc/common.h" #include "tcmalloc/mock_central_freelist.h" -#include "tcmalloc/transfer_cache_internals.h" +#include "tcmalloc/transfer_cache_internals.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { inline constexpr size_t kClassSize = 8; inline constexpr size_t kNumToMove = 32; -inline constexpr int kSizeClass = 0; +inline constexpr int kSizeClass = 0; class FakeTransferCacheManagerBase { public: @@ -110,16 +110,16 @@ class FakeTransferCacheEnvironment { using Manager = typename TransferCache::Manager; using FreeList = typename TransferCache::FreeList; - static constexpr int kMaxObjectsToMove = - ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove; + static constexpr int kMaxObjectsToMove = + ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove; static constexpr int kBatchSize = Manager::num_objects_to_move(1); - FakeTransferCacheEnvironment() : manager_(), cache_(&manager_, 1) {} + FakeTransferCacheEnvironment() : manager_(), cache_(&manager_, 1) {} ~FakeTransferCacheEnvironment() { Drain(); } - void Shrink() { cache_.ShrinkCache(kSizeClass); } - void Grow() { cache_.GrowCache(kSizeClass); } + void Shrink() { cache_.ShrinkCache(kSizeClass); } + void Grow() { cache_.GrowCache(kSizeClass); } void Insert(int n) { std::vector<void*> bufs; @@ -127,7 +127,7 @@ class FakeTransferCacheEnvironment { int b = std::min(n, kBatchSize); bufs.resize(b); 
central_freelist().AllocateBatch(&bufs[0], b); - cache_.InsertRange(kSizeClass, absl::MakeSpan(bufs)); + cache_.InsertRange(kSizeClass, absl::MakeSpan(bufs)); n -= b; } } @@ -137,11 +137,11 @@ class FakeTransferCacheEnvironment { while (n > 0) { int b = std::min(n, kBatchSize); bufs.resize(b); - int removed = cache_.RemoveRange(kSizeClass, &bufs[0], b); + int removed = cache_.RemoveRange(kSizeClass, &bufs[0], b); // Ensure we make progress. ASSERT_GT(removed, 0); ASSERT_LE(removed, b); - central_freelist().FreeBatch({&bufs[0], static_cast<size_t>(removed)}); + central_freelist().FreeBatch({&bufs[0], static_cast<size_t>(removed)}); n -= removed; } } @@ -158,9 +158,9 @@ class FakeTransferCacheEnvironment { Shrink(); } else if (choice < 0.2) { Grow(); - } else if (choice < 0.3) { - cache_.HasSpareCapacity(kSizeClass); - } else if (choice < 0.65) { + } else if (choice < 0.3) { + cache_.HasSpareCapacity(kSizeClass); + } else if (choice < 0.65) { Insert(absl::Uniform(gen, 1, kBatchSize)); } else { Remove(absl::Uniform(gen, 1, kBatchSize)); @@ -178,133 +178,133 @@ class FakeTransferCacheEnvironment { TransferCache cache_; }; -// A fake transfer cache manager class which supports two size classes instead -// of just the one. To make this work, we have to store the transfer caches -// inside the cache manager, like in production code. -template <typename FreeListT, - template <typename FreeList, typename Manager> class TransferCacheT> -class TwoSizeClassManager : public FakeTransferCacheManagerBase { - public: - using FreeList = FreeListT; - using TransferCache = TransferCacheT<FreeList, TwoSizeClassManager>; - - // This is 3 instead of 2 because we hard code cl == 0 to be invalid in many - // places. We only use cl 1 and 2 here. 
- static constexpr int kSizeClasses = 3; - static constexpr size_t kClassSize1 = 8; - static constexpr size_t kClassSize2 = 16; - static constexpr size_t kNumToMove1 = 32; - static constexpr size_t kNumToMove2 = 16; - - TwoSizeClassManager() { - caches_.push_back(absl::make_unique<TransferCache>(this, 0)); - caches_.push_back(absl::make_unique<TransferCache>(this, 1)); - caches_.push_back(absl::make_unique<TransferCache>(this, 2)); - } - - constexpr static size_t class_to_size(int size_class) { - switch (size_class) { - case 1: - return kClassSize1; - case 2: - return kClassSize2; - default: - return 0; - } - } - constexpr static size_t num_objects_to_move(int size_class) { - switch (size_class) { - case 1: - return kNumToMove1; - case 2: - return kNumToMove2; - default: - return 0; - } - } - - int DetermineSizeClassToEvict() { return evicting_from_; } - - bool ShrinkCache(int size_class) { - return caches_[size_class]->ShrinkCache(size_class); - } - - FreeList& central_freelist(int cl) { return caches_[cl]->freelist(); } - - void InsertRange(int cl, absl::Span<void*> batch) { - caches_[cl]->InsertRange(cl, batch); - } - - int RemoveRange(int cl, void** batch, int N) { - return caches_[cl]->RemoveRange(cl, batch, N); - } - - bool HasSpareCapacity(int cl) { return caches_[cl]->HasSpareCapacity(cl); } - - size_t tc_length(int cl) { return caches_[cl]->tc_length(); } - - std::vector<std::unique_ptr<TransferCache>> caches_; - - // From which size class to evict. 
- int evicting_from_ = 1; -}; - -template <template <typename FreeList, typename Manager> class TransferCacheT> -class TwoSizeClassEnv { - public: - using FreeList = MockCentralFreeList; - using Manager = TwoSizeClassManager<FreeList, TransferCacheT>; - using TransferCache = typename Manager::TransferCache; - - static constexpr int kMaxObjectsToMove = - ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove; - - explicit TwoSizeClassEnv() = default; - - ~TwoSizeClassEnv() { Drain(); } - - void Insert(int cl, int n) { - const size_t batch_size = Manager::num_objects_to_move(cl); - std::vector<void*> bufs; - while (n > 0) { - int b = std::min<int>(n, batch_size); - bufs.resize(b); - central_freelist(cl).AllocateBatch(&bufs[0], b); - manager_.InsertRange(cl, absl::MakeSpan(bufs)); - n -= b; - } - } - - void Remove(int cl, int n) { - const size_t batch_size = Manager::num_objects_to_move(cl); - std::vector<void*> bufs; - while (n > 0) { - const int b = std::min<int>(n, batch_size); - bufs.resize(b); - const int removed = manager_.RemoveRange(cl, &bufs[0], b); - // Ensure we make progress. - ASSERT_GT(removed, 0); - ASSERT_LE(removed, b); - central_freelist(cl).FreeBatch({&bufs[0], static_cast<size_t>(removed)}); - n -= removed; - } - } - - void Drain() { - for (int i = 0; i < Manager::kSizeClasses; ++i) { - Remove(i, manager_.tc_length(i)); - } - } - - Manager& transfer_cache_manager() { return manager_; } - - FreeList& central_freelist(int cl) { return manager_.central_freelist(cl); } - - private: - Manager manager_; -}; - -} // namespace tcmalloc_internal +// A fake transfer cache manager class which supports two size classes instead +// of just the one. To make this work, we have to store the transfer caches +// inside the cache manager, like in production code. 
+template <typename FreeListT, + template <typename FreeList, typename Manager> class TransferCacheT> +class TwoSizeClassManager : public FakeTransferCacheManagerBase { + public: + using FreeList = FreeListT; + using TransferCache = TransferCacheT<FreeList, TwoSizeClassManager>; + + // This is 3 instead of 2 because we hard code cl == 0 to be invalid in many + // places. We only use cl 1 and 2 here. + static constexpr int kSizeClasses = 3; + static constexpr size_t kClassSize1 = 8; + static constexpr size_t kClassSize2 = 16; + static constexpr size_t kNumToMove1 = 32; + static constexpr size_t kNumToMove2 = 16; + + TwoSizeClassManager() { + caches_.push_back(absl::make_unique<TransferCache>(this, 0)); + caches_.push_back(absl::make_unique<TransferCache>(this, 1)); + caches_.push_back(absl::make_unique<TransferCache>(this, 2)); + } + + constexpr static size_t class_to_size(int size_class) { + switch (size_class) { + case 1: + return kClassSize1; + case 2: + return kClassSize2; + default: + return 0; + } + } + constexpr static size_t num_objects_to_move(int size_class) { + switch (size_class) { + case 1: + return kNumToMove1; + case 2: + return kNumToMove2; + default: + return 0; + } + } + + int DetermineSizeClassToEvict() { return evicting_from_; } + + bool ShrinkCache(int size_class) { + return caches_[size_class]->ShrinkCache(size_class); + } + + FreeList& central_freelist(int cl) { return caches_[cl]->freelist(); } + + void InsertRange(int cl, absl::Span<void*> batch) { + caches_[cl]->InsertRange(cl, batch); + } + + int RemoveRange(int cl, void** batch, int N) { + return caches_[cl]->RemoveRange(cl, batch, N); + } + + bool HasSpareCapacity(int cl) { return caches_[cl]->HasSpareCapacity(cl); } + + size_t tc_length(int cl) { return caches_[cl]->tc_length(); } + + std::vector<std::unique_ptr<TransferCache>> caches_; + + // From which size class to evict. 
+ int evicting_from_ = 1; +}; + +template <template <typename FreeList, typename Manager> class TransferCacheT> +class TwoSizeClassEnv { + public: + using FreeList = MockCentralFreeList; + using Manager = TwoSizeClassManager<FreeList, TransferCacheT>; + using TransferCache = typename Manager::TransferCache; + + static constexpr int kMaxObjectsToMove = + ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove; + + explicit TwoSizeClassEnv() = default; + + ~TwoSizeClassEnv() { Drain(); } + + void Insert(int cl, int n) { + const size_t batch_size = Manager::num_objects_to_move(cl); + std::vector<void*> bufs; + while (n > 0) { + int b = std::min<int>(n, batch_size); + bufs.resize(b); + central_freelist(cl).AllocateBatch(&bufs[0], b); + manager_.InsertRange(cl, absl::MakeSpan(bufs)); + n -= b; + } + } + + void Remove(int cl, int n) { + const size_t batch_size = Manager::num_objects_to_move(cl); + std::vector<void*> bufs; + while (n > 0) { + const int b = std::min<int>(n, batch_size); + bufs.resize(b); + const int removed = manager_.RemoveRange(cl, &bufs[0], b); + // Ensure we make progress. 
+ ASSERT_GT(removed, 0); + ASSERT_LE(removed, b); + central_freelist(cl).FreeBatch({&bufs[0], static_cast<size_t>(removed)}); + n -= removed; + } + } + + void Drain() { + for (int i = 0; i < Manager::kSizeClasses; ++i) { + Remove(i, manager_.tc_length(i)); + } + } + + Manager& transfer_cache_manager() { return manager_; } + + FreeList& central_freelist(int cl) { return manager_.central_freelist(cl); } + + private: + Manager manager_; +}; + +} // namespace tcmalloc_internal } // namespace tcmalloc #endif // TCMALLOC_MOCK_TRANSFER_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc index c6dc90adcc..898fda1adb 100644 --- a/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc +++ b/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc @@ -16,9 +16,9 @@ #include "tcmalloc/runtime_size_classes.h" #include "tcmalloc/size_class_info.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Default implementation doesn't load runtime size classes. // To enable runtime size classes, link with :runtime_size_classes. 
@@ -28,6 +28,6 @@ ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE int MaybeSizeClassesFromEnv( return -1; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc index e9599ef46a..b379935fc1 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc @@ -25,9 +25,9 @@ #include "tcmalloc/parameters.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { int ABSL_ATTRIBUTE_WEAK default_want_hpaa(); @@ -44,32 +44,32 @@ bool decide_want_hpaa() { const char *e = tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); if (e) { - switch (e[0]) { - case '0': - if (kPageShift <= 12) { - return false; - } - - if (default_want_hpaa != nullptr) { - int default_hpaa = default_want_hpaa(); - if (default_hpaa < 0) { - return false; - } - } - - Log(kLog, __FILE__, __LINE__, - "Runtime opt-out from HPAA requires building with " - "//tcmalloc:want_no_hpaa." - ); - break; - case '1': - return true; - case '2': - return true; - default: - Crash(kCrash, __FILE__, __LINE__, "bad env var", e); - return false; - } + switch (e[0]) { + case '0': + if (kPageShift <= 12) { + return false; + } + + if (default_want_hpaa != nullptr) { + int default_hpaa = default_want_hpaa(); + if (default_hpaa < 0) { + return false; + } + } + + Log(kLog, __FILE__, __LINE__, + "Runtime opt-out from HPAA requires building with " + "//tcmalloc:want_no_hpaa." 
+ ); + break; + case '1': + return true; + case '2': + return true; + default: + Crash(kCrash, __FILE__, __LINE__, "bad env var", e); + return false; + } } if (default_want_hpaa != nullptr) { @@ -96,22 +96,22 @@ bool want_hpaa() { PageAllocator::PageAllocator() { const bool kUseHPAA = want_hpaa(); if (kUseHPAA) { - normal_impl_[0] = + normal_impl_[0] = new (&choices_[0].hpaa) HugePageAwareAllocator(MemoryTag::kNormal); - if (Static::numa_topology().numa_aware()) { - normal_impl_[1] = - new (&choices_[1].hpaa) HugePageAwareAllocator(MemoryTag::kNormalP1); - } - sampled_impl_ = new (&choices_[kNumaPartitions + 0].hpaa) - HugePageAwareAllocator(MemoryTag::kSampled); + if (Static::numa_topology().numa_aware()) { + normal_impl_[1] = + new (&choices_[1].hpaa) HugePageAwareAllocator(MemoryTag::kNormalP1); + } + sampled_impl_ = new (&choices_[kNumaPartitions + 0].hpaa) + HugePageAwareAllocator(MemoryTag::kSampled); alg_ = HPAA; } else { - normal_impl_[0] = new (&choices_[0].ph) PageHeap(MemoryTag::kNormal); - if (Static::numa_topology().numa_aware()) { - normal_impl_[1] = new (&choices_[1].ph) PageHeap(MemoryTag::kNormalP1); - } - sampled_impl_ = - new (&choices_[kNumaPartitions + 0].ph) PageHeap(MemoryTag::kSampled); + normal_impl_[0] = new (&choices_[0].ph) PageHeap(MemoryTag::kNormal); + if (Static::numa_topology().numa_aware()) { + normal_impl_[1] = new (&choices_[1].ph) PageHeap(MemoryTag::kNormalP1); + } + sampled_impl_ = + new (&choices_[kNumaPartitions + 0].ph) PageHeap(MemoryTag::kSampled); alg_ = PAGE_HEAP; } } @@ -172,12 +172,12 @@ bool PageAllocator::ShrinkHardBy(Length pages) { limit_, "without breaking hugepages - performance will drop"); warned_hugepages = true; } - for (int partition = 0; partition < active_numa_partitions(); partition++) { - ret += static_cast<HugePageAwareAllocator *>(normal_impl_[partition]) - ->ReleaseAtLeastNPagesBreakingHugepages(pages - ret); - if (ret >= pages) { - return true; - } + for (int partition = 0; partition < 
active_numa_partitions(); partition++) { + ret += static_cast<HugePageAwareAllocator *>(normal_impl_[partition]) + ->ReleaseAtLeastNPagesBreakingHugepages(pages - ret); + if (ret >= pages) { + return true; + } } ret += static_cast<HugePageAwareAllocator *>(sampled_impl_) @@ -187,10 +187,10 @@ bool PageAllocator::ShrinkHardBy(Length pages) { return (pages <= ret); } -size_t PageAllocator::active_numa_partitions() const { - return Static::numa_topology().active_partitions(); -} - -} // namespace tcmalloc_internal +size_t PageAllocator::active_numa_partitions() const { + return Static::numa_topology().active_partitions(); +} + +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator.h index 611482f999..174fb791cd 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_allocator.h +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator.h @@ -31,9 +31,9 @@ #include "tcmalloc/span.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { class PageAllocator { public: @@ -76,7 +76,7 @@ class PageAllocator { ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); // Prints stats about the page heap to *out. 
- void Print(Printer* out, MemoryTag tag) ABSL_LOCKS_EXCLUDED(pageheap_lock); + void Print(Printer* out, MemoryTag tag) ABSL_LOCKS_EXCLUDED(pageheap_lock); void PrintInPbtxt(PbtxtRegion* region, MemoryTag tag) ABSL_LOCKS_EXCLUDED(pageheap_lock); @@ -104,10 +104,10 @@ class PageAllocator { ABSL_ATTRIBUTE_RETURNS_NONNULL PageAllocatorInterface* impl( MemoryTag tag) const; - size_t active_numa_partitions() const; - - static constexpr size_t kNumHeaps = kNumaPartitions + 1; + size_t active_numa_partitions() const; + static constexpr size_t kNumHeaps = kNumaPartitions + 1; + union Choices { Choices() : dummy(0) {} ~Choices() {} @@ -115,7 +115,7 @@ class PageAllocator { PageHeap ph; HugePageAwareAllocator hpaa; } choices_[kNumHeaps]; - std::array<PageAllocatorInterface*, kNumaPartitions> normal_impl_; + std::array<PageAllocatorInterface*, kNumaPartitions> normal_impl_; PageAllocatorInterface* sampled_impl_; Algorithm alg_; @@ -128,10 +128,10 @@ class PageAllocator { inline PageAllocatorInterface* PageAllocator::impl(MemoryTag tag) const { switch (tag) { - case MemoryTag::kNormalP0: - return normal_impl_[0]; - case MemoryTag::kNormalP1: - return normal_impl_[1]; + case MemoryTag::kNormalP0: + return normal_impl_[0]; + case MemoryTag::kNormalP1: + return normal_impl_[1]; case MemoryTag::kSampled: return sampled_impl_; default: @@ -153,51 +153,51 @@ inline void PageAllocator::Delete(Span* span, MemoryTag tag) { } inline BackingStats PageAllocator::stats() const { - BackingStats ret = normal_impl_[0]->stats(); - for (int partition = 1; partition < active_numa_partitions(); partition++) { - ret += normal_impl_[partition]->stats(); - } - ret += sampled_impl_->stats(); - return ret; + BackingStats ret = normal_impl_[0]->stats(); + for (int partition = 1; partition < active_numa_partitions(); partition++) { + ret += normal_impl_[partition]->stats(); + } + ret += sampled_impl_->stats(); + return ret; } inline void PageAllocator::GetSmallSpanStats(SmallSpanStats* result) { 
SmallSpanStats normal, sampled; - for (int partition = 0; partition < active_numa_partitions(); partition++) { - SmallSpanStats part_stats; - normal_impl_[partition]->GetSmallSpanStats(&part_stats); - normal += part_stats; - } + for (int partition = 0; partition < active_numa_partitions(); partition++) { + SmallSpanStats part_stats; + normal_impl_[partition]->GetSmallSpanStats(&part_stats); + normal += part_stats; + } sampled_impl_->GetSmallSpanStats(&sampled); *result = normal + sampled; } inline void PageAllocator::GetLargeSpanStats(LargeSpanStats* result) { LargeSpanStats normal, sampled; - for (int partition = 0; partition < active_numa_partitions(); partition++) { - LargeSpanStats part_stats; - normal_impl_[partition]->GetLargeSpanStats(&part_stats); - normal += part_stats; - } + for (int partition = 0; partition < active_numa_partitions(); partition++) { + LargeSpanStats part_stats; + normal_impl_[partition]->GetLargeSpanStats(&part_stats); + normal += part_stats; + } sampled_impl_->GetLargeSpanStats(&sampled); *result = normal + sampled; } inline Length PageAllocator::ReleaseAtLeastNPages(Length num_pages) { - Length released; - for (int partition = 0; partition < active_numa_partitions(); partition++) { - released += - normal_impl_[partition]->ReleaseAtLeastNPages(num_pages - released); - if (released >= num_pages) { - return released; - } + Length released; + for (int partition = 0; partition < active_numa_partitions(); partition++) { + released += + normal_impl_[partition]->ReleaseAtLeastNPages(num_pages - released); + if (released >= num_pages) { + return released; + } } released += sampled_impl_->ReleaseAtLeastNPages(num_pages - released); return released; } -inline void PageAllocator::Print(Printer* out, MemoryTag tag) { +inline void PageAllocator::Print(Printer* out, MemoryTag tag) { const absl::string_view label = MemoryTagToLabel(tag); if (tag != MemoryTag::kNormal) { out->printf("\n>>>>>>> Begin %s page allocator <<<<<<<\n", label); @@ -234,8 
+234,8 @@ inline const PageAllocInfo& PageAllocator::info(MemoryTag tag) const { return impl(tag)->info(); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_PAGE_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc index 3173247acb..5707fbf4f3 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc @@ -27,17 +27,17 @@ #include "tcmalloc/internal/util.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { static int OpenLog(MemoryTag tag) { const char *fname = [&]() { switch (tag) { case MemoryTag::kNormal: return thread_safe_getenv("TCMALLOC_PAGE_LOG_FILE"); - case MemoryTag::kNormalP1: - return thread_safe_getenv("TCMALLOC_PAGE_LOG_FILE_P1"); + case MemoryTag::kNormalP1: + return thread_safe_getenv("TCMALLOC_PAGE_LOG_FILE_P1"); case MemoryTag::kSampled: return thread_safe_getenv("TCMALLOC_SAMPLED_PAGE_LOG_FILE"); default: @@ -84,6 +84,6 @@ PageAllocatorInterface::~PageAllocatorInterface() { Crash(kCrash, __FILE__, __LINE__, "should never destroy this"); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h index cf1dc67897..3dd7436586 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h @@ -26,9 +26,9 @@ #include "tcmalloc/span.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace 
tcmalloc_internal { +namespace tcmalloc_internal { class PageMap; @@ -73,7 +73,7 @@ class PageAllocatorInterface { ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; // Prints stats about the page heap to *out. - virtual void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0; + virtual void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0; // Prints stats about the page heap in pbtxt format. // @@ -90,8 +90,8 @@ class PageAllocatorInterface { MemoryTag tag_; // The type of tagged memory this heap manages }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc index d302c085a9..af7b215050 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc @@ -39,7 +39,7 @@ #include "tcmalloc/stats.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class PageAllocatorTest : public testing::Test { @@ -79,7 +79,7 @@ class PageAllocatorTest : public testing::Test { std::string Print() { std::vector<char> buf(1024 * 1024); - Printer out(&buf[0], buf.size()); + Printer out(&buf[0], buf.size()); allocator_->Print(&out, MemoryTag::kNormal); return std::string(&buf[0]); @@ -141,5 +141,5 @@ TEST_F(PageAllocatorTest, PrintIt) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h index 55f134bfdd..8cfe7507ed 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h @@ -23,9 +23,9 @@ // TODO(b/116000878): Remove dependency on common.h if 
it causes ODR issues. #include "tcmalloc/common.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // AddressRegion that adds some padding on either side of each // allocation. This prevents multiple PageAllocators in the system @@ -72,8 +72,8 @@ class ExtraRegionFactory : public AddressRegionFactory { AddressRegionFactory *under_; }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap.cc b/contrib/libs/tcmalloc/tcmalloc/page_heap.cc index c6b4c6dbd1..9bd8a7573c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_heap.cc +++ b/contrib/libs/tcmalloc/tcmalloc/page_heap.cc @@ -20,7 +20,7 @@ #include "absl/base/internal/cycleclock.h" #include "absl/base/internal/spinlock.h" -#include "absl/numeric/bits.h" +#include "absl/numeric/bits.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/page_heap_allocator.h" @@ -30,9 +30,9 @@ #include "tcmalloc/static_vars.h" #include "tcmalloc/system-alloc.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Helper function to record span address into pageheap void PageHeap::RecordSpan(Span* span) { @@ -132,7 +132,7 @@ static bool IsSpanBetter(Span* span, Span* best, Length n) { // don't bother. Span* PageHeap::NewAligned(Length n, Length align) { ASSERT(n > Length(0)); - ASSERT(absl::has_single_bit(align.raw_num())); + ASSERT(absl::has_single_bit(align.raw_num())); if (align <= Length(1)) { return New(n); @@ -493,7 +493,7 @@ void PageHeap::PrintInPbtxt(PbtxtRegion* region) { // We do not collect info_.PrintInPbtxt for now. 
} -void PageHeap::Print(Printer* out) { +void PageHeap::Print(Printer* out) { absl::base_internal::SpinLockHolder h(&pageheap_lock); SmallSpanStats small; GetSmallSpanStats(&small); @@ -523,6 +523,6 @@ void PageHeap::Print(Printer* out) { info_.Print(out); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap.h b/contrib/libs/tcmalloc/tcmalloc/page_heap.h index 86cf5d01df..1a5ec27a59 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_heap.h +++ b/contrib/libs/tcmalloc/tcmalloc/page_heap.h @@ -23,9 +23,9 @@ #include "tcmalloc/span.h" #include "tcmalloc/stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // ------------------------------------------------------------------------- // Page-level allocator @@ -40,7 +40,7 @@ class PageHeap final : public PageAllocatorInterface { explicit PageHeap(MemoryTag tag); // for testing PageHeap(PageMap* map, MemoryTag tag); - ~PageHeap() override = default; + ~PageHeap() override = default; // Allocate a run of "n" pages. Returns zero if out of memory. // Caller should not pass "n == 0" -- instead, n should have @@ -79,7 +79,7 @@ class PageHeap final : public PageAllocatorInterface { ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; // Prints stats about the page heap to *out. 
- void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; void PrintInPbtxt(PbtxtRegion* region) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; @@ -154,8 +154,8 @@ class PageHeap final : public PageAllocatorInterface { void RecordSpan(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_PAGE_HEAP_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h b/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h index 5d2bbfe92c..63a80a4bda 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h +++ b/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h @@ -17,16 +17,16 @@ #include <stddef.h> -#include "absl/base/attributes.h" +#include "absl/base/attributes.h" #include "absl/base/optimization.h" #include "absl/base/thread_annotations.h" #include "tcmalloc/arena.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { struct AllocatorStats { // Number of allocated but unfreed objects @@ -52,8 +52,8 @@ class PageHeapAllocator { Delete(New()); } - ABSL_ATTRIBUTE_RETURNS_NONNULL T* New() - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + ABSL_ATTRIBUTE_RETURNS_NONNULL T* New() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { // Consult free list T* result = free_list_; stats_.in_use++; @@ -65,8 +65,8 @@ class PageHeapAllocator { return result; } - void Delete(T* p) ABSL_ATTRIBUTE_NONNULL() - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + void Delete(T* p) ABSL_ATTRIBUTE_NONNULL() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { *(reinterpret_cast<void**>(p)) = free_list_; free_list_ = p; stats_.in_use--; @@ -86,8 +86,8 @@ class PageHeapAllocator 
{ AllocatorStats stats_ ABSL_GUARDED_BY(pageheap_lock); }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc b/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc index dc13a60cb7..249a91f7d0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc @@ -28,18 +28,18 @@ #include "tcmalloc/static_vars.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { // PageHeap expands by kMinSystemAlloc by default, so use this as the minimum // Span length to not get more memory than expected. constexpr Length kMinSpanLength = BytesToLengthFloor(kMinSystemAlloc); -void CheckStats(const PageHeap* ph, Length system_pages, Length free_pages, - Length unmapped_pages) ABSL_LOCKS_EXCLUDED(pageheap_lock) { - BackingStats stats; +void CheckStats(const PageHeap* ph, Length system_pages, Length free_pages, + Length unmapped_pages) ABSL_LOCKS_EXCLUDED(pageheap_lock) { + BackingStats stats; { - absl::base_internal::SpinLockHolder h(&pageheap_lock); + absl::base_internal::SpinLockHolder h(&pageheap_lock); stats = ph->stats(); } @@ -48,15 +48,15 @@ void CheckStats(const PageHeap* ph, Length system_pages, Length free_pages, ASSERT_EQ(unmapped_pages.in_bytes(), stats.unmapped_bytes); } -static void Delete(PageHeap* ph, Span* s) ABSL_LOCKS_EXCLUDED(pageheap_lock) { +static void Delete(PageHeap* ph, Span* s) ABSL_LOCKS_EXCLUDED(pageheap_lock) { { - absl::base_internal::SpinLockHolder h(&pageheap_lock); + absl::base_internal::SpinLockHolder h(&pageheap_lock); ph->Delete(s); } } -static Length Release(PageHeap* ph, Length n) { - absl::base_internal::SpinLockHolder h(&pageheap_lock); +static Length Release(PageHeap* ph, Length n) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); return 
ph->ReleaseAtLeastNPages(n); } @@ -71,20 +71,20 @@ class PageHeapTest : public ::testing::Test { // TODO(b/36484267): replace this test wholesale. TEST_F(PageHeapTest, Stats) { - auto pagemap = absl::make_unique<PageMap>(); - void* memory = calloc(1, sizeof(PageHeap)); - PageHeap* ph = new (memory) PageHeap(pagemap.get(), MemoryTag::kNormal); + auto pagemap = absl::make_unique<PageMap>(); + void* memory = calloc(1, sizeof(PageHeap)); + PageHeap* ph = new (memory) PageHeap(pagemap.get(), MemoryTag::kNormal); // Empty page heap CheckStats(ph, Length(0), Length(0), Length(0)); // Allocate a span 's1' - Span* s1 = ph->New(kMinSpanLength); + Span* s1 = ph->New(kMinSpanLength); CheckStats(ph, kMinSpanLength, Length(0), Length(0)); // Allocate an aligned span 's2' static const Length kHalf = kMinSpanLength / 2; - Span* s2 = ph->NewAligned(kHalf, kHalf); + Span* s2 = ph->NewAligned(kHalf, kHalf); ASSERT_EQ(s2->first_page().index() % kHalf.raw_num(), 0); CheckStats(ph, kMinSpanLength * 2, Length(0), kHalf); @@ -105,5 +105,5 @@ TEST_F(PageHeapTest, Stats) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap.cc b/contrib/libs/tcmalloc/tcmalloc/pagemap.cc index 25962302c3..4270f58d12 100644 --- a/contrib/libs/tcmalloc/tcmalloc/pagemap.cc +++ b/contrib/libs/tcmalloc/tcmalloc/pagemap.cc @@ -20,9 +20,9 @@ #include "tcmalloc/span.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { void PageMap::RegisterSizeClass(Span* span, size_t sc) { ASSERT(span->location() == Span::IN_USE); @@ -68,6 +68,6 @@ void* MetaDataAlloc(size_t bytes) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { return Static::arena().Alloc(bytes); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END 
+GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap.h b/contrib/libs/tcmalloc/tcmalloc/pagemap.h index 0cafa8a38d..e6da30b938 100644 --- a/contrib/libs/tcmalloc/tcmalloc/pagemap.h +++ b/contrib/libs/tcmalloc/tcmalloc/pagemap.h @@ -37,9 +37,9 @@ #include "tcmalloc/span.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Two-level radix tree typedef void* (*PagemapAllocator)(size_t); @@ -69,8 +69,8 @@ class PageMap2 { // information. The size class information is kept segregated // since small object deallocations are so frequent and do not // need the other information kept in a Span. - CompactSizeClass sizeclass[kLeafLength]; - Span* span[kLeafLength]; + CompactSizeClass sizeclass[kLeafLength]; + Span* span[kLeafLength]; void* hugepage[kLeafHugepages]; }; @@ -94,7 +94,7 @@ class PageMap2 { // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. // Requires that the span is known to already exist. - Span* get_existing(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + Span* get_existing(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { const Number i1 = k >> kLeafBits; const Number i2 = k & (kLeafLength - 1); ASSERT((k >> BITS) == 0); @@ -104,7 +104,7 @@ class PageMap2 { // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. // REQUIRES: Must be a valid page number previously Ensure()d. 
- CompactSizeClass ABSL_ATTRIBUTE_ALWAYS_INLINE + CompactSizeClass ABSL_ATTRIBUTE_ALWAYS_INLINE sizeclass(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { const Number i1 = k >> kLeafBits; const Number i2 = k & (kLeafLength - 1); @@ -113,19 +113,19 @@ class PageMap2 { return root_[i1]->sizeclass[i2]; } - void set(Number k, Span* s) { + void set(Number k, Span* s) { ASSERT(k >> BITS == 0); const Number i1 = k >> kLeafBits; const Number i2 = k & (kLeafLength - 1); - root_[i1]->span[i2] = s; + root_[i1]->span[i2] = s; } - void set_with_sizeclass(Number k, Span* s, CompactSizeClass sc) { + void set_with_sizeclass(Number k, Span* s, CompactSizeClass sc) { ASSERT(k >> BITS == 0); const Number i1 = k >> kLeafBits; const Number i2 = k & (kLeafLength - 1); Leaf* leaf = root_[i1]; - leaf->span[i2] = s; + leaf->span[i2] = s; leaf->sizeclass[i2] = sc; } @@ -140,9 +140,9 @@ class PageMap2 { ASSERT(k >> BITS == 0); const Number i1 = k >> kLeafBits; const Number i2 = k & (kLeafLength - 1); - const Leaf* leaf = root_[i1]; - ASSERT(leaf != nullptr); - return leaf->hugepage[i2 >> (kLeafBits - kLeafHugeBits)]; + const Leaf* leaf = root_[i1]; + ASSERT(leaf != nullptr); + return leaf->hugepage[i2 >> (kLeafBits - kLeafHugeBits)]; } void set_hugepage(Number k, void* v) { @@ -216,8 +216,8 @@ class PageMap3 { // information. The size class information is kept segregated // since small object deallocations are so frequent and do not // need the other information kept in a Span. - CompactSizeClass sizeclass[kLeafLength]; - Span* span[kLeafLength]; + CompactSizeClass sizeclass[kLeafLength]; + Span* span[kLeafLength]; void* hugepage[kLeafHugepages]; }; @@ -248,7 +248,7 @@ class PageMap3 { // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. // Requires that the span is known to already exist. 
- Span* get_existing(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + Span* get_existing(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { const Number i1 = k >> (kLeafBits + kMidBits); const Number i2 = (k >> kLeafBits) & (kMidLength - 1); const Number i3 = k & (kLeafLength - 1); @@ -260,7 +260,7 @@ class PageMap3 { // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. // REQUIRES: Must be a valid page number previously Ensure()d. - CompactSizeClass ABSL_ATTRIBUTE_ALWAYS_INLINE + CompactSizeClass ABSL_ATTRIBUTE_ALWAYS_INLINE sizeclass(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { const Number i1 = k >> (kLeafBits + kMidBits); const Number i2 = (k >> kLeafBits) & (kMidLength - 1); @@ -271,21 +271,21 @@ class PageMap3 { return root_[i1]->leafs[i2]->sizeclass[i3]; } - void set(Number k, Span* s) { + void set(Number k, Span* s) { ASSERT(k >> BITS == 0); const Number i1 = k >> (kLeafBits + kMidBits); const Number i2 = (k >> kLeafBits) & (kMidLength - 1); const Number i3 = k & (kLeafLength - 1); - root_[i1]->leafs[i2]->span[i3] = s; + root_[i1]->leafs[i2]->span[i3] = s; } - void set_with_sizeclass(Number k, Span* s, CompactSizeClass sc) { + void set_with_sizeclass(Number k, Span* s, CompactSizeClass sc) { ASSERT(k >> BITS == 0); const Number i1 = k >> (kLeafBits + kMidBits); const Number i2 = (k >> kLeafBits) & (kMidLength - 1); const Number i3 = k & (kLeafLength - 1); Leaf* leaf = root_[i1]->leafs[i2]; - leaf->span[i3] = s; + leaf->span[i3] = s; leaf->sizeclass[i3] = sc; } @@ -302,11 +302,11 @@ class PageMap3 { const Number i1 = k >> (kLeafBits + kMidBits); const Number i2 = (k >> kLeafBits) & (kMidLength - 1); const Number i3 = k & (kLeafLength - 1); - const Node* node = root_[i1]; - ASSERT(node != nullptr); - const Leaf* leaf = node->leafs[i2]; - ASSERT(leaf != nullptr); - return leaf->hugepage[i3 >> (kLeafBits - kLeafHugeBits)]; + const Node* node = root_[i1]; + ASSERT(node != nullptr); + const Leaf* leaf = node->leafs[i2]; + ASSERT(leaf 
!= nullptr); + return leaf->hugepage[i3 >> (kLeafBits - kLeafHugeBits)]; } void set_hugepage(Number k, void* v) { @@ -362,7 +362,7 @@ class PageMap { // Return the size class for p, or 0 if it is not known to tcmalloc // or is a page containing large objects. // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. - CompactSizeClass sizeclass(PageId p) ABSL_NO_THREAD_SAFETY_ANALYSIS { + CompactSizeClass sizeclass(PageId p) ABSL_NO_THREAD_SAFETY_ANALYSIS { return map_.sizeclass(p.index()); } @@ -397,7 +397,7 @@ class PageMap { // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. ABSL_ATTRIBUTE_RETURNS_NONNULL inline Span* GetExistingDescriptor( PageId p) const ABSL_NO_THREAD_SAFETY_ANALYSIS { - Span* span = map_.get_existing(p.index()); + Span* span = map_.get_existing(p.index()); ASSERT(span != nullptr); return span; } @@ -424,8 +424,8 @@ class PageMap { #endif }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_PAGEMAP_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc b/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc index 49ef5477d8..ba31e36943 100644 --- a/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc @@ -32,11 +32,11 @@ // create too many maps. 
namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { // Pick span pointer to use for page numbered i -Span* span(intptr_t i) { return reinterpret_cast<Span*>(i + 1); } +Span* span(intptr_t i) { return reinterpret_cast<Span*>(i + 1); } // Pick sizeclass to use for page numbered i uint8_t sc(intptr_t i) { return i % 16; } @@ -69,7 +69,7 @@ class PageMapTest : public ::testing::TestWithParam<int> { } public: - using Map = PageMap2<20, alloc>; + using Map = PageMap2<20, alloc>; Map* map; private: @@ -139,22 +139,22 @@ INSTANTIATE_TEST_SUITE_P(Limits, PageMapTest, ::testing::Values(100, 1 << 20)); // that this is true even if this structure is mapped with huge pages. static struct PaddedPageMap { constexpr PaddedPageMap() : padding_before{}, pagemap{}, padding_after{} {} - uint64_t padding_before[kHugePageSize / sizeof(uint64_t)]; - PageMap pagemap; - uint64_t padding_after[kHugePageSize / sizeof(uint64_t)]; + uint64_t padding_before[kHugePageSize / sizeof(uint64_t)]; + PageMap pagemap; + uint64_t padding_after[kHugePageSize / sizeof(uint64_t)]; } padded_pagemap_; TEST(TestMemoryFootprint, Test) { uint64_t pagesize = sysconf(_SC_PAGESIZE); ASSERT_NE(pagesize, 0); - size_t pages = sizeof(PageMap) / pagesize + 1; + size_t pages = sizeof(PageMap) / pagesize + 1; std::vector<unsigned char> present(pages); // mincore needs the address rounded to the start page uint64_t basepage = reinterpret_cast<uintptr_t>(&padded_pagemap_.pagemap) & ~(pagesize - 1); - ASSERT_EQ(mincore(reinterpret_cast<void*>(basepage), sizeof(PageMap), - present.data()), + ASSERT_EQ(mincore(reinterpret_cast<void*>(basepage), sizeof(PageMap), + present.data()), 0); for (int i = 0; i < pages; i++) { EXPECT_EQ(present[i], 0); @@ -162,5 +162,5 @@ TEST(TestMemoryFootprint, Test) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/pages.h 
b/contrib/libs/tcmalloc/tcmalloc/pages.h index e674c9c9c8..0ff8fa3d5a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/pages.h +++ b/contrib/libs/tcmalloc/tcmalloc/pages.h @@ -23,11 +23,11 @@ #include "absl/strings/string_view.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/optimization.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Type that can hold the length of a run of pages class Length { @@ -144,20 +144,20 @@ class PageId { uintptr_t pn_; }; -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length LengthFromBytes(size_t bytes) { return Length(bytes >> kPageShift); } // Convert byte size into pages. This won't overflow, but may return // an unreasonably large value if bytes is huge enough. -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length BytesToLengthCeil(size_t bytes) { return Length((bytes >> kPageShift) + ((bytes & (kPageSize - 1)) > 0 ? 
1 : 0)); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length BytesToLengthFloor(size_t bytes) { return Length(bytes >> kPageShift); } @@ -170,82 +170,82 @@ inline PageId& operator++(PageId& p) { // NOLINT(runtime/references) return p += Length(1); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator<(PageId lhs, PageId rhs) { return lhs.pn_ < rhs.pn_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator>(PageId lhs, PageId rhs) { return lhs.pn_ > rhs.pn_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator<=(PageId lhs, PageId rhs) { return lhs.pn_ <= rhs.pn_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator>=(PageId lhs, PageId rhs) { return lhs.pn_ >= rhs.pn_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator==(PageId lhs, PageId rhs) { return lhs.pn_ == rhs.pn_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator!=(PageId lhs, PageId rhs) { return lhs.pn_ != rhs.pn_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr PageId operator+(PageId lhs, Length rhs) { return lhs += rhs; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr PageId operator+(Length lhs, PageId rhs) { return rhs += lhs; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr PageId operator-(PageId lhs, Length rhs) { return lhs -= rhs; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length operator-(PageId lhs, PageId rhs) { ASSERT(lhs.pn_ >= rhs.pn_); return Length(lhs.pn_ - rhs.pn_); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline PageId PageIdContaining(const void* p) { return PageId(reinterpret_cast<uintptr_t>(p) >> kPageShift); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator<(Length lhs, Length rhs) { return lhs.n_ < rhs.n_; } 
-TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator>(Length lhs, Length rhs) { return lhs.n_ > rhs.n_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator<=(Length lhs, Length rhs) { return lhs.n_ <= rhs.n_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator>=(Length lhs, Length rhs) { return lhs.n_ >= rhs.n_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator==(Length lhs, Length rhs) { return lhs.n_ == rhs.n_; } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr bool operator!=(Length lhs, Length rhs) { return lhs.n_ != rhs.n_; } @@ -254,45 +254,45 @@ inline Length& operator++(Length& l) { return l += Length(1); } inline Length& operator--(Length& l) { return l -= Length(1); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length operator+(Length lhs, Length rhs) { return Length(lhs.raw_num() + rhs.raw_num()); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length operator-(Length lhs, Length rhs) { return Length(lhs.raw_num() - rhs.raw_num()); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length operator*(Length lhs, size_t rhs) { return Length(lhs.raw_num() * rhs); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length operator*(size_t lhs, Length rhs) { return Length(lhs * rhs.raw_num()); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr size_t operator/(Length lhs, Length rhs) { return lhs.raw_num() / rhs.raw_num(); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length operator/(Length lhs, size_t rhs) { ASSERT(rhs != 0); return Length(lhs.raw_num() / rhs); } -TCMALLOC_ATTRIBUTE_CONST +TCMALLOC_ATTRIBUTE_CONST inline constexpr Length operator%(Length lhs, Length rhs) { ASSERT(rhs.raw_num() != 0); return Length(lhs.raw_num() % rhs.raw_num()); } -} // namespace tcmalloc_internal 
+} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_PAGES_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/parameters.cc b/contrib/libs/tcmalloc/tcmalloc/parameters.cc index 3f8e6e1ef8..3eb9a1efb0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/parameters.cc +++ b/contrib/libs/tcmalloc/tcmalloc/parameters.cc @@ -22,9 +22,9 @@ #include "tcmalloc/static_vars.h" #include "tcmalloc/thread_cache.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // As decide_subrelease() is determined at runtime, we cannot require constant // initialization for the atomic. This avoids an initialization order fiasco. @@ -69,10 +69,10 @@ ABSL_CONST_INIT std::atomic<MallocExtension::BytesPerSecond> }); ABSL_CONST_INIT std::atomic<int64_t> Parameters::guarded_sampling_rate_( 50 * kDefaultProfileSamplingRate); -ABSL_CONST_INIT std::atomic<bool> Parameters::shuffle_per_cpu_caches_enabled_( - false); -ABSL_CONST_INIT std::atomic<bool> - Parameters::reclaim_idle_per_cpu_caches_enabled_(true); +ABSL_CONST_INIT std::atomic<bool> Parameters::shuffle_per_cpu_caches_enabled_( + false); +ABSL_CONST_INIT std::atomic<bool> + Parameters::reclaim_idle_per_cpu_caches_enabled_(true); ABSL_CONST_INIT std::atomic<bool> Parameters::lazy_per_cpu_caches_enabled_( true); ABSL_CONST_INIT std::atomic<int32_t> Parameters::max_per_cpu_cache_size_( @@ -97,94 +97,94 @@ absl::Duration Parameters::filler_skip_subrelease_interval() { skip_subrelease_interval_ns().load(std::memory_order_relaxed)); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -using tcmalloc::tcmalloc_internal::kLog; -using tcmalloc::tcmalloc_internal::Log; -using tcmalloc::tcmalloc_internal::Parameters; -using tcmalloc::tcmalloc_internal::Static; +GOOGLE_MALLOC_SECTION_END +using tcmalloc::tcmalloc_internal::kLog; 
+using tcmalloc::tcmalloc_internal::Log; +using tcmalloc::tcmalloc_internal::Parameters; +using tcmalloc::tcmalloc_internal::Static; + extern "C" { int64_t MallocExtension_Internal_GetProfileSamplingRate() { - return Parameters::profile_sampling_rate(); + return Parameters::profile_sampling_rate(); } void MallocExtension_Internal_SetProfileSamplingRate(int64_t value) { - Parameters::set_profile_sampling_rate(value); + Parameters::set_profile_sampling_rate(value); } int64_t MallocExtension_Internal_GetGuardedSamplingRate() { - return Parameters::guarded_sampling_rate(); + return Parameters::guarded_sampling_rate(); } void MallocExtension_Internal_SetGuardedSamplingRate(int64_t value) { - Parameters::set_guarded_sampling_rate(value); + Parameters::set_guarded_sampling_rate(value); } int64_t MallocExtension_Internal_GetMaxTotalThreadCacheBytes() { - return Parameters::max_total_thread_cache_bytes(); + return Parameters::max_total_thread_cache_bytes(); } void MallocExtension_Internal_SetMaxTotalThreadCacheBytes(int64_t value) { - Parameters::set_max_total_thread_cache_bytes(value); -} - -void MallocExtension_Internal_GetSkipSubreleaseInterval(absl::Duration* ret) { - *ret = Parameters::filler_skip_subrelease_interval(); -} - -void MallocExtension_Internal_SetSkipSubreleaseInterval(absl::Duration value) { - Parameters::set_filler_skip_subrelease_interval(value); + Parameters::set_max_total_thread_cache_bytes(value); } +void MallocExtension_Internal_GetSkipSubreleaseInterval(absl::Duration* ret) { + *ret = Parameters::filler_skip_subrelease_interval(); +} + +void MallocExtension_Internal_SetSkipSubreleaseInterval(absl::Duration value) { + Parameters::set_filler_skip_subrelease_interval(value); +} + tcmalloc::MallocExtension::BytesPerSecond MallocExtension_Internal_GetBackgroundReleaseRate() { - return Parameters::background_release_rate(); + return Parameters::background_release_rate(); } void MallocExtension_Internal_SetBackgroundReleaseRate( 
tcmalloc::MallocExtension::BytesPerSecond rate) { - Parameters::set_background_release_rate(rate); + Parameters::set_background_release_rate(rate); } void TCMalloc_Internal_SetBackgroundReleaseRate(size_t value) { - Parameters::background_release_rate_.store( + Parameters::background_release_rate_.store( static_cast<tcmalloc::MallocExtension::BytesPerSecond>(value)); } uint64_t TCMalloc_Internal_GetHeapSizeHardLimit() { - return Parameters::heap_size_hard_limit(); + return Parameters::heap_size_hard_limit(); } bool TCMalloc_Internal_GetHPAASubrelease() { - return Parameters::hpaa_subrelease(); -} - -bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled() { - return Parameters::shuffle_per_cpu_caches(); -} - -bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled() { - return Parameters::reclaim_idle_per_cpu_caches(); + return Parameters::hpaa_subrelease(); } +bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled() { + return Parameters::shuffle_per_cpu_caches(); +} + +bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled() { + return Parameters::reclaim_idle_per_cpu_caches(); +} + bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled() { - return Parameters::lazy_per_cpu_caches(); + return Parameters::lazy_per_cpu_caches(); } double TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction() { - return Parameters::peak_sampling_heap_growth_fraction(); + return Parameters::peak_sampling_heap_growth_fraction(); } bool TCMalloc_Internal_GetPerCpuCachesEnabled() { - return Parameters::per_cpu_caches(); + return Parameters::per_cpu_caches(); } void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v) { - Parameters::guarded_sampling_rate_.store(v, std::memory_order_relaxed); + Parameters::guarded_sampling_rate_.store(v, std::memory_order_relaxed); } // update_lock guards changes via SetHeapSizeHardLimit. 
@@ -193,7 +193,7 @@ ABSL_CONST_INIT static absl::base_internal::SpinLock update_lock( void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t value) { // Ensure that page allocator is set up. - Static::InitIfNecessary(); + Static::InitIfNecessary(); absl::base_internal::SpinLockHolder l(&update_lock); @@ -204,68 +204,68 @@ void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t value) { active = true; } - bool currently_hard = Static::page_allocator().limit().second; + bool currently_hard = Static::page_allocator().limit().second; if (active || currently_hard) { // Avoid resetting limit when current limit is soft. - Static::page_allocator().set_limit(limit, active /* is_hard */); - Log(kLog, __FILE__, __LINE__, "[tcmalloc] set page heap hard limit to", - limit, "bytes"); + Static::page_allocator().set_limit(limit, active /* is_hard */); + Log(kLog, __FILE__, __LINE__, "[tcmalloc] set page heap hard limit to", + limit, "bytes"); } } void TCMalloc_Internal_SetHPAASubrelease(bool v) { - tcmalloc::tcmalloc_internal::hpaa_subrelease_ptr()->store( - v, std::memory_order_relaxed); -} - -void TCMalloc_Internal_SetShufflePerCpuCachesEnabled(bool v) { - Parameters::shuffle_per_cpu_caches_enabled_.store(v, - std::memory_order_relaxed); -} - -void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(bool v) { - Parameters::reclaim_idle_per_cpu_caches_enabled_.store( - v, std::memory_order_relaxed); -} - + tcmalloc::tcmalloc_internal::hpaa_subrelease_ptr()->store( + v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetShufflePerCpuCachesEnabled(bool v) { + Parameters::shuffle_per_cpu_caches_enabled_.store(v, + std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(bool v) { + Parameters::reclaim_idle_per_cpu_caches_enabled_.store( + v, std::memory_order_relaxed); +} + void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v) { - Parameters::lazy_per_cpu_caches_enabled_.store(v, std::memory_order_relaxed); + 
Parameters::lazy_per_cpu_caches_enabled_.store(v, std::memory_order_relaxed); } void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v) { - Parameters::max_per_cpu_cache_size_.store(v, std::memory_order_relaxed); + Parameters::max_per_cpu_cache_size_.store(v, std::memory_order_relaxed); } void TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v) { - Parameters::max_total_thread_cache_bytes_.store(v, std::memory_order_relaxed); + Parameters::max_total_thread_cache_bytes_.store(v, std::memory_order_relaxed); - absl::base_internal::SpinLockHolder l( - &tcmalloc::tcmalloc_internal::pageheap_lock); - tcmalloc::tcmalloc_internal::ThreadCache::set_overall_thread_cache_size(v); + absl::base_internal::SpinLockHolder l( + &tcmalloc::tcmalloc_internal::pageheap_lock); + tcmalloc::tcmalloc_internal::ThreadCache::set_overall_thread_cache_size(v); } void TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(double v) { - Parameters::peak_sampling_heap_growth_fraction_.store( + Parameters::peak_sampling_heap_growth_fraction_.store( v, std::memory_order_relaxed); } void TCMalloc_Internal_SetPerCpuCachesEnabled(bool v) { - Parameters::per_cpu_caches_enabled_.store(v, std::memory_order_relaxed); + Parameters::per_cpu_caches_enabled_.store(v, std::memory_order_relaxed); } void TCMalloc_Internal_SetProfileSamplingRate(int64_t v) { - Parameters::profile_sampling_rate_.store(v, std::memory_order_relaxed); + Parameters::profile_sampling_rate_.store(v, std::memory_order_relaxed); } void TCMalloc_Internal_GetHugePageFillerSkipSubreleaseInterval( absl::Duration* v) { - *v = Parameters::filler_skip_subrelease_interval(); + *v = Parameters::filler_skip_subrelease_interval(); } void TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval( absl::Duration v) { - tcmalloc::tcmalloc_internal::skip_subrelease_interval_ns().store( - absl::ToInt64Nanoseconds(v), std::memory_order_relaxed); + tcmalloc::tcmalloc_internal::skip_subrelease_interval_ns().store( + absl::ToInt64Nanoseconds(v), 
std::memory_order_relaxed); } } // extern "C" diff --git a/contrib/libs/tcmalloc/tcmalloc/parameters.h b/contrib/libs/tcmalloc/tcmalloc/parameters.h index 64893f0402..e1786486f9 100644 --- a/contrib/libs/tcmalloc/tcmalloc/parameters.h +++ b/contrib/libs/tcmalloc/tcmalloc/parameters.h @@ -22,13 +22,13 @@ #include "absl/base/internal/spinlock.h" #include "absl/time/time.h" #include "absl/types/optional.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/parameter_accessors.h" #include "tcmalloc/malloc_extension.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { class Parameters { public: @@ -79,14 +79,14 @@ class Parameters { TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(value); } - static bool shuffle_per_cpu_caches() { - return shuffle_per_cpu_caches_enabled_.load(std::memory_order_relaxed); - } - - static bool reclaim_idle_per_cpu_caches() { - return reclaim_idle_per_cpu_caches_enabled_.load(std::memory_order_relaxed); - } - + static bool shuffle_per_cpu_caches() { + return shuffle_per_cpu_caches_enabled_.load(std::memory_order_relaxed); + } + + static bool reclaim_idle_per_cpu_caches() { + return reclaim_idle_per_cpu_caches_enabled_.load(std::memory_order_relaxed); + } + static bool lazy_per_cpu_caches() { return lazy_per_cpu_caches_enabled_.load(std::memory_order_relaxed); } @@ -121,8 +121,8 @@ class Parameters { friend void ::TCMalloc_Internal_SetBackgroundReleaseRate(size_t v); friend void ::TCMalloc_Internal_SetGuardedSamplingRate(int64_t v); friend void ::TCMalloc_Internal_SetHPAASubrelease(bool v); - friend void ::TCMalloc_Internal_SetShufflePerCpuCachesEnabled(bool v); - friend void ::TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(bool v); + friend void ::TCMalloc_Internal_SetShufflePerCpuCachesEnabled(bool v); + friend void ::TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(bool v); friend void 
::TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v); friend void ::TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v); friend void ::TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v); @@ -135,8 +135,8 @@ class Parameters { static std::atomic<MallocExtension::BytesPerSecond> background_release_rate_; static std::atomic<int64_t> guarded_sampling_rate_; - static std::atomic<bool> shuffle_per_cpu_caches_enabled_; - static std::atomic<bool> reclaim_idle_per_cpu_caches_enabled_; + static std::atomic<bool> shuffle_per_cpu_caches_enabled_; + static std::atomic<bool> reclaim_idle_per_cpu_caches_enabled_; static std::atomic<bool> lazy_per_cpu_caches_enabled_; static std::atomic<int32_t> max_per_cpu_cache_size_; static std::atomic<int64_t> max_total_thread_cache_bytes_; @@ -145,8 +145,8 @@ class Parameters { static std::atomic<int64_t> profile_sampling_rate_; }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_PARAMETERS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc index 0dcc0df536..d247a16d50 100644 --- a/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc +++ b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc @@ -26,9 +26,9 @@ #include "tcmalloc/stack_trace_table.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { bool PeakHeapTracker::IsNewPeak() { return peak_sampled_heap_size_.value() == 0 || @@ -76,18 +76,18 @@ void PeakHeapTracker::MaybeSaveSample() { peak_sampled_span_stacks_ = t; } -std::unique_ptr<ProfileBase> PeakHeapTracker::DumpSample() const { +std::unique_ptr<ProfileBase> PeakHeapTracker::DumpSample() const { auto profile = absl::make_unique<StackTraceTable>( ProfileType::kPeakHeap, Sampler::GetSamplePeriod(), true, true); 
absl::base_internal::SpinLockHolder h(&pageheap_lock); for (StackTrace* t = peak_sampled_span_stacks_; t != nullptr; - t = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1])) { + t = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1])) { profile->AddTrace(1.0, *t); } return profile; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h index a9f071d1b5..87d90fc548 100644 --- a/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h +++ b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h @@ -21,9 +21,9 @@ #include "tcmalloc/internal/logging.h" #include "tcmalloc/malloc_extension.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { class PeakHeapTracker { public: @@ -36,7 +36,7 @@ class PeakHeapTracker { void MaybeSaveSample() ABSL_LOCKS_EXCLUDED(pageheap_lock); // Return the saved high-water-mark heap profile, if any. - std::unique_ptr<ProfileBase> DumpSample() const + std::unique_ptr<ProfileBase> DumpSample() const ABSL_LOCKS_EXCLUDED(pageheap_lock); size_t CurrentPeakSize() const { return peak_sampled_heap_size_.value(); } @@ -49,13 +49,13 @@ class PeakHeapTracker { // Sampled heap size last time peak_sampled_span_stacks_ was saved. Only // written under pageheap_lock; may be read without it. 
- StatsCounter peak_sampled_heap_size_; + StatsCounter peak_sampled_heap_size_; bool IsNewPeak(); }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_PEAK_HEAP_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/profile_test.cc b/contrib/libs/tcmalloc/tcmalloc/profile_test.cc index 0bd62cd428..242d5e2327 100644 --- a/contrib/libs/tcmalloc/tcmalloc/profile_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/profile_test.cc @@ -28,7 +28,7 @@ #include "gtest/gtest.h" #include "absl/container/flat_hash_map.h" #include "absl/synchronization/blocking_counter.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include "tcmalloc/internal/declarations.h" #include "tcmalloc/internal/linked_list.h" #include "tcmalloc/malloc_extension.h" @@ -39,21 +39,21 @@ namespace { TEST(AllocationSampleTest, TokenAbuse) { auto token = MallocExtension::StartAllocationProfiling(); - void *ptr = ::operator new(512 * 1024 * 1024); - // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. - benchmark::DoNotOptimize(ptr); - ::operator delete(ptr); + void *ptr = ::operator new(512 * 1024 * 1024); + // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. + benchmark::DoNotOptimize(ptr); + ::operator delete(ptr); // Repeated Claims should happily return null. auto profile = std::move(token).Stop(); int count = 0; profile.Iterate([&](const Profile::Sample &) { count++; }); - -#if !defined(UNDEFINED_BEHAVIOR_SANITIZER) - // UBSan does not implement our profiling API, but running the test can - // validate the correctness of the new/delete pairs. + +#if !defined(UNDEFINED_BEHAVIOR_SANITIZER) + // UBSan does not implement our profiling API, but running the test can + // validate the correctness of the new/delete pairs. 
EXPECT_EQ(count, 1); -#endif +#endif auto profile2 = std::move(token).Stop(); // NOLINT: use-after-move intended int count2 = 0; @@ -122,8 +122,8 @@ TEST(AllocationSampleTest, SampleAccuracy) { size_t size; size_t alignment; bool keep; - // objects we don't delete as we go - void *list = nullptr; + // objects we don't delete as we go + void *list = nullptr; }; std::vector<Requests> sizes = { {8, 0, false}, {16, 16, true}, {1024, 0, false}, @@ -136,7 +136,7 @@ TEST(AllocationSampleTest, SampleAccuracy) { // We use new/delete to allocate memory, as malloc returns objects aligned to // std::max_align_t. - for (auto &s : sizes) { + for (auto &s : sizes) { for (size_t bytes = 0; bytes < kTotalPerSize; bytes += s.size) { void *obj; if (s.alignment > 0) { @@ -145,9 +145,9 @@ TEST(AllocationSampleTest, SampleAccuracy) { obj = operator new(s.size); } if (s.keep) { - tcmalloc_internal::SLL_Push(&s.list, obj); - } else if (s.alignment > 0) { - operator delete(obj, static_cast<std::align_val_t>(s.alignment)); + tcmalloc_internal::SLL_Push(&s.list, obj); + } else if (s.alignment > 0) { + operator delete(obj, static_cast<std::align_val_t>(s.alignment)); } else { operator delete(obj); } @@ -166,21 +166,21 @@ TEST(AllocationSampleTest, SampleAccuracy) { } profile.Iterate([&](const tcmalloc::Profile::Sample &e) { - // Skip unexpected sizes. They may have been triggered by a background - // thread. - if (sizes_expected.find(e.allocated_size) == sizes_expected.end()) { - return; - } - + // Skip unexpected sizes. They may have been triggered by a background + // thread. + if (sizes_expected.find(e.allocated_size) == sizes_expected.end()) { + return; + } + // Don't check stack traces until we have evidence that's broken, it's // tedious and done fairly well elsewhere. 
m[e.allocated_size] += e.sum; EXPECT_EQ(alignment[e.requested_size], e.requested_alignment); }); -#if !defined(UNDEFINED_BEHAVIOR_SANITIZER) - // UBSan does not implement our profiling API, but running the test can - // validate the correctness of the new/delete pairs. +#if !defined(UNDEFINED_BEHAVIOR_SANITIZER) + // UBSan does not implement our profiling API, but running the test can + // validate the correctness of the new/delete pairs. size_t max_bytes = 0, min_bytes = std::numeric_limits<size_t>::max(); EXPECT_EQ(m.size(), sizes_expected.size()); for (auto seen : m) { @@ -194,18 +194,18 @@ TEST(AllocationSampleTest, SampleAccuracy) { EXPECT_GE((min_bytes * 3) / 2, max_bytes); EXPECT_LE((min_bytes * 3) / 4, kTotalPerSize); EXPECT_LE(kTotalPerSize, (max_bytes * 4) / 3); -#endif - +#endif + // Remove the objects we left alive - for (auto &s : sizes) { - while (s.list != nullptr) { - void *obj = tcmalloc_internal::SLL_Pop(&s.list); - if (s.alignment > 0) { - operator delete(obj, static_cast<std::align_val_t>(s.alignment)); - } else { - operator delete(obj); - } - } + for (auto &s : sizes) { + while (s.list != nullptr) { + void *obj = tcmalloc_internal::SLL_Pop(&s.list); + if (s.alignment > 0) { + operator delete(obj, static_cast<std::align_val_t>(s.alignment)); + } else { + operator delete(obj); + } + } } } diff --git a/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc index e0e6aba606..3bb8bee5b9 100644 --- a/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc @@ -25,7 +25,7 @@ #include "gtest/gtest.h" #include "absl/random/random.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" namespace tcmalloc { namespace { diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc index 4bca6485ca..4ff76c6caf 100644 --- a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc +++ 
b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc @@ -20,10 +20,10 @@ #include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { -namespace runtime_size_classes_internal { +namespace tcmalloc_internal { +namespace runtime_size_classes_internal { int ParseSizeClasses(absl::string_view env, int max_size, int max_classes, SizeClassInfo* parsed) { @@ -63,19 +63,19 @@ int ParseSizeClasses(absl::string_view env, int max_size, int max_classes, return c + 1; } -} // namespace runtime_size_classes_internal +} // namespace runtime_size_classes_internal int ABSL_ATTRIBUTE_NOINLINE MaybeSizeClassesFromEnv(int max_size, int max_classes, SizeClassInfo* parsed) { - const char* e = thread_safe_getenv("TCMALLOC_SIZE_CLASSES"); + const char* e = thread_safe_getenv("TCMALLOC_SIZE_CLASSES"); if (!e) { return 0; } - return runtime_size_classes_internal::ParseSizeClasses(e, max_size, - max_classes, parsed); + return runtime_size_classes_internal::ParseSizeClasses(e, max_size, + max_classes, parsed); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h index 42c5aa8859..106058aed8 100644 --- a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h +++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h @@ -19,10 +19,10 @@ #include "absl/strings/string_view.h" #include "tcmalloc/size_class_info.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { -namespace runtime_size_classes_internal { +namespace tcmalloc_internal { +namespace runtime_size_classes_internal { // Set size classes from a string. 
// Format: "size,pages,num_to_move;" @@ -33,7 +33,7 @@ namespace runtime_size_classes_internal { int ParseSizeClasses(absl::string_view env, int max_size, int max_classes, SizeClassInfo* parsed); -} // namespace runtime_size_classes_internal +} // namespace runtime_size_classes_internal // If the environment variable TCMALLOC_SIZE_CLASSES is defined, its value is // parsed using ParseSizeClasses and ApplySizeClassDefaults into parsed. The @@ -42,8 +42,8 @@ int ParseSizeClasses(absl::string_view env, int max_size, int max_classes, int MaybeSizeClassesFromEnv(int max_size, int max_classes, SizeClassInfo* parsed); -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_RUNTIME_SIZE_CLASSES_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc index 89a111e3b8..74c0ce5748 100644 --- a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc +++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc @@ -21,10 +21,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) { absl::string_view env = absl::string_view(reinterpret_cast<const char*>(d), size); - tcmalloc::tcmalloc_internal::SizeClassInfo - parsed[tcmalloc::tcmalloc_internal::kNumClasses]; - tcmalloc::tcmalloc_internal::runtime_size_classes_internal::ParseSizeClasses( - env, tcmalloc::tcmalloc_internal::kMaxSize, - tcmalloc::tcmalloc_internal::kNumClasses, parsed); + tcmalloc::tcmalloc_internal::SizeClassInfo + parsed[tcmalloc::tcmalloc_internal::kNumClasses]; + tcmalloc::tcmalloc_internal::runtime_size_classes_internal::ParseSizeClasses( + env, tcmalloc::tcmalloc_internal::kMaxSize, + tcmalloc::tcmalloc_internal::kNumClasses, parsed); return 0; } diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc index 
6a8771f9e2..8d6ccca514 100644 --- a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc @@ -19,11 +19,11 @@ #include "gtest/gtest.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { -using runtime_size_classes_internal::ParseSizeClasses; - +using runtime_size_classes_internal::ParseSizeClasses; + constexpr int kNumClasses = 4; constexpr int kMaxSize = 1024 * 1024; @@ -110,5 +110,5 @@ TEST(RuntimeSizeClassesTest, EnvVariableExamined) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/sampler.cc b/contrib/libs/tcmalloc/tcmalloc/sampler.cc index 5e89c9e830..d26531487f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/sampler.cc +++ b/contrib/libs/tcmalloc/tcmalloc/sampler.cc @@ -23,9 +23,9 @@ #include "tcmalloc/parameters.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { ssize_t Sampler::GetSamplePeriod() { return Parameters::profile_sampling_rate(); @@ -201,6 +201,6 @@ double AllocatedBytes(const StackTrace& stack, bool unsample) { } } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/sampler.h b/contrib/libs/tcmalloc/tcmalloc/sampler.h index d18dd44234..66f2baadf9 100644 --- a/contrib/libs/tcmalloc/tcmalloc/sampler.h +++ b/contrib/libs/tcmalloc/tcmalloc/sampler.h @@ -25,9 +25,9 @@ #include "tcmalloc/parameters.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { //------------------------------------------------------------------- // Sampler to decide when to create 
a sample trace for an allocation @@ -291,8 +291,8 @@ inline void Sampler::UpdateFastPathState() { // If unsample is false, the caller will handle unsampling. double AllocatedBytes(const StackTrace &stack, bool unsample); -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_SAMPLER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/size_class_info.h b/contrib/libs/tcmalloc/tcmalloc/size_class_info.h index a424432b75..58b3e54159 100644 --- a/contrib/libs/tcmalloc/tcmalloc/size_class_info.h +++ b/contrib/libs/tcmalloc/tcmalloc/size_class_info.h @@ -20,9 +20,9 @@ #include "tcmalloc/internal/logging.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // The number of members in SizeClassInfo static constexpr int kSizeClassInfoMembers = 3; @@ -72,8 +72,8 @@ struct SizeClassInfo { size_t num_to_move; }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_size_class_info_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes.cc index f4b444994d..7b8ad73459 100644 --- a/contrib/libs/tcmalloc/tcmalloc/size_classes.cc +++ b/contrib/libs/tcmalloc/tcmalloc/size_classes.cc @@ -14,11 +14,11 @@ #include "tcmalloc/common.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { - +namespace tcmalloc_internal { + // <fixed> is fixed per-size-class overhead due to end-of-span fragmentation // and other factors. 
For instance, if we have a 96 byte size class, and use a // single 8KiB page, then we will hold 85 objects per span, and have 32 bytes @@ -68,10 +68,10 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 240, 1, 32}, // 0.98% { 256, 1, 32}, // 0.59% { 272, 1, 32}, // 0.98% - { 296, 1, 32}, // 3.10% + { 296, 1, 32}, // 3.10% { 312, 1, 32}, // 1.58% { 336, 1, 32}, // 2.18% - { 352, 1, 32}, // 1.78% + { 352, 1, 32}, // 1.78% { 368, 1, 32}, // 1.78% { 408, 1, 32}, // 0.98% { 448, 1, 32}, // 2.18% @@ -105,7 +105,7 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 9472, 5, 6}, // 8.23% { 10240, 4, 6}, // 6.82% { 12288, 3, 5}, // 0.20% - { 13568, 5, 4}, // 0.75% + { 13568, 5, 4}, // 0.75% { 14336, 7, 4}, // 0.08% { 16384, 2, 4}, // 0.29% { 20480, 5, 3}, // 0.12% @@ -119,13 +119,13 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 73728, 9, 2}, // 0.07% { 81920, 10, 2}, // 0.06% { 98304, 12, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% + { 114688, 14, 2}, // 0.04% { 131072, 16, 2}, // 0.04% { 147456, 18, 2}, // 0.03% { 163840, 20, 2}, // 0.03% { 180224, 22, 2}, // 0.03% { 204800, 25, 2}, // 0.02% - { 237568, 29, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% { 262144, 32, 2}, // 0.02% }; #elif TCMALLOC_PAGE_SHIFT == 15 @@ -156,16 +156,16 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 176, 1, 32}, // 0.24% { 192, 1, 32}, // 0.54% { 208, 1, 32}, // 0.49% - { 224, 1, 32}, // 0.34% - { 240, 1, 32}, // 0.54% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% { 256, 1, 32}, // 0.15% { 280, 1, 32}, // 0.17% { 304, 1, 32}, // 0.89% - { 328, 1, 32}, // 1.06% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% + { 328, 1, 32}, // 1.06% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% { 416, 1, 32}, // 1.13% - { 448, 1, 32}, // 0.34% + { 448, 1, 32}, // 0.34% { 488, 1, 32}, // 0.37% { 512, 1, 32}, // 0.15% { 576, 1, 32}, // 1.74% @@ -176,8 +176,8 @@ const SizeClassInfo 
SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 1024, 1, 32}, // 0.15% { 1152, 1, 32}, // 1.74% { 1280, 1, 32}, // 2.55% - { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% { 2048, 1, 32}, // 0.15% { 2176, 1, 30}, // 0.54% { 2304, 1, 28}, // 1.74% @@ -189,7 +189,7 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 4608, 1, 14}, // 1.74% { 5376, 1, 12}, // 1.74% { 6528, 1, 10}, // 0.54% - { 7168, 2, 9}, // 1.66% + { 7168, 2, 9}, // 1.66% { 8192, 1, 8}, // 0.15% { 9344, 2, 7}, // 0.27% { 10880, 1, 6}, // 0.54% @@ -200,7 +200,7 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 24576, 3, 2}, // 0.05% { 28032, 6, 2}, // 0.22% { 32768, 1, 2}, // 0.15% - { 38144, 5, 2}, // 7.41% + { 38144, 5, 2}, // 7.41% { 40960, 4, 2}, // 6.71% { 49152, 3, 2}, // 0.05% { 57344, 7, 2}, // 0.02% @@ -234,32 +234,32 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 80, 1, 32}, // 0.04% { 88, 1, 32}, // 0.05% { 96, 1, 32}, // 0.04% - { 104, 1, 32}, // 0.04% + { 104, 1, 32}, // 0.04% { 112, 1, 32}, // 0.04% { 128, 1, 32}, // 0.02% { 144, 1, 32}, // 0.04% { 160, 1, 32}, // 0.04% { 176, 1, 32}, // 0.05% { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% { 240, 1, 32}, // 0.04% { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% { 360, 1, 32}, // 0.04% - { 408, 1, 32}, // 0.10% - { 456, 1, 32}, // 0.17% + { 408, 1, 32}, // 0.10% + { 456, 1, 32}, // 0.17% { 512, 1, 32}, // 0.02% { 576, 1, 32}, // 0.04% { 640, 1, 32}, // 0.17% { 704, 1, 32}, // 0.12% - { 768, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% { 832, 1, 32}, // 0.04% - { 896, 1, 32}, // 0.21% + { 896, 1, 32}, // 0.21% { 1024, 1, 32}, // 0.02% { 1152, 1, 32}, // 0.26% { 1280, 1, 32}, // 0.41% - { 1536, 1, 32}, // 0.41% + { 1536, 1, 32}, // 0.41% { 1664, 1, 32}, // 0.36% { 1792, 1, 32}, // 0.21% 
{ 1920, 1, 32}, // 0.41% @@ -267,24 +267,24 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 2176, 1, 30}, // 0.41% { 2304, 1, 28}, // 0.71% { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% + { 2560, 1, 25}, // 0.41% { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% + { 2816, 1, 23}, // 0.12% { 2944, 1, 22}, // 0.07% { 3072, 1, 21}, // 0.41% { 3328, 1, 19}, // 1.00% { 3584, 1, 18}, // 0.21% { 3840, 1, 17}, // 0.41% { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% + { 4736, 1, 13}, // 0.66% { 5504, 1, 11}, // 1.35% { 6144, 1, 10}, // 1.61% - { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% { 7168, 1, 9}, // 1.61% { 7680, 1, 8}, // 0.41% { 8192, 1, 8}, // 0.02% - { 8704, 1, 7}, // 0.41% + { 8704, 1, 7}, // 0.41% { 9344, 1, 7}, // 0.21% { 10880, 1, 6}, // 0.41% { 11904, 1, 5}, // 0.12% @@ -332,11 +332,11 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 88, 1, 32}, // 2.37% { 96, 1, 32}, // 2.78% { 104, 1, 32}, // 2.17% - { 120, 1, 32}, // 1.57% + { 120, 1, 32}, // 1.57% { 128, 1, 32}, // 1.17% { 144, 1, 32}, // 2.78% { 160, 1, 32}, // 3.60% - { 184, 1, 32}, // 2.37% + { 184, 1, 32}, // 2.37% { 208, 1, 32}, // 4.86% { 240, 1, 32}, // 1.57% { 256, 1, 32}, // 1.17% @@ -347,22 +347,22 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 408, 1, 32}, // 1.57% { 512, 1, 32}, // 1.17% { 576, 2, 32}, // 2.18% - { 704, 2, 32}, // 6.40% + { 704, 2, 32}, // 6.40% { 768, 2, 32}, // 7.29% { 896, 2, 32}, // 2.18% { 1024, 2, 32}, // 0.59% { 1152, 3, 32}, // 7.08% { 1280, 3, 32}, // 7.08% { 1536, 3, 32}, // 0.39% - { 1792, 4, 32}, // 1.88% + { 1792, 4, 32}, // 1.88% { 2048, 4, 32}, // 0.29% { 2304, 4, 28}, // 1.88% { 2688, 4, 24}, // 1.88% - { 3456, 6, 18}, // 1.79% + { 3456, 6, 18}, // 1.79% { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% + { 5376, 4, 12}, // 1.88% { 6144, 3, 10}, // 0.39% - { 7168, 7, 9}, // 0.17% + { 7168, 7, 9}, 
// 0.17% { 8192, 4, 8}, // 0.29% }; #else @@ -452,12 +452,12 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 90112, 11, 2}, // 0.05% { 98304, 12, 2}, // 0.05% { 106496, 13, 2}, // 0.05% - { 114688, 14, 2}, // 0.04% + { 114688, 14, 2}, // 0.04% { 131072, 16, 2}, // 0.04% { 139264, 17, 2}, // 0.03% { 155648, 19, 2}, // 0.03% - { 172032, 21, 2}, // 0.03% - { 188416, 23, 2}, // 0.03% + { 172032, 21, 2}, // 0.03% + { 188416, 23, 2}, // 0.03% { 204800, 25, 2}, // 0.02% { 221184, 27, 2}, // 0.02% { 237568, 29, 2}, // 0.02% @@ -491,10 +491,10 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 272, 1, 32}, // 0.54% { 288, 1, 32}, // 0.84% { 304, 1, 32}, // 0.89% - { 320, 1, 32}, // 0.54% + { 320, 1, 32}, // 0.54% { 336, 1, 32}, // 0.69% - { 352, 1, 32}, // 0.24% - { 384, 1, 32}, // 0.54% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% { 416, 1, 32}, // 1.13% { 448, 1, 32}, // 0.34% { 480, 1, 32}, // 0.54% @@ -510,7 +510,7 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 1280, 1, 32}, // 2.55% { 1408, 1, 32}, // 1.33% { 1536, 1, 32}, // 1.74% - { 1792, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% { 2048, 1, 32}, // 0.15% { 2176, 1, 30}, // 0.54% { 2304, 1, 28}, // 1.74% @@ -570,11 +570,11 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 160, 1, 32}, // 0.04% { 176, 1, 32}, // 0.05% { 192, 1, 32}, // 0.04% - { 208, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% { 240, 1, 32}, // 0.04% { 256, 1, 32}, // 0.02% - { 304, 1, 32}, // 0.05% - { 336, 1, 32}, // 0.04% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% { 368, 1, 32}, // 0.07% { 416, 1, 32}, // 0.04% { 464, 1, 32}, // 0.19% @@ -582,7 +582,7 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 576, 1, 32}, // 0.04% { 640, 1, 32}, // 0.17% { 704, 1, 32}, // 0.12% - { 768, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% { 832, 1, 32}, // 0.04% { 896, 1, 32}, // 0.21% { 1024, 1, 32}, // 
0.02% @@ -597,30 +597,30 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 2176, 1, 30}, // 0.41% { 2304, 1, 28}, // 0.71% { 2432, 1, 26}, // 0.76% - { 2560, 1, 25}, // 0.41% + { 2560, 1, 25}, // 0.41% { 2688, 1, 24}, // 0.56% - { 2816, 1, 23}, // 0.12% + { 2816, 1, 23}, // 0.12% { 2944, 1, 22}, // 0.07% { 3072, 1, 21}, // 0.41% - { 3200, 1, 20}, // 1.15% + { 3200, 1, 20}, // 1.15% { 3328, 1, 19}, // 1.00% { 3584, 1, 18}, // 0.21% { 3840, 1, 17}, // 0.41% { 4096, 1, 16}, // 0.02% - { 4736, 1, 13}, // 0.66% + { 4736, 1, 13}, // 0.66% { 5504, 1, 11}, // 1.35% { 6144, 1, 10}, // 1.61% { 6528, 1, 10}, // 0.41% - { 6784, 1, 9}, // 1.71% + { 6784, 1, 9}, // 1.71% { 7168, 1, 9}, // 1.61% { 7680, 1, 8}, // 0.41% { 8192, 1, 8}, // 0.02% { 8704, 1, 7}, // 0.41% { 9344, 1, 7}, // 0.21% - { 10368, 1, 6}, // 1.15% - { 11392, 1, 5}, // 0.07% - { 12416, 1, 5}, // 0.56% - { 13696, 1, 4}, // 0.76% + { 10368, 1, 6}, // 1.15% + { 11392, 1, 5}, // 0.07% + { 12416, 1, 5}, // 0.56% + { 13696, 1, 4}, // 0.76% { 14464, 1, 4}, // 0.71% { 16384, 1, 4}, // 0.02% { 17408, 1, 3}, // 0.41% @@ -695,7 +695,7 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { { 3200, 4, 20}, // 2.70% { 3584, 7, 18}, // 0.17% { 4096, 4, 16}, // 0.29% - { 5376, 4, 12}, // 1.88% + { 5376, 4, 12}, // 1.88% { 6144, 3, 10}, // 0.39% { 7168, 7, 9}, // 0.17% { 8192, 4, 8}, // 0.29% @@ -706,6 +706,6 @@ const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { #endif // clang-format on -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc index d66ce5b186..de29d57954 100644 --- a/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc @@ -20,26 +20,26 @@ #include "tcmalloc/common.h" #include 
"tcmalloc/size_class_info.h" #include "tcmalloc/span.h" -#include "tcmalloc/tcmalloc_policy.h" +#include "tcmalloc/tcmalloc_policy.h" namespace tcmalloc { -namespace tcmalloc_internal { - -// Moved out of anonymous namespace so that it can be found by friend class in -// span.h. This allows tests to access span internals so that we can -// validate that scaling by a reciprocal correctly converts a pointer into -// an offset within a span. -class SpanTestPeer { - public: - static uint16_t CalcReciprocal(size_t size) { - return Span::CalcReciprocal(size); - } - static Span::ObjIdx TestOffsetToIdx(uintptr_t offset, size_t size, - uint16_t reciprocal) { - return Span::TestOffsetToIdx(offset, size, reciprocal); - } -}; - +namespace tcmalloc_internal { + +// Moved out of anonymous namespace so that it can be found by friend class in +// span.h. This allows tests to access span internals so that we can +// validate that scaling by a reciprocal correctly converts a pointer into +// an offset within a span. +class SpanTestPeer { + public: + static uint16_t CalcReciprocal(size_t size) { + return Span::CalcReciprocal(size); + } + static Span::ObjIdx TestOffsetToIdx(uintptr_t offset, size_t size, + uint16_t reciprocal) { + return Span::TestOffsetToIdx(offset, size, reciprocal); + } +}; + namespace { size_t Alignment(size_t size) { @@ -92,49 +92,49 @@ TEST_F(SizeClassesTest, SpanPages) { } } -TEST_F(SizeClassesTest, ValidateSufficientBitmapCapacity) { - // Validate that all the objects in a span can fit into a bitmap. - // The cut-off for using a bitmap is kBitmapMinObjectSize, so it is - // theoretically possible that a span could exceed this threshold - // for object size and contain more than 64 objects. 
- for (int c = 1; c < kNumClasses; ++c) { - const size_t max_size_in_class = m_.class_to_size(c); - if (max_size_in_class >= kBitmapMinObjectSize) { - const size_t objects_per_span = - Length(m_.class_to_pages(c)).in_bytes() / m_.class_to_size(c); - // Span can hold at most 64 objects of this size. - EXPECT_LE(objects_per_span, 64); - } - } -} - -TEST_F(SizeClassesTest, ValidateCorrectScalingByReciprocal) { - // Validate that multiplying by the reciprocal works for all size classes. - // When converting an offset within a span into an index we avoid a - // division operation by scaling by the reciprocal. The test ensures - // that this approach works for all objects in a span, for all object - // sizes. - for (int c = 1; c < kNumClasses; ++c) { - const size_t max_size_in_class = m_.class_to_size(c); - // Only test for sizes where object availability is recorded in a bitmap. - if (max_size_in_class < kBitmapMinObjectSize) { - continue; - } - size_t reciprocal = SpanTestPeer::CalcReciprocal(max_size_in_class); - const size_t objects_per_span = - Length(m_.class_to_pages(c)).in_bytes() / m_.class_to_size(c); - for (int index = 0; index < objects_per_span; index++) { - // Calculate the address of the object. - uintptr_t address = index * max_size_in_class; - // Calculate the index into the page using the reciprocal method. - int idx = - SpanTestPeer::TestOffsetToIdx(address, max_size_in_class, reciprocal); - // Check that the starting address back is correct. - ASSERT_EQ(address, idx * max_size_in_class); - } - } -} - +TEST_F(SizeClassesTest, ValidateSufficientBitmapCapacity) { + // Validate that all the objects in a span can fit into a bitmap. + // The cut-off for using a bitmap is kBitmapMinObjectSize, so it is + // theoretically possible that a span could exceed this threshold + // for object size and contain more than 64 objects. 
+ for (int c = 1; c < kNumClasses; ++c) { + const size_t max_size_in_class = m_.class_to_size(c); + if (max_size_in_class >= kBitmapMinObjectSize) { + const size_t objects_per_span = + Length(m_.class_to_pages(c)).in_bytes() / m_.class_to_size(c); + // Span can hold at most 64 objects of this size. + EXPECT_LE(objects_per_span, 64); + } + } +} + +TEST_F(SizeClassesTest, ValidateCorrectScalingByReciprocal) { + // Validate that multiplying by the reciprocal works for all size classes. + // When converting an offset within a span into an index we avoid a + // division operation by scaling by the reciprocal. The test ensures + // that this approach works for all objects in a span, for all object + // sizes. + for (int c = 1; c < kNumClasses; ++c) { + const size_t max_size_in_class = m_.class_to_size(c); + // Only test for sizes where object availability is recorded in a bitmap. + if (max_size_in_class < kBitmapMinObjectSize) { + continue; + } + size_t reciprocal = SpanTestPeer::CalcReciprocal(max_size_in_class); + const size_t objects_per_span = + Length(m_.class_to_pages(c)).in_bytes() / m_.class_to_size(c); + for (int index = 0; index < objects_per_span; index++) { + // Calculate the address of the object. + uintptr_t address = index * max_size_in_class; + // Calculate the index into the page using the reciprocal method. + int idx = + SpanTestPeer::TestOffsetToIdx(address, max_size_in_class, reciprocal); + // Check that the starting address back is correct. + ASSERT_EQ(address, idx * max_size_in_class); + } + } +} + TEST_F(SizeClassesTest, Aligned) { // Validate that each size class is properly aligned. for (int c = 1; c < kNumClasses; c++) { @@ -152,19 +152,19 @@ TEST_F(SizeClassesTest, Distinguishable) { // ClassIndexMaybe provides 8 byte granularity below 1024 bytes and 128 byte // granularity for larger sizes, so our chosen size classes cannot be any // finer (otherwise they would map to the same entry in the lookup table). 
- // - // We don't check expanded size classes which are intentionally duplicated. - for (int partition = 0; partition < kNumaPartitions; partition++) { - for (int c = (partition * kNumBaseClasses) + 1; - c < (partition + 1) * kNumBaseClasses; c++) { - const size_t max_size_in_class = m_.class_to_size(c); - if (max_size_in_class == 0) { - continue; - } - const int class_index = m_.SizeClass( - CppPolicy().InNumaPartition(partition), max_size_in_class); - - EXPECT_EQ(c, class_index) << max_size_in_class; + // + // We don't check expanded size classes which are intentionally duplicated. + for (int partition = 0; partition < kNumaPartitions; partition++) { + for (int c = (partition * kNumBaseClasses) + 1; + c < (partition + 1) * kNumBaseClasses; c++) { + const size_t max_size_in_class = m_.class_to_size(c); + if (max_size_in_class == 0) { + continue; + } + const int class_index = m_.SizeClass( + CppPolicy().InNumaPartition(partition), max_size_in_class); + + EXPECT_EQ(c, class_index) << max_size_in_class; } } } @@ -189,11 +189,11 @@ TEST_F(SizeClassesTest, DoubleCheckedConsistency) { // Validate that every size on [0, kMaxSize] maps to a size class that is // neither too big nor too small. for (size_t size = 0; size <= kMaxSize; size++) { - const int sc = m_.SizeClass(CppPolicy(), size); + const int sc = m_.SizeClass(CppPolicy(), size); EXPECT_GT(sc, 0) << size; EXPECT_LT(sc, kNumClasses) << size; - if ((sc % kNumBaseClasses) > 1) { + if ((sc % kNumBaseClasses) > 1) { EXPECT_GT(size, m_.class_to_size(sc - 1)) << "Allocating unnecessarily large class"; } @@ -239,9 +239,9 @@ TEST_F(RunTimeSizeClassesTest, ExpandedSizeClasses) { // Verify that none of the default size classes are considered expanded size // classes. 
for (int i = 0; i < kNumClasses; i++) { - EXPECT_EQ(i < (m_.DefaultSizeClassesCount() * kNumaPartitions), - !IsExpandedSizeClass(i)) - << i; + EXPECT_EQ(i < (m_.DefaultSizeClassesCount() * kNumaPartitions), + !IsExpandedSizeClass(i)) + << i; } } @@ -346,14 +346,14 @@ TEST(SizeMapTest, GetSizeClass) { constexpr int kTrials = 1000; SizeMap m; - // Before m.Init(), SizeClass should always return 0 or the equivalent in a - // non-zero NUMA partition. + // Before m.Init(), SizeClass should always return 0 or the equivalent in a + // non-zero NUMA partition. for (int i = 0; i < kTrials; ++i) { const size_t size = absl::LogUniform(rng, 0, 4 << 20); uint32_t cl; - if (m.GetSizeClass(CppPolicy(), size, &cl)) { - EXPECT_EQ(cl % kNumBaseClasses, 0) << size; - EXPECT_LT(cl, kExpandedClassesStart) << size; + if (m.GetSizeClass(CppPolicy(), size, &cl)) { + EXPECT_EQ(cl % kNumBaseClasses, 0) << size; + EXPECT_LT(cl, kExpandedClassesStart) << size; } else { // We should only fail to lookup the size class when size is outside of // the size classes. @@ -367,7 +367,7 @@ TEST(SizeMapTest, GetSizeClass) { for (int i = 0; i < kTrials; ++i) { const size_t size = absl::LogUniform(rng, 0, 4 << 20); uint32_t cl; - if (m.GetSizeClass(CppPolicy(), size, &cl)) { + if (m.GetSizeClass(CppPolicy(), size, &cl)) { const size_t mapped_size = m.class_to_size(cl); // The size class needs to hold size. ASSERT_GE(mapped_size, size); @@ -384,15 +384,15 @@ TEST(SizeMapTest, GetSizeClassWithAlignment) { constexpr int kTrials = 1000; SizeMap m; - // Before m.Init(), SizeClass should always return 0 or the equivalent in a - // non-zero NUMA partition. + // Before m.Init(), SizeClass should always return 0 or the equivalent in a + // non-zero NUMA partition. 
for (int i = 0; i < kTrials; ++i) { const size_t size = absl::LogUniform(rng, 0, 4 << 20); const size_t alignment = 1 << absl::Uniform(rng, 0u, kHugePageShift); uint32_t cl; - if (m.GetSizeClass(CppPolicy().AlignAs(alignment), size, &cl)) { - EXPECT_EQ(cl % kNumBaseClasses, 0) << size << " " << alignment; - EXPECT_LT(cl, kExpandedClassesStart) << size << " " << alignment; + if (m.GetSizeClass(CppPolicy().AlignAs(alignment), size, &cl)) { + EXPECT_EQ(cl % kNumBaseClasses, 0) << size << " " << alignment; + EXPECT_LT(cl, kExpandedClassesStart) << size << " " << alignment; } else if (alignment < kPageSize) { // When alignment > kPageSize, we do not produce a size class. // TODO(b/172060547): alignment == kPageSize could fit into the size @@ -410,7 +410,7 @@ TEST(SizeMapTest, GetSizeClassWithAlignment) { const size_t size = absl::LogUniform(rng, 0, 4 << 20); const size_t alignment = 1 << absl::Uniform(rng, 0u, kHugePageShift); uint32_t cl; - if (m.GetSizeClass(CppPolicy().AlignAs(alignment), size, &cl)) { + if (m.GetSizeClass(CppPolicy().AlignAs(alignment), size, &cl)) { const size_t mapped_size = m.class_to_size(cl); // The size class needs to hold size. ASSERT_GE(mapped_size, size); @@ -432,13 +432,13 @@ TEST(SizeMapTest, SizeClass) { constexpr int kTrials = 1000; SizeMap m; - // Before m.Init(), SizeClass should always return 0 or the equivalent in a - // non-zero NUMA partition. + // Before m.Init(), SizeClass should always return 0 or the equivalent in a + // non-zero NUMA partition. for (int i = 0; i < kTrials; ++i) { const size_t size = absl::LogUniform<size_t>(rng, 0u, kMaxSize); - const uint32_t cl = m.SizeClass(CppPolicy(), size); - EXPECT_EQ(cl % kNumBaseClasses, 0) << size; - EXPECT_LT(cl, kExpandedClassesStart) << size; + const uint32_t cl = m.SizeClass(CppPolicy(), size); + EXPECT_EQ(cl % kNumBaseClasses, 0) << size; + EXPECT_LT(cl, kExpandedClassesStart) << size; } // After m.Init(), SizeClass should return a size class. 
@@ -446,7 +446,7 @@ TEST(SizeMapTest, SizeClass) { for (int i = 0; i < kTrials; ++i) { const size_t size = absl::LogUniform<size_t>(rng, 0u, kMaxSize); - uint32_t cl = m.SizeClass(CppPolicy(), size); + uint32_t cl = m.SizeClass(CppPolicy(), size); const size_t mapped_size = m.class_to_size(cl); // The size class needs to hold size. @@ -465,5 +465,5 @@ TEST(SizeMapTest, Preinit) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc index 17badddac9..fffd4b478a 100644 --- a/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc @@ -24,7 +24,7 @@ #include "tcmalloc/span.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class TestingSizeMap : public SizeMap { @@ -106,22 +106,22 @@ TEST_F(RunTimeSizeClassesTest, EnvRealClasses) { // With the runtime_size_classes library linked, the environment variable // will be parsed. 
- for (int c = 0; c < kNumClasses;) { - for (int end = c + count; c < end; c++) { - const SizeClassInfo& default_info = - m_.DefaultSizeClasses()[c % kNumBaseClasses]; - EXPECT_EQ(m_.class_to_size(c), default_info.size) << c; - EXPECT_EQ(m_.class_to_pages(c), default_info.pages); - EXPECT_EQ(m_.num_objects_to_move(c), default_info.num_to_move); - } - for (; (c % kNumBaseClasses) != 0; c++) { - EXPECT_EQ(m_.class_to_size(c), 0); - EXPECT_EQ(m_.class_to_pages(c), 0); - EXPECT_EQ(m_.num_objects_to_move(c), 0); - } + for (int c = 0; c < kNumClasses;) { + for (int end = c + count; c < end; c++) { + const SizeClassInfo& default_info = + m_.DefaultSizeClasses()[c % kNumBaseClasses]; + EXPECT_EQ(m_.class_to_size(c), default_info.size) << c; + EXPECT_EQ(m_.class_to_pages(c), default_info.pages); + EXPECT_EQ(m_.num_objects_to_move(c), default_info.num_to_move); + } + for (; (c % kNumBaseClasses) != 0; c++) { + EXPECT_EQ(m_.class_to_size(c), 0); + EXPECT_EQ(m_.class_to_pages(c), 0); + EXPECT_EQ(m_.num_objects_to_move(c), 0); + } } } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/span.cc b/contrib/libs/tcmalloc/tcmalloc/span.cc index 87e6f29244..f6fc842e75 100644 --- a/contrib/libs/tcmalloc/tcmalloc/span.cc +++ b/contrib/libs/tcmalloc/tcmalloc/span.cc @@ -18,8 +18,8 @@ #include <algorithm> -#include "absl/base/optimization.h" // ABSL_INTERNAL_ASSUME -#include "absl/numeric/bits.h" +#include "absl/base/optimization.h" // ABSL_INTERNAL_ASSUME +#include "absl/numeric/bits.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/atomic_stats_counter.h" #include "tcmalloc/internal/logging.h" @@ -29,22 +29,22 @@ #include "tcmalloc/sampler.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { void Span::Sample(StackTrace* stack) { ASSERT(!sampled_ && 
stack); sampled_ = 1; sampled_stack_ = stack; Static::sampled_objects_.prepend(this); - + // The cast to value matches Unsample. - tcmalloc_internal::StatsCounter::Value allocated_bytes = + tcmalloc_internal::StatsCounter::Value allocated_bytes = static_cast<tcmalloc_internal::StatsCounter::Value>( - AllocatedBytes(*stack, true)); - // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock. - Static::sampled_objects_size_.LossyAdd(allocated_bytes); + AllocatedBytes(*stack, true)); + // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock. + Static::sampled_objects_size_.LossyAdd(allocated_bytes); } StackTrace* Span::Unsample() { @@ -57,11 +57,11 @@ StackTrace* Span::Unsample() { RemoveFromList(); // from Static::sampled_objects_ // The cast to Value ensures no funny business happens during the negation if // sizeof(size_t) != sizeof(Value). - tcmalloc_internal::StatsCounter::Value neg_allocated_bytes = + tcmalloc_internal::StatsCounter::Value neg_allocated_bytes = -static_cast<tcmalloc_internal::StatsCounter::Value>( - AllocatedBytes(*stack, true)); - // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock. - Static::sampled_objects_size_.LossyAdd(neg_allocated_bytes); + AllocatedBytes(*stack, true)); + // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock. 
+ Static::sampled_objects_size_.LossyAdd(neg_allocated_bytes); return stack; } @@ -176,109 +176,109 @@ Span::ObjIdx* Span::IdxToPtr(ObjIdx idx, size_t size) const { return ptr; } -Span::ObjIdx* Span::BitmapIdxToPtr(ObjIdx idx, size_t size) const { - uintptr_t off = - first_page_.start_uintptr() + (static_cast<uintptr_t>(idx) * size); - ObjIdx* ptr = reinterpret_cast<ObjIdx*>(off); - return ptr; -} - -size_t Span::BitmapFreelistPopBatch(void** __restrict batch, size_t N, - size_t size) { -#ifndef NDEBUG - size_t before = bitmap_.CountBits(0, 64); -#endif // NDEBUG - - size_t count = 0; - // Want to fill the batch either with N objects, or the number of objects - // remaining in the span. - while (!bitmap_.IsZero() && count < N) { - size_t offset = bitmap_.FindSet(0); - ASSERT(offset < 64); - batch[count] = BitmapIdxToPtr(offset, size); - bitmap_.ClearLowestBit(); - count++; +Span::ObjIdx* Span::BitmapIdxToPtr(ObjIdx idx, size_t size) const { + uintptr_t off = + first_page_.start_uintptr() + (static_cast<uintptr_t>(idx) * size); + ObjIdx* ptr = reinterpret_cast<ObjIdx*>(off); + return ptr; +} + +size_t Span::BitmapFreelistPopBatch(void** __restrict batch, size_t N, + size_t size) { +#ifndef NDEBUG + size_t before = bitmap_.CountBits(0, 64); +#endif // NDEBUG + + size_t count = 0; + // Want to fill the batch either with N objects, or the number of objects + // remaining in the span. 
+ while (!bitmap_.IsZero() && count < N) { + size_t offset = bitmap_.FindSet(0); + ASSERT(offset < 64); + batch[count] = BitmapIdxToPtr(offset, size); + bitmap_.ClearLowestBit(); + count++; } -#ifndef NDEBUG - size_t after = bitmap_.CountBits(0, 64); - ASSERT(after + count == before); - ASSERT(allocated_ + count == embed_count_ - after); -#endif // NDEBUG - allocated_ += count; - return count; +#ifndef NDEBUG + size_t after = bitmap_.CountBits(0, 64); + ASSERT(after + count == before); + ASSERT(allocated_ + count == embed_count_ - after); +#endif // NDEBUG + allocated_ += count; + return count; } size_t Span::FreelistPopBatch(void** __restrict batch, size_t N, size_t size) { - // Handle spans with 64 or fewer objects using a bitmap. We expect spans - // to frequently hold smaller objects. - if (ABSL_PREDICT_FALSE(size >= kBitmapMinObjectSize)) { - return BitmapFreelistPopBatch(batch, N, size); - } - if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { + // Handle spans with 64 or fewer objects using a bitmap. We expect spans + // to frequently hold smaller objects. + if (ABSL_PREDICT_FALSE(size >= kBitmapMinObjectSize)) { + return BitmapFreelistPopBatch(batch, N, size); + } + if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { return FreelistPopBatchSized<Align::SMALL>(batch, N, size); } else { return FreelistPopBatchSized<Align::LARGE>(batch, N, size); } } -uint16_t Span::CalcReciprocal(size_t size) { - // Calculate scaling factor. We want to avoid dividing by the size of the - // object. Instead we'll multiply by a scaled version of the reciprocal. - // We divide kBitmapScalingDenominator by the object size, so later we can - // multiply by this reciprocal, and then divide this scaling factor out. - // TODO(djgove) These divides can be computed once at start up. - size_t reciprocal = 0; - // The spans hold objects up to kMaxSize, so it's safe to assume. 
- ABSL_INTERNAL_ASSUME(size <= kMaxSize); - if (size <= SizeMap::kMultiPageSize) { - reciprocal = kBitmapScalingDenominator / (size >> kAlignmentShift); - } else { - reciprocal = - kBitmapScalingDenominator / (size >> SizeMap::kMultiPageAlignmentShift); - } - ASSERT(reciprocal < 65536); - return static_cast<uint16_t>(reciprocal); -} - -void Span::BitmapBuildFreelist(size_t size, size_t count) { - // We are using a bitmap to indicate whether objects are used or not. The - // maximum capacity for the bitmap is 64 objects. - ASSERT(count <= 64); -#ifndef NDEBUG - // For bitmap_ use embed_count_ to record objects per span. - embed_count_ = count; -#endif // NDEBUG - reciprocal_ = CalcReciprocal(size); +uint16_t Span::CalcReciprocal(size_t size) { + // Calculate scaling factor. We want to avoid dividing by the size of the + // object. Instead we'll multiply by a scaled version of the reciprocal. + // We divide kBitmapScalingDenominator by the object size, so later we can + // multiply by this reciprocal, and then divide this scaling factor out. + // TODO(djgove) These divides can be computed once at start up. + size_t reciprocal = 0; + // The spans hold objects up to kMaxSize, so it's safe to assume. + ABSL_INTERNAL_ASSUME(size <= kMaxSize); + if (size <= SizeMap::kMultiPageSize) { + reciprocal = kBitmapScalingDenominator / (size >> kAlignmentShift); + } else { + reciprocal = + kBitmapScalingDenominator / (size >> SizeMap::kMultiPageAlignmentShift); + } + ASSERT(reciprocal < 65536); + return static_cast<uint16_t>(reciprocal); +} + +void Span::BitmapBuildFreelist(size_t size, size_t count) { + // We are using a bitmap to indicate whether objects are used or not. The + // maximum capacity for the bitmap is 64 objects. + ASSERT(count <= 64); +#ifndef NDEBUG + // For bitmap_ use embed_count_ to record objects per span. 
+ embed_count_ = count; +#endif // NDEBUG + reciprocal_ = CalcReciprocal(size); allocated_ = 0; - bitmap_.Clear(); // bitmap_ can be non-zero from a previous use. - bitmap_.SetRange(0, count); - ASSERT(bitmap_.CountBits(0, 64) == count); -} - -int Span::BuildFreelist(size_t size, size_t count, void** batch, int N) { + bitmap_.Clear(); // bitmap_ can be non-zero from a previous use. + bitmap_.SetRange(0, count); + ASSERT(bitmap_.CountBits(0, 64) == count); +} + +int Span::BuildFreelist(size_t size, size_t count, void** batch, int N) { freelist_ = kListEnd; - if (size >= kBitmapMinObjectSize) { - BitmapBuildFreelist(size, count); - return BitmapFreelistPopBatch(batch, N, size); - } - - // First, push as much as we can into the batch. - char* ptr = static_cast<char*>(start_address()); - int result = N <= count ? N : count; - for (int i = 0; i < result; ++i) { - batch[i] = ptr; - ptr += size; - } - allocated_ = result; - + if (size >= kBitmapMinObjectSize) { + BitmapBuildFreelist(size, count); + return BitmapFreelistPopBatch(batch, N, size); + } + + // First, push as much as we can into the batch. + char* ptr = static_cast<char*>(start_address()); + int result = N <= count ? N : count; + for (int i = 0; i < result; ++i) { + batch[i] = ptr; + ptr += size; + } + allocated_ = result; + ObjIdx idxStep = size / kAlignment; // Valid objects are {0, idxStep, idxStep * 2, ..., idxStep * (count - 1)}. if (size > SizeMap::kMultiPageSize) { idxStep = size / SizeMap::kMultiPageAlignment; } - ObjIdx idx = idxStep * result; + ObjIdx idx = idxStep * result; // Verify that the end of the useful portion of the span (and the beginning of // the span waste) has an index that doesn't overflow or risk confusion with @@ -290,43 +290,43 @@ int Span::BuildFreelist(size_t size, size_t count, void** batch, int N) { // The index of the end of the useful portion of the span. ObjIdx idxEnd = count * idxStep; - - // Then, push as much as we can into the cache_. 
- int cache_size = 0; - for (; idx < idxEnd && cache_size < kCacheSize; idx += idxStep) { - cache_[cache_size] = idx; - cache_size++; + + // Then, push as much as we can into the cache_. + int cache_size = 0; + for (; idx < idxEnd && cache_size < kCacheSize; idx += idxStep) { + cache_[cache_size] = idx; + cache_size++; } - cache_size_ = cache_size; - + cache_size_ = cache_size; + // Now, build freelist and stack other objects onto freelist objects. // Note: we take freelist objects from the beginning and stacked objects // from the end. This has a nice property of not paging in whole span at once // and not draining whole cache. ObjIdx* host = nullptr; // cached first object on freelist const size_t max_embed = size / sizeof(ObjIdx) - 1; - int embed_count = 0; + int embed_count = 0; while (idx < idxEnd) { // Check the no idx can be confused with kListEnd. ASSERT(idx != kListEnd); - if (host && embed_count != max_embed) { + if (host && embed_count != max_embed) { // Push onto first object on the freelist. - embed_count++; + embed_count++; idxEnd -= idxStep; - host[embed_count] = idxEnd; + host[embed_count] = idxEnd; } else { // The first object is full, push new object onto freelist. 
host = IdxToPtr(idx, size); host[0] = freelist_; freelist_ = idx; - embed_count = 0; + embed_count = 0; idx += idxStep; } } - embed_count_ = embed_count; - return result; + embed_count_ = embed_count; + return result; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/span.h b/contrib/libs/tcmalloc/tcmalloc/span.h index c589709094..c11f635bd8 100644 --- a/contrib/libs/tcmalloc/tcmalloc/span.h +++ b/contrib/libs/tcmalloc/tcmalloc/span.h @@ -22,29 +22,29 @@ #include <string.h> #include "absl/base/thread_annotations.h" -#include "absl/numeric/bits.h" +#include "absl/numeric/bits.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/linked_list.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/optimization.h" -#include "tcmalloc/internal/range_tracker.h" +#include "tcmalloc/internal/range_tracker.h" #include "tcmalloc/pages.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { - -// Can fit 64 objects into a bitmap, so determine what the minimum object -// size needs to be in order for that to work. This makes the assumption that -// we don't increase the number of pages at a point where the object count -// ends up exceeding 64. -inline constexpr size_t kBitmapMinObjectSize = kPageSize / 64; - -// Denominator for bitmap scaling factor. The idea is that instead of dividing -// by N we multiply by M = kBitmapScalingDenominator / N and round the resulting -// value. -inline constexpr size_t kBitmapScalingDenominator = 65536; - +namespace tcmalloc_internal { + +// Can fit 64 objects into a bitmap, so determine what the minimum object +// size needs to be in order for that to work. This makes the assumption that +// we don't increase the number of pages at a point where the object count +// ends up exceeding 64. 
+inline constexpr size_t kBitmapMinObjectSize = kPageSize / 64; + +// Denominator for bitmap scaling factor. The idea is that instead of dividing +// by N we multiply by M = kBitmapScalingDenominator / N and round the resulting +// value. +inline constexpr size_t kBitmapScalingDenominator = 65536; + // Information kept for a span (a contiguous run of pages). // // Spans can be in different states. The current state determines set of methods @@ -163,38 +163,38 @@ class Span : public SpanList::Elem { // These methods REQUIRE a SMALL_OBJECT span. // --------------------------------------------------------------------------- - // Indicates whether the object is considered large or small based on - // size > SizeMap::kMultiPageSize. - enum class Align { SMALL, LARGE }; - - // Indicate whether the Span is empty. Size is used to determine whether - // the span is using a compressed linked list of objects, or a bitmap - // to hold available objects. - bool FreelistEmpty(size_t size) const; + // Indicates whether the object is considered large or small based on + // size > SizeMap::kMultiPageSize. + enum class Align { SMALL, LARGE }; + // Indicate whether the Span is empty. Size is used to determine whether + // the span is using a compressed linked list of objects, or a bitmap + // to hold available objects. + bool FreelistEmpty(size_t size) const; + // Pushes ptr onto freelist unless the freelist becomes full, // in which case just return false. - bool FreelistPush(void* ptr, size_t size) { - ASSERT(allocated_ > 0); - if (ABSL_PREDICT_FALSE(allocated_ == 1)) { - return false; - } - allocated_--; - // Bitmaps are used to record object availability when there are fewer than - // 64 objects in a span. 
- if (ABSL_PREDICT_FALSE(size >= kBitmapMinObjectSize)) { - if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { - return BitmapFreelistPush<Align::SMALL>(ptr, size); - } else { - return BitmapFreelistPush<Align::LARGE>(ptr, size); - } - } - if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { - return FreelistPushSized<Align::SMALL>(ptr, size); - } else { - return FreelistPushSized<Align::LARGE>(ptr, size); - } - } + bool FreelistPush(void* ptr, size_t size) { + ASSERT(allocated_ > 0); + if (ABSL_PREDICT_FALSE(allocated_ == 1)) { + return false; + } + allocated_--; + // Bitmaps are used to record object availability when there are fewer than + // 64 objects in a span. + if (ABSL_PREDICT_FALSE(size >= kBitmapMinObjectSize)) { + if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { + return BitmapFreelistPush<Align::SMALL>(ptr, size); + } else { + return BitmapFreelistPush<Align::LARGE>(ptr, size); + } + } + if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { + return FreelistPushSized<Align::SMALL>(ptr, size); + } else { + return FreelistPushSized<Align::LARGE>(ptr, size); + } + } // Pops up to N objects from the freelist and returns them in the batch array. // Returns number of objects actually popped. @@ -204,9 +204,9 @@ class Span : public SpanList::Elem { void Init(PageId p, Length n); // Initialize freelist to contain all objects in the span. - // Pops up to N objects from the freelist and returns them in the batch array. - // Returns number of objects actually popped. - int BuildFreelist(size_t size, size_t count, void** batch, int N); + // Pops up to N objects from the freelist and returns them in the batch array. + // Returns number of objects actually popped. + int BuildFreelist(size_t size, size_t count, void** batch, int N); // Prefetch cacheline containing most important span information. void Prefetch(); @@ -227,15 +227,15 @@ class Span : public SpanList::Elem { // look at b/35680381 and cl/199502226. 
uint16_t allocated_; // Number of non-free objects uint16_t embed_count_; - // For available objects stored as a compressed linked list, the index of - // the first object in recorded in freelist_. When a bitmap is used to - // represent available objects, the reciprocal of the object size is - // stored to enable conversion from the offset of an object within a - // span to the index of the object. - union { - uint16_t freelist_; - uint16_t reciprocal_; - }; + // For available objects stored as a compressed linked list, the index of + // the first object in recorded in freelist_. When a bitmap is used to + // represent available objects, the reciprocal of the object size is + // stored to enable conversion from the offset of an object within a + // span to the index of the object. + union { + uint16_t freelist_; + uint16_t reciprocal_; + }; uint8_t cache_size_; uint8_t location_ : 2; // Is the span on a freelist, and if so, which? uint8_t sampled_ : 1; // Sampled object? @@ -245,11 +245,11 @@ class Span : public SpanList::Elem { // Embed cache of free objects. ObjIdx cache_[kCacheSize]; - // Used for spans with in CentralFreeList with fewer than 64 objects. - // Each bit is set to one when the object is available, and zero - // when the object is used. - Bitmap<64> bitmap_{}; - + // Used for spans with in CentralFreeList with fewer than 64 objects. + // Each bit is set to one when the object is available, and zero + // when the object is used. + Bitmap<64> bitmap_{}; + // Used only for sampled spans (SAMPLED state). StackTrace* sampled_stack_; @@ -268,61 +268,61 @@ class Span : public SpanList::Elem { ObjIdx PtrToIdx(void* ptr, size_t size) const; ObjIdx* IdxToPtr(ObjIdx idx, size_t size) const; - // For bitmap'd spans conversion from an offset to an index is performed - // by multiplying by the scaled reciprocal of the object size. - static uint16_t CalcReciprocal(size_t size); - - // Convert object pointer <-> freelist index for bitmap managed objects. 
- template <Align align> - ObjIdx BitmapPtrToIdx(void* ptr, size_t size) const; - ObjIdx* BitmapIdxToPtr(ObjIdx idx, size_t size) const; - - // Helper function for converting a pointer to an index. - template <Align align> - static ObjIdx OffsetToIdx(uintptr_t offset, size_t size, uint16_t reciprocal); - // Helper function for testing round trips between pointers and indexes. - static ObjIdx TestOffsetToIdx(uintptr_t ptr, size_t size, - uint16_t reciprocal) { - if (size <= SizeMap::kMultiPageSize) { - return OffsetToIdx<Align::SMALL>(ptr, size, reciprocal); - } else { - return OffsetToIdx<Align::LARGE>(ptr, size, reciprocal); - } - } + // For bitmap'd spans conversion from an offset to an index is performed + // by multiplying by the scaled reciprocal of the object size. + static uint16_t CalcReciprocal(size_t size); + // Convert object pointer <-> freelist index for bitmap managed objects. template <Align align> + ObjIdx BitmapPtrToIdx(void* ptr, size_t size) const; + ObjIdx* BitmapIdxToPtr(ObjIdx idx, size_t size) const; + + // Helper function for converting a pointer to an index. + template <Align align> + static ObjIdx OffsetToIdx(uintptr_t offset, size_t size, uint16_t reciprocal); + // Helper function for testing round trips between pointers and indexes. 
+ static ObjIdx TestOffsetToIdx(uintptr_t ptr, size_t size, + uint16_t reciprocal) { + if (size <= SizeMap::kMultiPageSize) { + return OffsetToIdx<Align::SMALL>(ptr, size, reciprocal); + } else { + return OffsetToIdx<Align::LARGE>(ptr, size, reciprocal); + } + } + + template <Align align> ObjIdx* IdxToPtrSized(ObjIdx idx, size_t size) const; template <Align align> - ObjIdx PtrToIdxSized(void* ptr, size_t size) const; - - template <Align align> + ObjIdx PtrToIdxSized(void* ptr, size_t size) const; + + template <Align align> size_t FreelistPopBatchSized(void** __restrict batch, size_t N, size_t size); - - template <Align align> - bool FreelistPushSized(void* ptr, size_t size); - - // For spans containing 64 or fewer objects, indicate that the object at the - // index has been returned. Always returns true. - template <Align align> - bool BitmapFreelistPush(void* ptr, size_t size); - - // A bitmap is used to indicate object availability for spans containing - // 64 or fewer objects. - void BitmapBuildFreelist(size_t size, size_t count); - - // For spans with 64 or fewer objects populate batch with up to N objects. - // Returns number of objects actually popped. - size_t BitmapFreelistPopBatch(void** batch, size_t N, size_t size); - - // Friend class to enable more indepth testing of bitmap code. - friend class SpanTestPeer; + + template <Align align> + bool FreelistPushSized(void* ptr, size_t size); + + // For spans containing 64 or fewer objects, indicate that the object at the + // index has been returned. Always returns true. + template <Align align> + bool BitmapFreelistPush(void* ptr, size_t size); + + // A bitmap is used to indicate object availability for spans containing + // 64 or fewer objects. + void BitmapBuildFreelist(size_t size, size_t count); + + // For spans with 64 or fewer objects populate batch with up to N objects. + // Returns number of objects actually popped. 
+ size_t BitmapFreelistPopBatch(void** batch, size_t N, size_t size); + + // Friend class to enable more indepth testing of bitmap code. + friend class SpanTestPeer; }; template <Span::Align align> Span::ObjIdx* Span::IdxToPtrSized(ObjIdx idx, size_t size) const { ASSERT(idx != kListEnd); - static_assert(align == Align::LARGE || align == Align::SMALL); + static_assert(align == Align::LARGE || align == Align::SMALL); uintptr_t off = first_page_.start_uintptr() + (static_cast<uintptr_t>(idx) @@ -334,38 +334,38 @@ Span::ObjIdx* Span::IdxToPtrSized(ObjIdx idx, size_t size) const { } template <Span::Align align> -Span::ObjIdx Span::PtrToIdxSized(void* ptr, size_t size) const { - // Object index is an offset from span start divided by a power-of-two. - // The divisors are choosen so that - // (1) objects are aligned on the divisor, - // (2) index fits into 16 bits and - // (3) the index of the beginning of all objects is strictly less than - // kListEnd (note that we have 256K pages and multi-page spans). - // For example with 1M spans we need kMultiPageAlignment >= 16. - // An ASSERT in BuildFreelist() verifies a condition which implies (3). - uintptr_t p = reinterpret_cast<uintptr_t>(ptr); - uintptr_t off; - if (align == Align::SMALL) { - // Generally we need to load first_page_ to compute the offset. - // But first_page_ can be in a different cache line then the fields that - // we use in FreelistPush otherwise (cache_, cache_size_, freelist_). - // So we avoid loading first_page_ for smaller sizes that have one page per - // span, instead we compute the offset by taking low kPageShift bits of the - // pointer. 
- ASSERT(PageIdContaining(ptr) == first_page_); - ASSERT(num_pages_ == Length(1)); - off = (p & (kPageSize - 1)) / kAlignment; - } else { - off = (p - first_page_.start_uintptr()) / SizeMap::kMultiPageAlignment; - } - ObjIdx idx = static_cast<ObjIdx>(off); - ASSERT(idx != kListEnd); - ASSERT(idx == off); - ASSERT(IdxToPtr(idx, size) == ptr); - return idx; -} - -template <Span::Align align> +Span::ObjIdx Span::PtrToIdxSized(void* ptr, size_t size) const { + // Object index is an offset from span start divided by a power-of-two. + // The divisors are choosen so that + // (1) objects are aligned on the divisor, + // (2) index fits into 16 bits and + // (3) the index of the beginning of all objects is strictly less than + // kListEnd (note that we have 256K pages and multi-page spans). + // For example with 1M spans we need kMultiPageAlignment >= 16. + // An ASSERT in BuildFreelist() verifies a condition which implies (3). + uintptr_t p = reinterpret_cast<uintptr_t>(ptr); + uintptr_t off; + if (align == Align::SMALL) { + // Generally we need to load first_page_ to compute the offset. + // But first_page_ can be in a different cache line then the fields that + // we use in FreelistPush otherwise (cache_, cache_size_, freelist_). + // So we avoid loading first_page_ for smaller sizes that have one page per + // span, instead we compute the offset by taking low kPageShift bits of the + // pointer. 
+ ASSERT(PageIdContaining(ptr) == first_page_); + ASSERT(num_pages_ == Length(1)); + off = (p & (kPageSize - 1)) / kAlignment; + } else { + off = (p - first_page_.start_uintptr()) / SizeMap::kMultiPageAlignment; + } + ObjIdx idx = static_cast<ObjIdx>(off); + ASSERT(idx != kListEnd); + ASSERT(idx == off); + ASSERT(IdxToPtr(idx, size) == ptr); + return idx; +} + +template <Span::Align align> size_t Span::FreelistPopBatchSized(void** __restrict batch, size_t N, size_t size) { size_t result = 0; @@ -422,84 +422,84 @@ size_t Span::FreelistPopBatchSized(void** __restrict batch, size_t N, return result; } -template <Span::Align align> -bool Span::FreelistPushSized(void* ptr, size_t size) { - ObjIdx idx = PtrToIdxSized<align>(ptr, size); - if (cache_size_ != kCacheSize) { - // Have empty space in the cache, push there. - cache_[cache_size_] = idx; - cache_size_++; - } else if (ABSL_PREDICT_TRUE(freelist_ != kListEnd) && - // -1 because the first slot is used by freelist link. - ABSL_PREDICT_TRUE(embed_count_ != size / sizeof(ObjIdx) - 1)) { - // Push onto the first object on freelist. - ObjIdx* host; - if (align == Align::SMALL) { - // Avoid loading first_page_ in this case (see the comment in PtrToIdx). - ASSERT(num_pages_ == Length(1)); - host = reinterpret_cast<ObjIdx*>( - (reinterpret_cast<uintptr_t>(ptr) & ~(kPageSize - 1)) + - static_cast<uintptr_t>(freelist_) * kAlignment); - ASSERT(PtrToIdx(host, size) == freelist_); - } else { - host = IdxToPtrSized<align>(freelist_, size); - } - embed_count_++; - host[embed_count_] = idx; - } else { - // Push onto freelist. - *reinterpret_cast<ObjIdx*>(ptr) = freelist_; - freelist_ = idx; - embed_count_ = 0; - } - return true; -} - -template <Span::Align align> -Span::ObjIdx Span::OffsetToIdx(uintptr_t offset, size_t size, - uint16_t reciprocal) { - if (align == Align::SMALL) { - return static_cast<ObjIdx>( - // Add kBitmapScalingDenominator / 2 to round to nearest integer. 
- ((offset >> kAlignmentShift) * reciprocal + - kBitmapScalingDenominator / 2) / - kBitmapScalingDenominator); - } else { - return static_cast<ObjIdx>( - ((offset >> SizeMap::kMultiPageAlignmentShift) * reciprocal + - kBitmapScalingDenominator / 2) / - kBitmapScalingDenominator); - } -} - -template <Span::Align align> -Span::ObjIdx Span::BitmapPtrToIdx(void* ptr, size_t size) const { - uintptr_t p = reinterpret_cast<uintptr_t>(ptr); - uintptr_t off = static_cast<uint32_t>(p - first_page_.start_uintptr()); - ObjIdx idx = OffsetToIdx<align>(off, size, reciprocal_); - ASSERT(BitmapIdxToPtr(idx, size) == ptr); - return idx; -} - -template <Span::Align align> -bool Span::BitmapFreelistPush(void* ptr, size_t size) { -#ifndef NDEBUG - size_t before = bitmap_.CountBits(0, 64); -#endif - // TODO(djgove) Conversions to offsets can be computed outside of lock. - ObjIdx idx = BitmapPtrToIdx<align>(ptr, size); - // Check that the object is not already returned. - ASSERT(bitmap_.GetBit(idx) == 0); - // Set the bit indicating where the object was returned. - bitmap_.SetBit(idx); -#ifndef NDEBUG - size_t after = bitmap_.CountBits(0, 64); - ASSERT(before + 1 == after); - ASSERT(allocated_ == embed_count_ - after); -#endif - return true; -} - +template <Span::Align align> +bool Span::FreelistPushSized(void* ptr, size_t size) { + ObjIdx idx = PtrToIdxSized<align>(ptr, size); + if (cache_size_ != kCacheSize) { + // Have empty space in the cache, push there. + cache_[cache_size_] = idx; + cache_size_++; + } else if (ABSL_PREDICT_TRUE(freelist_ != kListEnd) && + // -1 because the first slot is used by freelist link. + ABSL_PREDICT_TRUE(embed_count_ != size / sizeof(ObjIdx) - 1)) { + // Push onto the first object on freelist. + ObjIdx* host; + if (align == Align::SMALL) { + // Avoid loading first_page_ in this case (see the comment in PtrToIdx). 
+ ASSERT(num_pages_ == Length(1)); + host = reinterpret_cast<ObjIdx*>( + (reinterpret_cast<uintptr_t>(ptr) & ~(kPageSize - 1)) + + static_cast<uintptr_t>(freelist_) * kAlignment); + ASSERT(PtrToIdx(host, size) == freelist_); + } else { + host = IdxToPtrSized<align>(freelist_, size); + } + embed_count_++; + host[embed_count_] = idx; + } else { + // Push onto freelist. + *reinterpret_cast<ObjIdx*>(ptr) = freelist_; + freelist_ = idx; + embed_count_ = 0; + } + return true; +} + +template <Span::Align align> +Span::ObjIdx Span::OffsetToIdx(uintptr_t offset, size_t size, + uint16_t reciprocal) { + if (align == Align::SMALL) { + return static_cast<ObjIdx>( + // Add kBitmapScalingDenominator / 2 to round to nearest integer. + ((offset >> kAlignmentShift) * reciprocal + + kBitmapScalingDenominator / 2) / + kBitmapScalingDenominator); + } else { + return static_cast<ObjIdx>( + ((offset >> SizeMap::kMultiPageAlignmentShift) * reciprocal + + kBitmapScalingDenominator / 2) / + kBitmapScalingDenominator); + } +} + +template <Span::Align align> +Span::ObjIdx Span::BitmapPtrToIdx(void* ptr, size_t size) const { + uintptr_t p = reinterpret_cast<uintptr_t>(ptr); + uintptr_t off = static_cast<uint32_t>(p - first_page_.start_uintptr()); + ObjIdx idx = OffsetToIdx<align>(off, size, reciprocal_); + ASSERT(BitmapIdxToPtr(idx, size) == ptr); + return idx; +} + +template <Span::Align align> +bool Span::BitmapFreelistPush(void* ptr, size_t size) { +#ifndef NDEBUG + size_t before = bitmap_.CountBits(0, 64); +#endif + // TODO(djgove) Conversions to offsets can be computed outside of lock. + ObjIdx idx = BitmapPtrToIdx<align>(ptr, size); + // Check that the object is not already returned. + ASSERT(bitmap_.GetBit(idx) == 0); + // Set the bit indicating where the object was returned. 
+ bitmap_.SetBit(idx); +#ifndef NDEBUG + size_t after = bitmap_.CountBits(0, 64); + ASSERT(before + 1 == after); + ASSERT(allocated_ == embed_count_ - after); +#endif + return true; +} + inline Span::Location Span::location() const { return static_cast<Location>(location_); } @@ -539,12 +539,12 @@ inline uint64_t Span::freelist_added_time() const { return freelist_added_time_; } -inline bool Span::FreelistEmpty(size_t size) const { - if (size < kBitmapMinObjectSize) { - return (cache_size_ == 0 && freelist_ == kListEnd); - } else { - return (bitmap_.IsZero()); - } +inline bool Span::FreelistEmpty(size_t size) const { + if (size < kBitmapMinObjectSize) { + return (cache_size_ == 0 && freelist_ == kListEnd); + } else { + return (bitmap_.IsZero()); + } } inline void Span::RemoveFromList() { SpanList::Elem::remove(); } @@ -582,8 +582,8 @@ inline void Span::Init(PageId p, Length n) { sampled_ = 0; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_SPAN_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc index 6e4569dd83..40ebeb88d4 100644 --- a/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc @@ -25,9 +25,9 @@ #include "tcmalloc/span.h" #include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class RawSpan { @@ -42,7 +42,7 @@ class RawSpan { CHECK_CONDITION(res == 0); span_.set_first_page(PageIdContaining(mem)); span_.set_num_pages(npages); - span_.BuildFreelist(size, objects_per_span, nullptr, 0); + span_.BuildFreelist(size, objects_per_span, nullptr, 0); } ~RawSpan() { free(span_.start_address()); } @@ -207,6 +207,6 @@ BENCHMARK(BM_multiple_spans) ->Arg(kNumClasses - 1); } // namespace -} // namespace 
tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/span_stats.h b/contrib/libs/tcmalloc/tcmalloc/span_stats.h index 8c0b40b0fd..c64ce052f7 100644 --- a/contrib/libs/tcmalloc/tcmalloc/span_stats.h +++ b/contrib/libs/tcmalloc/tcmalloc/span_stats.h @@ -18,11 +18,11 @@ #include <stddef.h> #include "absl/base/macros.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { struct SpanStats { size_t num_spans_requested = 0; @@ -43,8 +43,8 @@ struct SpanStats { } }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_SPAN_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/span_test.cc b/contrib/libs/tcmalloc/tcmalloc/span_test.cc index 750f3cca26..c7f33b1006 100644 --- a/contrib/libs/tcmalloc/tcmalloc/span_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/span_test.cc @@ -28,7 +28,7 @@ #include "tcmalloc/static_vars.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class RawSpan { @@ -43,7 +43,7 @@ class RawSpan { CHECK_CONDITION(res == 0); span_.set_first_page(PageIdContaining(mem)); span_.set_num_pages(npages); - span_.BuildFreelist(size, objects_per_span, nullptr, 0); + span_.BuildFreelist(size, objects_per_span, nullptr, 0); } ~RawSpan() { free(span_.start_address()); } @@ -84,7 +84,7 @@ class SpanTest : public testing::TestWithParam<size_t> { TEST_P(SpanTest, FreelistBasic) { Span &span_ = raw_span_.span(); - EXPECT_FALSE(span_.FreelistEmpty(size_)); + EXPECT_FALSE(span_.FreelistEmpty(size_)); void *batch[kMaxObjectsToMove]; size_t popped = 0; size_t want = 1; @@ -96,7 +96,7 @@ TEST_P(SpanTest, FreelistBasic) { for (;;) { size_t n 
= span_.FreelistPopBatch(batch, want, size_); popped += n; - EXPECT_EQ(span_.FreelistEmpty(size_), popped == objects_per_span_); + EXPECT_EQ(span_.FreelistEmpty(size_), popped == objects_per_span_); for (size_t i = 0; i < n; ++i) { void *p = batch[i]; uintptr_t off = reinterpret_cast<char *>(p) - start; @@ -114,7 +114,7 @@ TEST_P(SpanTest, FreelistBasic) { want = 1; } } - EXPECT_TRUE(span_.FreelistEmpty(size_)); + EXPECT_TRUE(span_.FreelistEmpty(size_)); EXPECT_EQ(span_.FreelistPopBatch(batch, 1, size_), 0); EXPECT_EQ(popped, objects_per_span_); @@ -123,7 +123,7 @@ TEST_P(SpanTest, FreelistBasic) { EXPECT_TRUE(objects[idx]); bool ok = span_.FreelistPush(start + idx * size_, size_); EXPECT_TRUE(ok); - EXPECT_FALSE(span_.FreelistEmpty(size_)); + EXPECT_FALSE(span_.FreelistEmpty(size_)); objects[idx] = false; --popped; } @@ -153,12 +153,12 @@ TEST_P(SpanTest, FreelistRandomized) { } else { EXPECT_EQ(objects.size(), 1); } - EXPECT_EQ(span_.FreelistEmpty(size_), objects_per_span_ == 1); + EXPECT_EQ(span_.FreelistEmpty(size_), objects_per_span_ == 1); } else { size_t want = absl::Uniform<int32_t>(rng, 0, batch_size_) + 1; size_t n = span_.FreelistPopBatch(batch, want, size_); if (n < want) { - EXPECT_TRUE(span_.FreelistEmpty(size_)); + EXPECT_TRUE(span_.FreelistEmpty(size_)); } for (size_t i = 0; i < n; ++i) { EXPECT_TRUE(objects.insert(batch[i]).second); @@ -187,5 +187,5 @@ TEST_P(SpanTest, FreelistRandomized) { INSTANTIATE_TEST_SUITE_P(All, SpanTest, testing::Range(size_t(1), kNumClasses)); } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc index 5b5741b6a8..3933a55fc4 100644 --- a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc +++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc @@ -25,9 +25,9 @@ #include "tcmalloc/sampler.h" #include "tcmalloc/static_vars.h" 
-GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { bool StackTraceTable::Bucket::KeyEqual(uintptr_t h, const StackTrace& t) const { // Do not merge entries with different sizes so that profiling tools @@ -103,13 +103,13 @@ void StackTraceTable::AddTrace(double count, const StackTrace& t) { depth_total_ += t.depth; bucket_total_++; b = Static::bucket_allocator().New(); - b->hash = h; - b->trace = t; + b->hash = h; + b->trace = t; b->trace.user_data = Static::CopySampleUserData(t.user_data); - b->count = count; - b->total_weight = t.weight * count; - b->next = table_[idx]; - table_[idx] = b; + b->count = count; + b->total_weight = t.weight * count; + b->next = table_[idx]; + table_[idx] = b; } } @@ -150,6 +150,6 @@ void StackTraceTable::Iterate( } } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h index a5a4a03636..bb6a1cc938 100644 --- a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h +++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h @@ -26,11 +26,11 @@ #include "tcmalloc/internal_malloc_extension.h" #include "tcmalloc/malloc_extension.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { -class StackTraceTable final : public ProfileBase { +class StackTraceTable final : public ProfileBase { public: // If merge is true, traces with identical size and stack are merged // together. Else they are kept distinct. 
@@ -90,8 +90,8 @@ class StackTraceTable final : public ProfileBase { int num_buckets() const { return bucket_mask_ + 1; } }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_STACK_TRACE_TABLE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc index 4579798906..f33b189d06 100644 --- a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc @@ -30,7 +30,7 @@ #include "tcmalloc/static_vars.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { // Rather than deal with heap allocating stack/tags, AllocationEntry contains @@ -385,5 +385,5 @@ TEST(StackTraceTableTest, StackTraceTable) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/static_vars.cc b/contrib/libs/tcmalloc/tcmalloc/static_vars.cc index 08a70de493..379880f200 100644 --- a/contrib/libs/tcmalloc/tcmalloc/static_vars.cc +++ b/contrib/libs/tcmalloc/tcmalloc/static_vars.cc @@ -26,16 +26,16 @@ #include "tcmalloc/cpu_cache.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/mincore.h" -#include "tcmalloc/internal/numa.h" +#include "tcmalloc/internal/numa.h" #include "tcmalloc/malloc_extension.h" #include "tcmalloc/pagemap.h" #include "tcmalloc/sampler.h" #include "tcmalloc/thread_cache.h" #include "tcmalloc/tracking.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // Cacheline-align our SizeMap and CPUCache. 
They both have very hot arrays as // their first member variables, and aligning them reduces the number of cache @@ -47,8 +47,8 @@ ABSL_CONST_INIT absl::base_internal::SpinLock pageheap_lock( ABSL_CONST_INIT Arena Static::arena_; ABSL_CONST_INIT SizeMap ABSL_CACHELINE_ALIGNED Static::sizemap_; ABSL_CONST_INIT TransferCacheManager Static::transfer_cache_; -ABSL_CONST_INIT ShardedTransferCacheManager Static::sharded_transfer_cache_; -ABSL_CONST_INIT CPUCache ABSL_CACHELINE_ALIGNED Static::cpu_cache_; +ABSL_CONST_INIT ShardedTransferCacheManager Static::sharded_transfer_cache_; +ABSL_CONST_INIT CPUCache ABSL_CACHELINE_ALIGNED Static::cpu_cache_; ABSL_CONST_INIT PageHeapAllocator<Span> Static::span_allocator_; ABSL_CONST_INIT PageHeapAllocator<StackTrace> Static::stacktrace_allocator_; ABSL_CONST_INIT PageHeapAllocator<ThreadCache> Static::threadcache_allocator_; @@ -58,21 +58,21 @@ ABSL_CONST_INIT PeakHeapTracker Static::peak_heap_tracker_; ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket> Static::bucket_allocator_; ABSL_CONST_INIT std::atomic<bool> Static::inited_{false}; -ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; -ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; +ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; +ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; ABSL_CONST_INIT Static::CreateSampleUserDataCallback* Static::create_sample_user_data_callback_ = nullptr; ABSL_CONST_INIT Static::CopySampleUserDataCallback* Static::copy_sample_user_data_callback_ = nullptr; ABSL_CONST_INIT Static::DestroySampleUserDataCallback* Static::destroy_sample_user_data_callback_ = nullptr; -ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; -ABSL_CONST_INIT PageMap Static::pagemap_; +ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; +ABSL_CONST_INIT PageMap Static::pagemap_; ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock( absl::kConstInit, 
absl::base_internal::SCHEDULE_KERNEL_ONLY); ABSL_CONST_INIT GuardedPageAllocator Static::guardedpage_allocator_; -ABSL_CONST_INIT NumaTopology<kNumaPartitions, kNumBaseClasses> - Static::numa_topology_; +ABSL_CONST_INIT NumaTopology<kNumaPartitions, kNumBaseClasses> + Static::numa_topology_; size_t Static::metadata_bytes() { // This is ugly and doesn't nicely account for e.g. alignment losses @@ -80,14 +80,14 @@ size_t Static::metadata_bytes() { // struct's size. But we can't due to linking issues. const size_t static_var_size = sizeof(pageheap_lock) + sizeof(arena_) + sizeof(sizemap_) + - sizeof(sharded_transfer_cache_) + sizeof(transfer_cache_) + - sizeof(cpu_cache_) + sizeof(span_allocator_) + + sizeof(sharded_transfer_cache_) + sizeof(transfer_cache_) + + sizeof(cpu_cache_) + sizeof(span_allocator_) + sizeof(stacktrace_allocator_) + sizeof(threadcache_allocator_) + sizeof(sampled_objects_) + sizeof(bucket_allocator_) + sizeof(inited_) + sizeof(cpu_cache_active_) + sizeof(page_allocator_) + sizeof(pagemap_) + sizeof(sampled_objects_size_) + sizeof(peak_heap_tracker_) + sizeof(guarded_page_lock) + - sizeof(guardedpage_allocator_) + sizeof(numa_topology_); + sizeof(guardedpage_allocator_) + sizeof(numa_topology_); const size_t allocated = arena().bytes_allocated() + AddressRegionFactory::InternalBytesAllocated(); @@ -107,7 +107,7 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { if (!inited_.load(std::memory_order_acquire)) { tracking::Init(); sizemap_.Init(); - numa_topology_.Init(); + numa_topology_.Init(); span_allocator_.Init(&arena_); span_allocator_.New(); // Reduce cache conflicts span_allocator_.New(); // Reduce cache conflicts @@ -116,23 +116,23 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { // Do a bit of sanitizing: make sure central_cache is aligned properly CHECK_CONDITION((sizeof(transfer_cache_) % ABSL_CACHELINE_SIZE) == 0); transfer_cache_.Init(); - 
sharded_transfer_cache_.Init(); + sharded_transfer_cache_.Init(); new (page_allocator_.memory) PageAllocator; threadcache_allocator_.Init(&arena_); cpu_cache_active_ = false; pagemap_.MapRootWithSmallPages(); guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128); inited_.store(true, std::memory_order_release); - - pageheap_lock.Unlock(); - pthread_atfork( - TCMallocPreFork, - TCMallocPostFork, - TCMallocPostFork); - pageheap_lock.Lock(); + + pageheap_lock.Unlock(); + pthread_atfork( + TCMallocPreFork, + TCMallocPostFork, + TCMallocPostFork); + pageheap_lock.Lock(); } } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/static_vars.h b/contrib/libs/tcmalloc/tcmalloc/static_vars.h index be68edc189..da4d56075c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/static_vars.h +++ b/contrib/libs/tcmalloc/tcmalloc/static_vars.h @@ -27,12 +27,12 @@ #include "absl/base/optimization.h" #include "absl/base/thread_annotations.h" #include "tcmalloc/arena.h" -#include "tcmalloc/central_freelist.h" +#include "tcmalloc/central_freelist.h" #include "tcmalloc/common.h" #include "tcmalloc/guarded_page_allocator.h" #include "tcmalloc/internal/atomic_stats_counter.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/numa.h" +#include "tcmalloc/internal/numa.h" #include "tcmalloc/internal/percpu.h" #include "tcmalloc/page_allocator.h" #include "tcmalloc/page_heap.h" @@ -42,17 +42,17 @@ #include "tcmalloc/stack_trace_table.h" #include "tcmalloc/transfer_cache.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { class CPUCache; class PageMap; class ThreadCache; -void TCMallocPreFork(); -void TCMallocPostFork(); - +void TCMallocPreFork(); +void TCMallocPostFork(); + class Static { public: // True if InitIfNecessary() has run to 
completion. @@ -61,29 +61,29 @@ class Static { // Safe to call multiple times. static void InitIfNecessary(); - // Central cache. - static const CentralFreeList& central_freelist(int size_class) { - return transfer_cache().central_freelist(size_class); - } + // Central cache. + static const CentralFreeList& central_freelist(int size_class) { + return transfer_cache().central_freelist(size_class); + } // Central cache -- an array of free-lists, one per size-class. // We have a separate lock per free-list to reduce contention. static TransferCacheManager& transfer_cache() { return transfer_cache_; } - // A per-cache domain TransferCache. - static ShardedTransferCacheManager& sharded_transfer_cache() { - return sharded_transfer_cache_; - } - + // A per-cache domain TransferCache. + static ShardedTransferCacheManager& sharded_transfer_cache() { + return sharded_transfer_cache_; + } + static SizeMap& sizemap() { return sizemap_; } static CPUCache& cpu_cache() { return cpu_cache_; } static PeakHeapTracker& peak_heap_tracker() { return peak_heap_tracker_; } - static NumaTopology<kNumaPartitions, kNumBaseClasses>& numa_topology() { - return numa_topology_; - } - + static NumaTopology<kNumaPartitions, kNumBaseClasses>& numa_topology() { + return numa_topology_; + } + ////////////////////////////////////////////////////////////////////// // In addition to the explicit initialization comment, the variables below // must be protected by pageheap_lock. @@ -116,7 +116,7 @@ class Static { // LossyAdd and reads do not require locking. 
static SpanList sampled_objects_ ABSL_GUARDED_BY(pageheap_lock); ABSL_CONST_INIT static tcmalloc_internal::StatsCounter sampled_objects_size_; - + static PageHeapAllocator<StackTraceTable::Bucket>& bucket_allocator() { return bucket_allocator_; } @@ -127,9 +127,9 @@ class Static { static void ActivateCPUCache() { cpu_cache_active_ = true; } static void DeactivateCPUCache() { cpu_cache_active_ = false; } - static bool ForkSupportEnabled() { return fork_support_enabled_; } - static void EnableForkSupport() { fork_support_enabled_ = true; } - + static bool ForkSupportEnabled() { return fork_support_enabled_; } + static void EnableForkSupport() { fork_support_enabled_ = true; } + using CreateSampleUserDataCallback = void*(); using CopySampleUserDataCallback = void*(void*); using DestroySampleUserDataCallback = void(void*); @@ -194,7 +194,7 @@ class Static { ABSL_CONST_INIT static Arena arena_; static SizeMap sizemap_; ABSL_CONST_INIT static TransferCacheManager transfer_cache_; - ABSL_CONST_INIT static ShardedTransferCacheManager sharded_transfer_cache_; + ABSL_CONST_INIT static ShardedTransferCacheManager sharded_transfer_cache_; static CPUCache cpu_cache_; ABSL_CONST_INIT static GuardedPageAllocator guardedpage_allocator_; static PageHeapAllocator<Span> span_allocator_; @@ -203,20 +203,20 @@ class Static { static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_; ABSL_CONST_INIT static std::atomic<bool> inited_; static bool cpu_cache_active_; - static bool fork_support_enabled_; + static bool fork_support_enabled_; static CreateSampleUserDataCallback* create_sample_user_data_callback_; static CopySampleUserDataCallback* copy_sample_user_data_callback_; static DestroySampleUserDataCallback* destroy_sample_user_data_callback_; ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; - ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses> - numa_topology_; + ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses> + 
numa_topology_; // PageHeap uses a constructor for initialization. Like the members above, // we can't depend on initialization order, so pageheap is new'd // into this buffer. union PageAllocatorStorage { - constexpr PageAllocatorStorage() : extra(0) {} - + constexpr PageAllocatorStorage() : extra(0) {} + char memory[sizeof(PageAllocator)]; uintptr_t extra; // To force alignment }; @@ -255,8 +255,8 @@ inline void Span::Delete(Span* span) { Static::span_allocator().Delete(span); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_STATIC_VARS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/stats.cc b/contrib/libs/tcmalloc/tcmalloc/stats.cc index bb553ee5cd..c056501c93 100644 --- a/contrib/libs/tcmalloc/tcmalloc/stats.cc +++ b/contrib/libs/tcmalloc/tcmalloc/stats.cc @@ -26,7 +26,7 @@ #include "absl/base/dynamic_annotations.h" #include "absl/base/internal/cycleclock.h" #include "absl/base/macros.h" -#include "absl/numeric/bits.h" +#include "absl/numeric/bits.h" #include "absl/strings/string_view.h" #include "absl/time/time.h" #include "tcmalloc/common.h" @@ -35,9 +35,9 @@ #include "tcmalloc/internal/util.h" #include "tcmalloc/pages.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { static double BytesToMiB(size_t bytes) { const double MiB = 1048576.0; @@ -49,8 +49,8 @@ static double PagesToMiB(uint64_t pages) { } // For example, PrintRightAdjustedWithPrefix(out, ">=", 42, 6) prints " >=42". 
-static void PrintRightAdjustedWithPrefix(Printer *out, const char *prefix, - Length num, int width) { +static void PrintRightAdjustedWithPrefix(Printer *out, const char *prefix, + Length num, int width) { width -= strlen(prefix); int num_tmp = num.raw_num(); for (int i = 0; i < width - 1; i++) { @@ -62,9 +62,9 @@ static void PrintRightAdjustedWithPrefix(Printer *out, const char *prefix, out->printf("%s%zu", prefix, num.raw_num()); } -void PrintStats(const char *label, Printer *out, const BackingStats &backing, - const SmallSpanStats &small, const LargeSpanStats &large, - bool everything) { +void PrintStats(const char *label, Printer *out, const BackingStats &backing, + const SmallSpanStats &small, const LargeSpanStats &large, + bool everything) { size_t nonempty_sizes = 0; for (int i = 0; i < kMaxPages.raw_num(); ++i) { const size_t norm = small.normal_length[i]; @@ -275,7 +275,7 @@ void PageAgeHistograms::Histogram::Record(Length pages, double age) { total_age_ += pages.raw_num() * age; } -void PageAgeHistograms::Print(const char *label, Printer *out) const { +void PageAgeHistograms::Print(const char *label, Printer *out) const { out->printf("------------------------------------------------\n"); out->printf( "%s cache entry age (count of pages in spans of " @@ -295,8 +295,8 @@ void PageAgeHistograms::Print(const char *label, Printer *out) const { returned_.Print("Unmapped span", out); } -static void PrintLineHeader(Printer *out, const char *kind, const char *prefix, - Length num) { +static void PrintLineHeader(Printer *out, const char *kind, const char *prefix, + Length num) { // Print the beginning of the line, e.g. "Live span, >=128 pages: ". The // span size ("128" in the example) is padded such that it plus the span // prefix ("Live") plus the span size prefix (">=") is kHeaderExtraChars wide. 
@@ -309,7 +309,7 @@ static void PrintLineHeader(Printer *out, const char *kind, const char *prefix, } void PageAgeHistograms::PerSizeHistograms::Print(const char *kind, - Printer *out) const { + Printer *out) const { out->printf("%-15s TOTAL PAGES: ", kind); total.Print(out); @@ -326,7 +326,7 @@ void PageAgeHistograms::PerSizeHistograms::Print(const char *kind, } } -void PageAgeHistograms::Histogram::Print(Printer *out) const { +void PageAgeHistograms::Histogram::Print(Printer *out) const { const double mean = avg_age(); out->printf(" %7.1f", mean); for (int b = 0; b < kNumBuckets; ++b) { @@ -336,7 +336,7 @@ void PageAgeHistograms::Histogram::Print(Printer *out) const { out->printf("\n"); } -void PageAllocInfo::Print(Printer *out) const { +void PageAllocInfo::Print(Printer *out) const { int64_t ticks = TimeTicks(); double hz = freq_ / ticks; out->printf("%s: stats on allocation sizes\n", label_); @@ -443,7 +443,7 @@ void PageAllocInfo::RecordAlloc(PageId p, Length n) { } else { Length slack = RoundUp(n, kPagesPerHugePage) - n; total_slack_ += slack; - size_t i = absl::bit_width(n.raw_num() - 1); + size_t i = absl::bit_width(n.raw_num() - 1); large_[i].Alloc(n); } } @@ -460,7 +460,7 @@ void PageAllocInfo::RecordFree(PageId p, Length n) { } else { Length slack = RoundUp(n, kPagesPerHugePage) - n; total_slack_ -= slack; - size_t i = absl::bit_width(n.raw_num() - 1); + size_t i = absl::bit_width(n.raw_num() - 1); large_[i].Free(n); } } @@ -476,7 +476,7 @@ const PageAllocInfo::Counts &PageAllocInfo::counts_for(Length n) const { if (n <= kMaxPages) { return small_[n.raw_num() - 1]; } - size_t i = absl::bit_width(n.raw_num() - 1); + size_t i = absl::bit_width(n.raw_num() - 1); return large_[i]; } @@ -548,6 +548,6 @@ int64_t PageAllocInfo::TimeTicks() const { return absl::base_internal::CycleClock::Now() - baseline_ticks_; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff 
--git a/contrib/libs/tcmalloc/tcmalloc/stats.h b/contrib/libs/tcmalloc/tcmalloc/stats.h index 19070d867d..348077a063 100644 --- a/contrib/libs/tcmalloc/tcmalloc/stats.h +++ b/contrib/libs/tcmalloc/tcmalloc/stats.h @@ -24,9 +24,9 @@ #include "tcmalloc/internal/logging.h" #include "tcmalloc/pages.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { struct BackingStats { BackingStats() : system_bytes(0), free_bytes(0), unmapped_bytes(0) {} @@ -85,9 +85,9 @@ inline LargeSpanStats operator+(LargeSpanStats lhs, LargeSpanStats rhs) { return lhs += rhs; } -void PrintStats(const char* label, Printer* out, const BackingStats& backing, - const SmallSpanStats& small, const LargeSpanStats& large, - bool everything); +void PrintStats(const char* label, Printer* out, const BackingStats& backing, + const SmallSpanStats& small, const LargeSpanStats& large, + bool everything); class PageAgeHistograms { public: @@ -99,7 +99,7 @@ class PageAgeHistograms { // changed. 
void RecordRange(Length pages, bool released, int64_t when); - void Print(const char* label, Printer* out) const; + void Print(const char* label, Printer* out) const; static constexpr size_t kNumBuckets = 7; static constexpr size_t kNumSizes = 64; @@ -108,7 +108,7 @@ class PageAgeHistograms { class Histogram { public: void Record(Length pages, double age); - void Print(Printer* out) const; + void Print(Printer* out) const; uint32_t pages_in_bucket(size_t i) const { return buckets_[i]; } @@ -158,7 +158,7 @@ class PageAgeHistograms { private: struct PerSizeHistograms { void Record(Length pages, double age); - void Print(const char* kind, Printer* out) const; + void Print(const char* kind, Printer* out) const; Histogram* GetSmall(Length n) { CHECK_CONDITION(n.raw_num() < kNumSizes); @@ -204,7 +204,7 @@ class PageAllocInfo { void RecordFree(PageId p, Length n); void RecordRelease(Length n, Length got); // And invoking this in their Print() implementation. - void Print(Printer* out) const; + void Print(Printer* out) const; void PrintInPbtxt(PbtxtRegion* region, absl::string_view stat_name) const; // Total size of allocations < 1 MiB @@ -264,8 +264,8 @@ class PageAllocInfo { void LogRelease(int64_t when, Length n) { Write(when, 2, PageId{0}, n); } }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/stats_test.cc b/contrib/libs/tcmalloc/tcmalloc/stats_test.cc index 733fcc9534..f79e81246f 100644 --- a/contrib/libs/tcmalloc/tcmalloc/stats_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/stats_test.cc @@ -24,7 +24,7 @@ #include "tcmalloc/huge_pages.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class PrintTest : public ::testing::Test { @@ -34,8 +34,8 @@ class PrintTest : public ::testing::Test { void ExpectStats(const BackingStats &back, const SmallSpanStats 
&small, const LargeSpanStats &large, const std::string &expected) { - Printer out(&buf_[0], kBufferSize); - PrintStats("PrintTest", &out, back, small, large, true); + Printer out(&buf_[0], kBufferSize); + PrintStats("PrintTest", &out, back, small, large, true); EXPECT_EQ(expected, buf_); } @@ -93,8 +93,8 @@ class AgeTest : public testing::Test { return kNow - freq * age; } - void ExpectAges(const PageAgeHistograms &ages, const std::string &expected) { - Printer out(&buf_[0], kBufferSize); + void ExpectAges(const PageAgeHistograms &ages, const std::string &expected) { + Printer out(&buf_[0], kBufferSize); ages.Print("AgeTest", &out); std::string got = buf_; EXPECT_EQ(expected, got); @@ -102,7 +102,7 @@ class AgeTest : public testing::Test { }; TEST_F(AgeTest, Basic) { - PageAgeHistograms ages(kNow); + PageAgeHistograms ages(kNow); ages.RecordRange(Length(1), false, WhenForAge(0.5)); ages.RecordRange(Length(1), false, WhenForAge(1.2)); ages.RecordRange(Length(1), false, WhenForAge(3.7)); @@ -134,7 +134,7 @@ Unmapped span, >=64 pages: 600.0 0 0 0 200 0 } TEST_F(AgeTest, Overflow) { - PageAgeHistograms ages(kNow); + PageAgeHistograms ages(kNow); const Length too_big = Length(4 * (std::numeric_limits<uint32_t>::max() / 5)); ages.RecordRange(too_big, false, WhenForAge(0.5)); ages.RecordRange(too_big, false, WhenForAge(0.5)); @@ -155,8 +155,8 @@ Unmapped span TOTAL PAGES: 0.0 0 0 0 0 0 } TEST_F(AgeTest, ManySizes) { - PageAgeHistograms ages(kNow); - const Length N = PageAgeHistograms::kLargeSize; + PageAgeHistograms ages(kNow); + const Length N = PageAgeHistograms::kLargeSize; for (auto i = Length(1); i <= N; ++i) { ages.RecordRange(i, false, WhenForAge(i.raw_num() * 3)); } @@ -264,5 +264,5 @@ TEST(ClockTest, ClockTicks) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc b/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc index b079c9c966..61854abdcf 100644 
--- a/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc +++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc @@ -14,12 +14,12 @@ #include "tcmalloc/system-alloc.h" -#include <asm/unistd.h> +#include <asm/unistd.h> #include <errno.h> #include <stddef.h> #include <stdint.h> #include <sys/mman.h> -#include <sys/syscall.h> +#include <sys/syscall.h> #include <unistd.h> #include <algorithm> @@ -34,7 +34,7 @@ #include "absl/base/internal/spinlock.h" #include "absl/base/macros.h" #include "absl/base/optimization.h" -#include "absl/types/optional.h" +#include "absl/types/optional.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/optimization.h" @@ -54,13 +54,13 @@ extern "C" int madvise(caddr_t, size_t, int); #endif -#ifdef __linux__ -#include <linux/mempolicy.h> -#endif - -GOOGLE_MALLOC_SECTION_BEGIN +#ifdef __linux__ +#include <linux/mempolicy.h> +#endif + +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { @@ -100,7 +100,7 @@ AddressRegionFactory* region_factory = nullptr; // Rounds size down to a multiple of alignment. size_t RoundDown(const size_t size, const size_t alignment) { // Checks that the alignment has only one bit set. 
- ASSERT(absl::has_single_bit(alignment)); + ASSERT(absl::has_single_bit(alignment)); return (size) & ~(alignment - 1); } @@ -114,7 +114,7 @@ class MmapRegion final : public AddressRegion { MmapRegion(uintptr_t start, size_t size, AddressRegionFactory::UsageHint hint) : start_(start), free_size_(size), hint_(hint) {} std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override; - ~MmapRegion() override = default; + ~MmapRegion() override = default; private: const uintptr_t start_; @@ -127,7 +127,7 @@ class MmapRegionFactory final : public AddressRegionFactory { AddressRegion* Create(void* start, size_t size, UsageHint hint) override; size_t GetStats(absl::Span<char> buffer) override; size_t GetStatsInPbtxt(absl::Span<char> buffer) override; - ~MmapRegionFactory() override = default; + ~MmapRegionFactory() override = default; private: std::atomic<size_t> bytes_reserved_{0}; @@ -140,7 +140,7 @@ class RegionManager { std::pair<void*, size_t> Alloc(size_t size, size_t alignment, MemoryTag tag); void DiscardMappedRegions() { - std::fill(normal_region_.begin(), normal_region_.end(), nullptr); + std::fill(normal_region_.begin(), normal_region_.end(), nullptr); sampled_region_ = nullptr; } @@ -151,7 +151,7 @@ class RegionManager { std::pair<void*, size_t> Allocate(size_t size, size_t alignment, MemoryTag tag); - std::array<AddressRegion*, kNumaPartitions> normal_region_{{nullptr}}; + std::array<AddressRegion*, kNumaPartitions> normal_region_{{nullptr}}; AddressRegion* sampled_region_{nullptr}; }; std::aligned_storage<sizeof(RegionManager), alignof(RegionManager)>::type @@ -198,7 +198,7 @@ AddressRegion* MmapRegionFactory::Create(void* start, size_t size, } size_t MmapRegionFactory::GetStats(absl::Span<char> buffer) { - Printer printer(buffer.data(), buffer.size()); + Printer printer(buffer.data(), buffer.size()); size_t allocated = bytes_reserved_.load(std::memory_order_relaxed); constexpr double MiB = 1048576.0; printer.printf("MmapSysAllocator: %zu bytes (%.1f 
MiB) reserved\n", allocated, @@ -208,7 +208,7 @@ size_t MmapRegionFactory::GetStats(absl::Span<char> buffer) { } size_t MmapRegionFactory::GetStatsInPbtxt(absl::Span<char> buffer) { - Printer printer(buffer.data(), buffer.size()); + Printer printer(buffer.data(), buffer.size()); size_t allocated = bytes_reserved_.load(std::memory_order_relaxed); printer.printf("mmap_sys_allocator: %lld\n", allocated); @@ -219,7 +219,7 @@ static AddressRegionFactory::UsageHint TagToHint(MemoryTag tag) { using UsageHint = AddressRegionFactory::UsageHint; switch (tag) { case MemoryTag::kNormal: - case MemoryTag::kNormalP1: + case MemoryTag::kNormalP1: return UsageHint::kNormal; break; case MemoryTag::kSampled: @@ -275,9 +275,9 @@ std::pair<void*, size_t> RegionManager::Allocate(size_t size, size_t alignment, AddressRegion*& region = *[&]() { switch (tag) { case MemoryTag::kNormal: - return &normal_region_[0]; - case MemoryTag::kNormalP1: - return &normal_region_[1]; + return &normal_region_[0]; + case MemoryTag::kNormalP1: + return &normal_region_[1]; case MemoryTag::kSampled: return &sampled_region_; default: @@ -318,50 +318,50 @@ void InitSystemAllocatorIfNecessary() { region_factory = new (&mmap_space) MmapRegionFactory(); } -// Bind the memory region spanning `size` bytes starting from `base` to NUMA -// nodes assigned to `partition`. Returns zero upon success, or a standard -// error code upon failure. -void BindMemory(void* const base, const size_t size, const size_t partition) { - auto& topology = Static::numa_topology(); - - // If NUMA awareness is unavailable or disabled, or the user requested that - // we don't bind memory then do nothing. 
- const NumaBindMode bind_mode = topology.bind_mode(); - if (!topology.numa_aware() || bind_mode == NumaBindMode::kNone) { - return; - } - - const uint64_t nodemask = topology.GetPartitionNodes(partition); - int err = - syscall(__NR_mbind, base, size, MPOL_BIND | MPOL_F_STATIC_NODES, - &nodemask, sizeof(nodemask) * 8, MPOL_MF_STRICT | MPOL_MF_MOVE); - if (err == 0) { - return; - } - - if (bind_mode == NumaBindMode::kAdvisory) { - Log(kLogWithStack, __FILE__, __LINE__, "Warning: Unable to mbind memory", - err, base, nodemask); - return; - } - - ASSERT(bind_mode == NumaBindMode::kStrict); - Crash(kCrash, __FILE__, __LINE__, "Unable to mbind memory", err, base, - nodemask); -} - +// Bind the memory region spanning `size` bytes starting from `base` to NUMA +// nodes assigned to `partition`. Returns zero upon success, or a standard +// error code upon failure. +void BindMemory(void* const base, const size_t size, const size_t partition) { + auto& topology = Static::numa_topology(); + + // If NUMA awareness is unavailable or disabled, or the user requested that + // we don't bind memory then do nothing. 
+ const NumaBindMode bind_mode = topology.bind_mode(); + if (!topology.numa_aware() || bind_mode == NumaBindMode::kNone) { + return; + } + + const uint64_t nodemask = topology.GetPartitionNodes(partition); + int err = + syscall(__NR_mbind, base, size, MPOL_BIND | MPOL_F_STATIC_NODES, + &nodemask, sizeof(nodemask) * 8, MPOL_MF_STRICT | MPOL_MF_MOVE); + if (err == 0) { + return; + } + + if (bind_mode == NumaBindMode::kAdvisory) { + Log(kLogWithStack, __FILE__, __LINE__, "Warning: Unable to mbind memory", + err, base, nodemask); + return; + } + + ASSERT(bind_mode == NumaBindMode::kStrict); + Crash(kCrash, __FILE__, __LINE__, "Unable to mbind memory", err, base, + nodemask); +} + ABSL_CONST_INIT std::atomic<int> system_release_errors = ATOMIC_VAR_INIT(0); } // namespace -void AcquireSystemAllocLock() { - spinlock.Lock(); -} - -void ReleaseSystemAllocLock() { - spinlock.Unlock(); -} - +void AcquireSystemAllocLock() { + spinlock.Lock(); +} + +void ReleaseSystemAllocLock() { + spinlock.Unlock(); +} + void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment, const MemoryTag tag) { // If default alignment is set request the minimum alignment provided by @@ -386,7 +386,7 @@ void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment, if (result != nullptr) { CheckAddressBits<kAddressBits>(reinterpret_cast<uintptr_t>(result) + *actual_bytes - 1); - ASSERT(GetMemoryTag(result) == tag); + ASSERT(GetMemoryTag(result) == tag); } return result; } @@ -541,7 +541,7 @@ static uintptr_t RandomMmapHint(size_t size, size_t alignment, // Ensure alignment >= size so we're guaranteed the full mapping has the same // tag. 
- alignment = absl::bit_ceil(std::max(alignment, size)); + alignment = absl::bit_ceil(std::max(alignment, size)); rnd = Sampler::NextRandom(rnd); uintptr_t addr = rnd & kAddrMask & ~(alignment - 1) & ~kTagMask; @@ -555,19 +555,19 @@ void* MmapAligned(size_t size, size_t alignment, const MemoryTag tag) { ASSERT(alignment <= kTagMask); static uintptr_t next_sampled_addr = 0; - static std::array<uintptr_t, kNumaPartitions> next_normal_addr = {0}; + static std::array<uintptr_t, kNumaPartitions> next_normal_addr = {0}; - absl::optional<int> numa_partition; + absl::optional<int> numa_partition; uintptr_t& next_addr = *[&]() { switch (tag) { case MemoryTag::kSampled: return &next_sampled_addr; - case MemoryTag::kNormalP0: - numa_partition = 0; - return &next_normal_addr[0]; - case MemoryTag::kNormalP1: - numa_partition = 1; - return &next_normal_addr[1]; + case MemoryTag::kNormalP0: + numa_partition = 0; + return &next_normal_addr[0]; + case MemoryTag::kNormalP1: + numa_partition = 1; + return &next_normal_addr[1]; default: ASSUME(false); __builtin_unreachable(); @@ -579,17 +579,17 @@ void* MmapAligned(size_t size, size_t alignment, const MemoryTag tag) { GetMemoryTag(reinterpret_cast<void*>(next_addr + size - 1)) != tag) { next_addr = RandomMmapHint(size, alignment, tag); } - void* hint; + void* hint; for (int i = 0; i < 1000; ++i) { - hint = reinterpret_cast<void*>(next_addr); + hint = reinterpret_cast<void*>(next_addr); ASSERT(GetMemoryTag(hint) == tag); // TODO(b/140190055): Use MAP_FIXED_NOREPLACE once available. void* result = mmap(hint, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (result == hint) { - if (numa_partition.has_value()) { - BindMemory(result, size, *numa_partition); - } + if (numa_partition.has_value()) { + BindMemory(result, size, *numa_partition); + } // Attempt to keep the next mmap contiguous in the common case. 
next_addr += size; CHECK_CONDITION(kAddressBits == std::numeric_limits<uintptr_t>::digits || @@ -612,12 +612,12 @@ void* MmapAligned(size_t size, size_t alignment, const MemoryTag tag) { } Log(kLogWithStack, __FILE__, __LINE__, - "MmapAligned() failed - unable to allocate with tag (hint, size, " - "alignment) - is something limiting address placement?", - hint, size, alignment); + "MmapAligned() failed - unable to allocate with tag (hint, size, " + "alignment) - is something limiting address placement?", + hint, size, alignment); return nullptr; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc.h b/contrib/libs/tcmalloc/tcmalloc/system-alloc.h index 3d1e7fd60b..a38192c233 100644 --- a/contrib/libs/tcmalloc/tcmalloc/system-alloc.h +++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc.h @@ -23,9 +23,9 @@ #include "tcmalloc/common.h" #include "tcmalloc/malloc_extension.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // REQUIRES: "alignment" is a power of two or "0" to indicate default alignment // REQUIRES: "alignment" and "size" <= kTagMask @@ -50,9 +50,9 @@ void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment, // call to SystemRelease. int SystemReleaseErrors(); -void AcquireSystemAllocLock(); -void ReleaseSystemAllocLock(); - +void AcquireSystemAllocLock(); +void ReleaseSystemAllocLock(); + // This call is a hint to the operating system that the pages // contained in the specified range of memory will not be used for a // while, and can be released for use by other processes or the OS. 
@@ -84,8 +84,8 @@ void SetRegionFactory(AddressRegionFactory *factory); // REQUIRES: size <= kTagMask void *MmapAligned(size_t size, size_t alignment, MemoryTag tag); -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_SYSTEM_ALLOC_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc index 496bd048ee..c52bd569d9 100644 --- a/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc @@ -25,13 +25,13 @@ #include "gtest/gtest.h" #include "absl/strings/str_format.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/malloc_extension.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { class MmapAlignedTest : public testing::TestWithParam<size_t> { @@ -42,12 +42,12 @@ class MmapAlignedTest : public testing::TestWithParam<size_t> { for (MemoryTag tag : {MemoryTag::kNormal, MemoryTag::kSampled}) { SCOPED_TRACE(static_cast<unsigned int>(tag)); - void* p = MmapAligned(size, alignment, tag); + void* p = MmapAligned(size, alignment, tag); EXPECT_NE(p, nullptr); EXPECT_EQ(reinterpret_cast<uintptr_t>(p) % alignment, 0); - EXPECT_EQ(IsTaggedMemory(p), tag == MemoryTag::kSampled); - EXPECT_EQ(GetMemoryTag(p), tag); - EXPECT_EQ(GetMemoryTag(static_cast<char*>(p) + size - 1), tag); + EXPECT_EQ(IsTaggedMemory(p), tag == MemoryTag::kSampled); + EXPECT_EQ(GetMemoryTag(p), tag); + EXPECT_EQ(GetMemoryTag(static_cast<char*>(p) + size - 1), tag); EXPECT_EQ(munmap(p, size), 0); } } @@ -107,11 +107,11 @@ TEST(Basic, InvokedTest) { MallocExtension::SetRegionFactory(&f); // An allocation size that is likely to trigger the system allocator. 
- void* ptr = ::operator new(kMinSystemAlloc); - // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. - benchmark::DoNotOptimize(ptr); - ::operator delete(ptr); + void* ptr = ::operator new(kMinSystemAlloc); + // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. + benchmark::DoNotOptimize(ptr); + ::operator delete(ptr); // Make sure that our allocator was invoked. ASSERT_TRUE(simple_region_alloc_invoked); @@ -143,5 +143,5 @@ TEST(Basic, RetryFailTest) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc index 8e62ba91b9..75ef562f2c 100644 --- a/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc +++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc @@ -81,7 +81,7 @@ #include "absl/base/thread_annotations.h" #include "absl/debugging/stacktrace.h" #include "absl/memory/memory.h" -#include "absl/numeric/bits.h" +#include "absl/numeric/bits.h" #include "absl/strings/match.h" #include "absl/strings/numbers.h" #include "absl/strings/strip.h" @@ -115,13 +115,13 @@ #include "tcmalloc/transfer_cache.h" #include "tcmalloc/transfer_cache_stats.h" -#if defined(TCMALLOC_HAVE_STRUCT_MALLINFO) +#if defined(TCMALLOC_HAVE_STRUCT_MALLINFO) #include <malloc.h> #endif -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { // ----------------------- IMPLEMENTATION ------------------------------- @@ -131,7 +131,7 @@ struct TCMallocStats { uint64_t central_bytes; // Bytes in central cache uint64_t transfer_bytes; // Bytes in central transfer cache uint64_t metadata_bytes; // Bytes alloced for metadata - uint64_t sharded_transfer_bytes; // Bytes in per-CCX cache + uint64_t 
sharded_transfer_bytes; // Bytes in per-CCX cache uint64_t per_cpu_bytes; // Bytes in per-CPU cache uint64_t pagemap_root_bytes_res; // Resident bytes of pagemap root node uint64_t percpu_metadata_bytes_res; // Resident bytes of the per-CPU metadata @@ -141,10 +141,10 @@ struct TCMallocStats { AllocatorStats bucket_stats; // StackTraceTable::Bucket objects size_t pagemap_bytes; // included in metadata bytes size_t percpu_metadata_bytes; // included in metadata bytes - BackingStats pageheap; // Stats from page heap - - // Explicitly declare the ctor to put it in the google_malloc section. - TCMallocStats() = default; + BackingStats pageheap; // Stats from page heap + + // Explicitly declare the ctor to put it in the google_malloc section. + TCMallocStats() = default; }; // Get stats into "r". Also, if class_count != NULL, class_count[k] @@ -155,15 +155,15 @@ struct TCMallocStats { // should be captured or not. Residence info requires a potentially // costly OS call, and is not necessary in all situations. 
static void ExtractStats(TCMallocStats* r, uint64_t* class_count, - SpanStats* span_stats, SmallSpanStats* small_spans, - LargeSpanStats* large_spans, - TransferCacheStats* tc_stats, bool report_residence) { + SpanStats* span_stats, SmallSpanStats* small_spans, + LargeSpanStats* large_spans, + TransferCacheStats* tc_stats, bool report_residence) { r->central_bytes = 0; r->transfer_bytes = 0; for (int cl = 0; cl < kNumClasses; ++cl) { - const size_t length = Static::central_freelist(cl).length(); + const size_t length = Static::central_freelist(cl).length(); const size_t tc_length = Static::transfer_cache().tc_length(cl); - const size_t cache_overhead = Static::central_freelist(cl).OverheadBytes(); + const size_t cache_overhead = Static::central_freelist(cl).OverheadBytes(); const size_t size = Static::sizemap().class_to_size(cl); r->central_bytes += (size * length) + cache_overhead; r->transfer_bytes += (size * tc_length); @@ -171,12 +171,12 @@ static void ExtractStats(TCMallocStats* r, uint64_t* class_count, // Sum the lengths of all per-class freelists, except the per-thread // freelists, which get counted when we call GetThreadStats(), below. 
class_count[cl] = length + tc_length; - if (UsePerCpuCache()) { + if (UsePerCpuCache()) { class_count[cl] += Static::cpu_cache().TotalObjectsOfClass(cl); } } if (span_stats) { - span_stats[cl] = Static::central_freelist(cl).GetSpanStats(); + span_stats[cl] = Static::central_freelist(cl).GetSpanStats(); } if (tc_stats) { tc_stats[cl] = Static::transfer_cache().GetHitRateStats(cl); @@ -215,12 +215,12 @@ static void ExtractStats(TCMallocStats* r, uint64_t* class_count, } r->per_cpu_bytes = 0; - r->sharded_transfer_bytes = 0; + r->sharded_transfer_bytes = 0; r->percpu_metadata_bytes_res = 0; r->percpu_metadata_bytes = 0; - if (UsePerCpuCache()) { + if (UsePerCpuCache()) { r->per_cpu_bytes = Static::cpu_cache().TotalUsedBytes(); - r->sharded_transfer_bytes = Static::sharded_transfer_cache().TotalBytes(); + r->sharded_transfer_bytes = Static::sharded_transfer_cache().TotalBytes(); if (report_residence) { auto percpu_metadata = Static::cpu_cache().MetadataMemoryUsage(); @@ -251,8 +251,8 @@ static uint64_t InUseByApp(const TCMallocStats& stats) { return StatSub(stats.pageheap.system_bytes, stats.thread_bytes + stats.central_bytes + stats.transfer_bytes + stats.per_cpu_bytes + - stats.sharded_transfer_bytes + stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes); + stats.sharded_transfer_bytes + stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); } static uint64_t VirtualMemoryUsed(const TCMallocStats& stats) { @@ -269,21 +269,21 @@ static uint64_t RequiredBytes(const TCMallocStats& stats) { return StatSub(PhysicalMemoryUsed(stats), stats.pageheap.free_bytes); } -static int CountAllowedCpus() { - cpu_set_t allowed_cpus; - if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) { - return 0; - } - - return CPU_COUNT(&allowed_cpus); -} - +static int CountAllowedCpus() { + cpu_set_t allowed_cpus; + if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) { + return 0; + } + + return CPU_COUNT(&allowed_cpus); +} + // WRITE stats to "out" 
-static void DumpStats(Printer* out, int level) { +static void DumpStats(Printer* out, int level) { TCMallocStats stats; uint64_t class_count[kNumClasses]; - SpanStats span_stats[kNumClasses]; - TransferCacheStats tc_stats[kNumClasses]; + SpanStats span_stats[kNumClasses]; + TransferCacheStats tc_stats[kNumClasses]; if (level >= 2) { ExtractStats(&stats, class_count, span_stats, nullptr, nullptr, tc_stats, true); @@ -312,7 +312,7 @@ static void DumpStats(Printer* out, int level) { "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in per-CPU cache freelist\n" - "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in Sharded cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in Sharded cache freelist\n" "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n" "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n" "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" @@ -335,13 +335,13 @@ static void DumpStats(Printer* out, int level) { "MALLOC: %12" PRIu64 " (%7.1f MiB) per-CPU slab bytes used\n" "MALLOC: %12" PRIu64 " (%7.1f MiB) per-CPU slab resident bytes\n" "MALLOC: %12" PRIu64 " Tcmalloc page size\n" - "MALLOC: %12" PRIu64 " Tcmalloc hugepage size\n" - "MALLOC: %12" PRIu64 " CPUs Allowed in Mask\n", + "MALLOC: %12" PRIu64 " Tcmalloc hugepage size\n" + "MALLOC: %12" PRIu64 " CPUs Allowed in Mask\n", bytes_in_use_by_app, bytes_in_use_by_app / MiB, stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, stats.central_bytes, stats.central_bytes / MiB, stats.per_cpu_bytes, stats.per_cpu_bytes / MiB, - stats.sharded_transfer_bytes, stats.sharded_transfer_bytes / MiB, + stats.sharded_transfer_bytes, stats.sharded_transfer_bytes / MiB, stats.transfer_bytes, stats.transfer_bytes / MiB, stats.thread_bytes, stats.thread_bytes / MiB, stats.metadata_bytes, stats.metadata_bytes / MiB, @@ -367,18 
+367,18 @@ static void DumpStats(Printer* out, int level) { stats.percpu_metadata_bytes / MiB, stats.percpu_metadata_bytes_res, stats.percpu_metadata_bytes_res / MiB, uint64_t(kPageSize), - uint64_t(kHugePageSize), - CountAllowedCpus()); + uint64_t(kHugePageSize), + CountAllowedCpus()); // clang-format on - PrintExperiments(out); + PrintExperiments(out); out->printf( "MALLOC SAMPLED PROFILES: %zu bytes (current), %zu bytes (peak)\n", - static_cast<size_t>(Static::sampled_objects_size_.value()), - Static::peak_heap_tracker().CurrentPeakSize()); + static_cast<size_t>(Static::sampled_objects_size_.value()), + Static::peak_heap_tracker().CurrentPeakSize()); - MemoryStats memstats; - if (GetMemoryStats(&memstats)) { + MemoryStats memstats; + if (GetMemoryStats(&memstats)) { uint64_t rss = memstats.rss; uint64_t vss = memstats.vss; // clang-format off @@ -423,33 +423,33 @@ static void DumpStats(Printer* out, int level) { } out->printf("------------------------------------------------\n"); - out->printf("Transfer cache implementation: %s\n", - TransferCacheImplementationToLabel( - Static::transfer_cache().implementation())); - - out->printf("------------------------------------------------\n"); + out->printf("Transfer cache implementation: %s\n", + TransferCacheImplementationToLabel( + Static::transfer_cache().implementation())); + + out->printf("------------------------------------------------\n"); out->printf("Transfer cache insert/remove hits/misses by size class\n"); for (int cl = 1; cl < kNumClasses; ++cl) { out->printf( "class %3d [ %8zu bytes ] : %8" PRIu64 " insert hits; %8" PRIu64 - " insert misses (%8lu partial); %8" PRIu64 " remove hits; %8" PRIu64 - " remove misses (%8lu partial);\n", + " insert misses (%8lu partial); %8" PRIu64 " remove hits; %8" PRIu64 + " remove misses (%8lu partial);\n", cl, Static::sizemap().class_to_size(cl), tc_stats[cl].insert_hits, - tc_stats[cl].insert_misses, tc_stats[cl].insert_non_batch_misses, - tc_stats[cl].remove_hits, 
tc_stats[cl].remove_misses, - tc_stats[cl].remove_non_batch_misses); + tc_stats[cl].insert_misses, tc_stats[cl].insert_non_batch_misses, + tc_stats[cl].remove_hits, tc_stats[cl].remove_misses, + tc_stats[cl].remove_non_batch_misses); } - if (UsePerCpuCache()) { + if (UsePerCpuCache()) { Static::cpu_cache().Print(out); } - Static::page_allocator().Print(out, MemoryTag::kNormal); - if (Static::numa_topology().active_partitions() > 1) { - Static::page_allocator().Print(out, MemoryTag::kNormalP1); - } - Static::page_allocator().Print(out, MemoryTag::kSampled); - tracking::Print(out); + Static::page_allocator().Print(out, MemoryTag::kNormal); + if (Static::numa_topology().active_partitions() > 1) { + Static::page_allocator().Print(out, MemoryTag::kNormalP1); + } + Static::page_allocator().Print(out, MemoryTag::kSampled); + tracking::Print(out); Static::guardedpage_allocator().Print(out); uint64_t limit_bytes; @@ -461,28 +461,28 @@ static void DumpStats(Printer* out, int level) { Static::page_allocator().limit_hits()); out->printf("PARAMETER tcmalloc_per_cpu_caches %d\n", - Parameters::per_cpu_caches() ? 1 : 0); + Parameters::per_cpu_caches() ? 1 : 0); out->printf("PARAMETER tcmalloc_max_per_cpu_cache_size %d\n", - Parameters::max_per_cpu_cache_size()); + Parameters::max_per_cpu_cache_size()); out->printf("PARAMETER tcmalloc_max_total_thread_cache_bytes %lld\n", - Parameters::max_total_thread_cache_bytes()); + Parameters::max_total_thread_cache_bytes()); out->printf("PARAMETER malloc_release_bytes_per_sec %llu\n", - Parameters::background_release_rate()); - out->printf( - "PARAMETER tcmalloc_skip_subrelease_interval %s\n", - absl::FormatDuration(Parameters::filler_skip_subrelease_interval())); + Parameters::background_release_rate()); + out->printf( + "PARAMETER tcmalloc_skip_subrelease_interval %s\n", + absl::FormatDuration(Parameters::filler_skip_subrelease_interval())); out->printf("PARAMETER flat vcpus %d\n", - subtle::percpu::UsingFlatVirtualCpus() ? 
1 : 0); + subtle::percpu::UsingFlatVirtualCpus() ? 1 : 0); } } namespace { -/*static*/ void DumpStatsInPbtxt(Printer* out, int level) { +/*static*/ void DumpStatsInPbtxt(Printer* out, int level) { TCMallocStats stats; uint64_t class_count[kNumClasses]; - SpanStats span_stats[kNumClasses]; - TransferCacheStats tc_stats[kNumClasses]; + SpanStats span_stats[kNumClasses]; + TransferCacheStats tc_stats[kNumClasses]; if (level >= 2) { ExtractStats(&stats, class_count, span_stats, nullptr, nullptr, tc_stats, true); @@ -499,8 +499,8 @@ namespace { region.PrintI64("page_heap_freelist", stats.pageheap.free_bytes); region.PrintI64("central_cache_freelist", stats.central_bytes); region.PrintI64("per_cpu_cache_freelist", stats.per_cpu_bytes); - region.PrintI64("sharded_transfer_cache_freelist", - stats.sharded_transfer_bytes); + region.PrintI64("sharded_transfer_cache_freelist", + stats.sharded_transfer_bytes); region.PrintI64("transfer_cache_freelist", stats.transfer_bytes); region.PrintI64("thread_cache_freelists", stats.thread_bytes); region.PrintI64("malloc_metadata", stats.metadata_bytes); @@ -522,19 +522,19 @@ namespace { region.PrintI64("percpu_slab_size", stats.percpu_metadata_bytes); region.PrintI64("percpu_slab_residence", stats.percpu_metadata_bytes_res); region.PrintI64("tcmalloc_page_size", uint64_t(kPageSize)); - region.PrintI64("tcmalloc_huge_page_size", uint64_t(kHugePageSize)); - region.PrintI64("cpus_allowed", CountAllowedCpus()); + region.PrintI64("tcmalloc_huge_page_size", uint64_t(kHugePageSize)); + region.PrintI64("cpus_allowed", CountAllowedCpus()); { auto sampled_profiles = region.CreateSubRegion("sampled_profiles"); sampled_profiles.PrintI64("current_bytes", - Static::sampled_objects_size_.value()); - sampled_profiles.PrintI64("peak_bytes", - Static::peak_heap_tracker().CurrentPeakSize()); + Static::sampled_objects_size_.value()); + sampled_profiles.PrintI64("peak_bytes", + Static::peak_heap_tracker().CurrentPeakSize()); } // Print total process stats 
(inclusive of non-malloc sources). - MemoryStats memstats; + MemoryStats memstats; if (GetMemoryStats(&memstats)) { region.PrintI64("total_resident", uint64_t(memstats.rss)); region.PrintI64("total_mapped", uint64_t(memstats.vss)); @@ -561,28 +561,28 @@ namespace { entry.PrintI64("sizeclass", Static::sizemap().class_to_size(cl)); entry.PrintI64("insert_hits", tc_stats[cl].insert_hits); entry.PrintI64("insert_misses", tc_stats[cl].insert_misses); - entry.PrintI64("insert_non_batch_misses", - tc_stats[cl].insert_non_batch_misses); + entry.PrintI64("insert_non_batch_misses", + tc_stats[cl].insert_non_batch_misses); entry.PrintI64("remove_hits", tc_stats[cl].remove_hits); entry.PrintI64("remove_misses", tc_stats[cl].remove_misses); - entry.PrintI64("remove_non_batch_misses", - tc_stats[cl].remove_non_batch_misses); + entry.PrintI64("remove_non_batch_misses", + tc_stats[cl].remove_non_batch_misses); } } - region.PrintRaw("transfer_cache_implementation", - TransferCacheImplementationToLabel( - Static::transfer_cache().implementation())); - - if (UsePerCpuCache()) { + region.PrintRaw("transfer_cache_implementation", + TransferCacheImplementationToLabel( + Static::transfer_cache().implementation())); + + if (UsePerCpuCache()) { Static::cpu_cache().PrintInPbtxt(®ion); } } - Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kNormal); - if (Static::numa_topology().active_partitions() > 1) { - Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kNormalP1); - } - Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kSampled); + Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kNormal); + if (Static::numa_topology().active_partitions() > 1) { + Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kNormalP1); + } + Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kSampled); // We do not collect tracking information in pbtxt. 
size_t limit_bytes; @@ -597,20 +597,20 @@ namespace { Static::guardedpage_allocator().PrintInPbtxt(&gwp_asan); } - region.PrintI64("memory_release_failures", SystemReleaseErrors()); + region.PrintI64("memory_release_failures", SystemReleaseErrors()); - region.PrintBool("tcmalloc_per_cpu_caches", Parameters::per_cpu_caches()); + region.PrintBool("tcmalloc_per_cpu_caches", Parameters::per_cpu_caches()); region.PrintI64("tcmalloc_max_per_cpu_cache_size", - Parameters::max_per_cpu_cache_size()); + Parameters::max_per_cpu_cache_size()); region.PrintI64("tcmalloc_max_total_thread_cache_bytes", - Parameters::max_total_thread_cache_bytes()); - region.PrintI64("malloc_release_bytes_per_sec", - static_cast<int64_t>(Parameters::background_release_rate())); + Parameters::max_total_thread_cache_bytes()); + region.PrintI64("malloc_release_bytes_per_sec", + static_cast<int64_t>(Parameters::background_release_rate())); region.PrintI64( - "tcmalloc_skip_subrelease_interval_ns", - absl::ToInt64Nanoseconds(Parameters::filler_skip_subrelease_interval())); - region.PrintRaw("percpu_vcpu_type", - subtle::percpu::UsingFlatVirtualCpus() ? "FLAT" : "NONE"); + "tcmalloc_skip_subrelease_interval_ns", + absl::ToInt64Nanoseconds(Parameters::filler_skip_subrelease_interval())); + region.PrintRaw("percpu_vcpu_type", + subtle::percpu::UsingFlatVirtualCpus() ? 
"FLAT" : "NONE"); } } // namespace @@ -629,7 +629,7 @@ namespace { extern "C" ABSL_ATTRIBUTE_UNUSED int MallocExtension_Internal_GetStatsInPbtxt( char* buffer, int buffer_length) { ASSERT(buffer_length > 0); - Printer printer(buffer, buffer_length); + Printer printer(buffer, buffer_length); // Print level one stats unless lots of space is available if (buffer_length < 10000) { @@ -642,7 +642,7 @@ extern "C" ABSL_ATTRIBUTE_UNUSED int MallocExtension_Internal_GetStatsInPbtxt( if (buffer_length > required) { absl::base_internal::SpinLockHolder h(&pageheap_lock); - required += GetRegionFactory()->GetStatsInPbtxt( + required += GetRegionFactory()->GetStatsInPbtxt( absl::Span<char>(buffer + required, buffer_length - required)); } @@ -652,7 +652,7 @@ extern "C" ABSL_ATTRIBUTE_UNUSED int MallocExtension_Internal_GetStatsInPbtxt( static void PrintStats(int level) { const int kBufferSize = (TCMALLOC_HAVE_TRACKING ? 2 << 20 : 64 << 10); char* buffer = new char[kBufferSize]; - Printer printer(buffer, kBufferSize); + Printer printer(buffer, kBufferSize); DumpStats(&printer, level); (void)write(STDERR_FILENO, buffer, strlen(buffer)); delete[] buffer; @@ -661,9 +661,9 @@ static void PrintStats(int level) { // This function computes a profile that maps a live stack trace to // the number of bytes of central-cache memory pinned by an allocation // at that stack trace. -static std::unique_ptr<const ProfileBase> DumpFragmentationProfile() { - auto profile = absl::make_unique<StackTraceTable>(ProfileType::kFragmentation, - 1, true, true); +static std::unique_ptr<const ProfileBase> DumpFragmentationProfile() { + auto profile = absl::make_unique<StackTraceTable>(ProfileType::kFragmentation, + 1, true, true); { absl::base_internal::SpinLockHolder h(&pageheap_lock); @@ -700,9 +700,9 @@ static std::unique_ptr<const ProfileBase> DumpFragmentationProfile() { // compensated for (that is, it reports 8000 16-byte objects iff we believe the // program has that many live objects.) 
Otherwise, do not adjust for sampling // (the caller will do so somehow.) -static std::unique_ptr<const ProfileBase> DumpHeapProfile(bool unsample) { +static std::unique_ptr<const ProfileBase> DumpHeapProfile(bool unsample) { auto profile = absl::make_unique<StackTraceTable>( - ProfileType::kHeap, Sampler::GetSamplePeriod(), true, unsample); + ProfileType::kHeap, Sampler::GetSamplePeriod(), true, unsample); absl::base_internal::SpinLockHolder h(&pageheap_lock); for (Span* s : Static::sampled_objects_) { profile->AddTrace(1.0, *s->sampled_stack()); @@ -712,12 +712,12 @@ static std::unique_ptr<const ProfileBase> DumpHeapProfile(bool unsample) { class AllocationSampleList; -class AllocationSample final : public AllocationProfilingTokenBase { +class AllocationSample final : public AllocationProfilingTokenBase { public: AllocationSample(); ~AllocationSample() override; - Profile Stop() && override; + Profile Stop() && override; private: std::unique_ptr<StackTraceTable> mallocs_; @@ -760,7 +760,7 @@ class AllocationSampleList { AllocationSample::AllocationSample() { mallocs_ = absl::make_unique<StackTraceTable>( - ProfileType::kAllocations, Sampler::GetSamplePeriod(), true, true); + ProfileType::kAllocations, Sampler::GetSamplePeriod(), true, true); absl::base_internal::SpinLockHolder h(&pageheap_lock); allocation_samples_.Add(this); } @@ -777,14 +777,14 @@ AllocationSample::~AllocationSample() { } } -Profile AllocationSample::Stop() && ABSL_LOCKS_EXCLUDED(pageheap_lock) { +Profile AllocationSample::Stop() && ABSL_LOCKS_EXCLUDED(pageheap_lock) { // We need to remove ourselves from the allocation_samples_ list before we // mutate mallocs_; if (mallocs_) { absl::base_internal::SpinLockHolder h(&pageheap_lock); allocation_samples_.Remove(this); } - return ProfileAccessor::MakeProfile(std::move(mallocs_)); + return ProfileAccessor::MakeProfile(std::move(mallocs_)); } extern "C" void MallocExtension_Internal_GetStats(std::string* ret) { @@ -808,7 +808,7 @@ extern "C" void 
MallocExtension_Internal_GetStats(std::string* ret) { extern "C" size_t TCMalloc_Internal_GetStats(char* buffer, size_t buffer_length) { - Printer printer(buffer, buffer_length); + Printer printer(buffer, buffer_length); if (buffer_length < 10000) { DumpStats(&printer, 1); } else { @@ -816,34 +816,34 @@ extern "C" size_t TCMalloc_Internal_GetStats(char* buffer, } printer.printf("\nLow-level allocator stats:\n"); - printer.printf("Memory Release Failures: %d\n", SystemReleaseErrors()); + printer.printf("Memory Release Failures: %d\n", SystemReleaseErrors()); size_t n = printer.SpaceRequired(); size_t bytes_remaining = buffer_length > n ? buffer_length - n : 0; if (bytes_remaining > 0) { - n += GetRegionFactory()->GetStats( + n += GetRegionFactory()->GetStats( absl::Span<char>(buffer + n, bytes_remaining)); } return n; } -extern "C" const ProfileBase* MallocExtension_Internal_SnapshotCurrent( - ProfileType type) { +extern "C" const ProfileBase* MallocExtension_Internal_SnapshotCurrent( + ProfileType type) { switch (type) { - case ProfileType::kHeap: + case ProfileType::kHeap: return DumpHeapProfile(true).release(); - case ProfileType::kFragmentation: + case ProfileType::kFragmentation: return DumpFragmentationProfile().release(); - case ProfileType::kPeakHeap: + case ProfileType::kPeakHeap: return Static::peak_heap_tracker().DumpSample().release(); default: return nullptr; } } -extern "C" AllocationProfilingTokenBase* +extern "C" AllocationProfilingTokenBase* MallocExtension_Internal_StartAllocationProfiling() { return new AllocationSample(); } @@ -903,13 +903,13 @@ bool GetNumericProperty(const char* name_data, size_t name_size, return true; } - if (name == "tcmalloc.sharded_transfer_cache_free") { - TCMallocStats stats; - ExtractTCMallocStats(&stats, false); - *value = stats.sharded_transfer_bytes; - return true; - } - + if (name == "tcmalloc.sharded_transfer_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = 
stats.sharded_transfer_bytes; + return true; + } + if (name == "tcmalloc.slack_bytes") { // Kept for backwards compatibility. Now defined externally as: // pageheap_free_bytes + pageheap_unmapped_bytes. @@ -963,18 +963,18 @@ bool GetNumericProperty(const char* name_data, size_t name_size, if (name == "tcmalloc.local_bytes") { TCMallocStats stats; ExtractTCMallocStats(&stats, false); - *value = - stats.thread_bytes + stats.per_cpu_bytes + stats.sharded_transfer_bytes; - ; + *value = + stats.thread_bytes + stats.per_cpu_bytes + stats.sharded_transfer_bytes; + ; return true; } if (name == "tcmalloc.external_fragmentation_bytes") { TCMallocStats stats; ExtractTCMallocStats(&stats, false); - *value = (stats.pageheap.free_bytes + stats.central_bytes + - stats.per_cpu_bytes + stats.sharded_transfer_bytes + - stats.transfer_bytes + stats.thread_bytes + stats.metadata_bytes); + *value = (stats.pageheap.free_bytes + stats.central_bytes + + stats.per_cpu_bytes + stats.sharded_transfer_bytes + + stats.transfer_bytes + stats.thread_bytes + stats.metadata_bytes); return true; } @@ -1024,11 +1024,11 @@ bool GetNumericProperty(const char* name_data, size_t name_size, return false; } -MallocExtension::Ownership GetOwnership(const void* ptr) { +MallocExtension::Ownership GetOwnership(const void* ptr) { const PageId p = PageIdContaining(ptr); return Static::pagemap().GetDescriptor(p) - ? MallocExtension::Ownership::kOwned - : MallocExtension::Ownership::kNotOwned; + ? 
MallocExtension::Ownership::kOwned + : MallocExtension::Ownership::kNotOwned; } extern "C" bool MallocExtension_Internal_GetNumericProperty( @@ -1037,21 +1037,21 @@ extern "C" bool MallocExtension_Internal_GetNumericProperty( } extern "C" void MallocExtension_Internal_GetMemoryLimit( - MallocExtension::MemoryLimit* limit) { + MallocExtension::MemoryLimit* limit) { ASSERT(limit != nullptr); std::tie(limit->limit, limit->hard) = Static::page_allocator().limit(); } extern "C" void MallocExtension_Internal_SetMemoryLimit( - const MallocExtension::MemoryLimit* limit) { + const MallocExtension::MemoryLimit* limit) { ASSERT(limit != nullptr); if (!limit->hard) { - Parameters::set_heap_size_hard_limit(0); - Static::page_allocator().set_limit(limit->limit, false /* !hard */); + Parameters::set_heap_size_hard_limit(0); + Static::page_allocator().set_limit(limit->limit, false /* !hard */); } else { - Parameters::set_heap_size_hard_limit(limit->limit); + Parameters::set_heap_size_hard_limit(limit->limit); } } @@ -1059,15 +1059,15 @@ extern "C" void MallocExtension_Internal_MarkThreadIdle() { ThreadCache::BecomeIdle(); } -extern "C" AddressRegionFactory* MallocExtension_Internal_GetRegionFactory() { +extern "C" AddressRegionFactory* MallocExtension_Internal_GetRegionFactory() { absl::base_internal::SpinLockHolder h(&pageheap_lock); - return GetRegionFactory(); + return GetRegionFactory(); } extern "C" void MallocExtension_Internal_SetRegionFactory( - AddressRegionFactory* factory) { + AddressRegionFactory* factory) { absl::base_internal::SpinLockHolder h(&pageheap_lock); - SetRegionFactory(factory); + SetRegionFactory(factory); } // ReleaseMemoryToSystem drops the page heap lock while actually calling to @@ -1101,7 +1101,7 @@ extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem( if (num_bytes > 0) { // A sub-page size request may round down to zero. Assume the caller wants // some memory released. 
- num_pages = BytesToLengthCeil(num_bytes); + num_pages = BytesToLengthCeil(num_bytes); ASSERT(num_pages > Length(0)); } else { num_pages = Length(0); @@ -1117,40 +1117,40 @@ extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem( } } -extern "C" void MallocExtension_EnableForkSupport() { - Static::EnableForkSupport(); -} - -void TCMallocPreFork() { - if (!Static::ForkSupportEnabled()) { - return; - } - - if (Static::CPUCacheActive()) { - Static::cpu_cache().AcquireInternalLocks(); - } - Static::transfer_cache().AcquireInternalLocks(); - guarded_page_lock.Lock(); - release_lock.Lock(); - pageheap_lock.Lock(); - AcquireSystemAllocLock(); -} - -void TCMallocPostFork() { - if (!Static::ForkSupportEnabled()) { - return; - } - - ReleaseSystemAllocLock(); - pageheap_lock.Unlock(); - guarded_page_lock.Unlock(); - release_lock.Unlock(); - Static::transfer_cache().ReleaseInternalLocks(); - if (Static::CPUCacheActive()) { - Static::cpu_cache().ReleaseInternalLocks(); - } -} - +extern "C" void MallocExtension_EnableForkSupport() { + Static::EnableForkSupport(); +} + +void TCMallocPreFork() { + if (!Static::ForkSupportEnabled()) { + return; + } + + if (Static::CPUCacheActive()) { + Static::cpu_cache().AcquireInternalLocks(); + } + Static::transfer_cache().AcquireInternalLocks(); + guarded_page_lock.Lock(); + release_lock.Lock(); + pageheap_lock.Lock(); + AcquireSystemAllocLock(); +} + +void TCMallocPostFork() { + if (!Static::ForkSupportEnabled()) { + return; + } + + ReleaseSystemAllocLock(); + pageheap_lock.Unlock(); + guarded_page_lock.Unlock(); + release_lock.Unlock(); + Static::transfer_cache().ReleaseInternalLocks(); + if (Static::CPUCacheActive()) { + Static::cpu_cache().ReleaseInternalLocks(); + } +} + extern "C" void MallocExtension_SetSampleUserDataCallbacks( MallocExtension::CreateSampleUserDataCallback create, MallocExtension::CopySampleUserDataCallback copy, @@ -1168,12 +1168,12 @@ static ABSL_ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) { 
Static::InitIfNecessary(); size_t align = static_cast<size_t>(1ull << (flags & 0x3f)); uint32_t cl; - if (ABSL_PREDICT_TRUE(Static::sizemap().GetSizeClass( - CppPolicy().AlignAs(align), size, &cl))) { + if (ABSL_PREDICT_TRUE(Static::sizemap().GetSizeClass( + CppPolicy().AlignAs(align), size, &cl))) { ASSERT(cl != 0); return Static::sizemap().class_to_size(cl); } else { - return BytesToLengthCeil(size).in_bytes(); + return BytesToLengthCeil(size).in_bytes(); } } @@ -1187,22 +1187,22 @@ extern "C" size_t nallocx(size_t size, int flags) noexcept { return nallocx_slow(size, flags); } uint32_t cl; - if (ABSL_PREDICT_TRUE( - Static::sizemap().GetSizeClass(CppPolicy(), size, &cl))) { + if (ABSL_PREDICT_TRUE( + Static::sizemap().GetSizeClass(CppPolicy(), size, &cl))) { ASSERT(cl != 0); return Static::sizemap().class_to_size(cl); } else { - return BytesToLengthCeil(size).in_bytes(); + return BytesToLengthCeil(size).in_bytes(); } } -extern "C" MallocExtension::Ownership MallocExtension_Internal_GetOwnership( - const void* ptr) { - return GetOwnership(ptr); +extern "C" MallocExtension::Ownership MallocExtension_Internal_GetOwnership( + const void* ptr) { + return GetOwnership(ptr); } extern "C" void MallocExtension_Internal_GetProperties( - std::map<std::string, MallocExtension::Property>* result) { + std::map<std::string, MallocExtension::Property>* result) { TCMallocStats stats; ExtractTCMallocStats(&stats, true); @@ -1229,8 +1229,8 @@ extern "C" void MallocExtension_Internal_GetProperties( (*result)["tcmalloc.transfer_cache_free"].value = stats.transfer_bytes; // Per CPU Cache Free List (*result)["tcmalloc.cpu_free"].value = stats.per_cpu_bytes; - (*result)["tcmalloc.sharded_transfer_cache_free"].value = - stats.sharded_transfer_bytes; + (*result)["tcmalloc.sharded_transfer_cache_free"].value = + stats.sharded_transfer_bytes; (*result)["tcmalloc.per_cpu_caches_active"].value = Static::CPUCacheActive(); // Thread Cache Free List (*result)["tcmalloc.thread_cache_free"].value 
= stats.thread_bytes; @@ -1243,8 +1243,8 @@ extern "C" void MallocExtension_Internal_GetProperties( (*result)["tcmalloc.page_algorithm"].value = Static::page_allocator().algorithm(); - FillExperimentProperties(result); - tracking::GetProperties(result); + FillExperimentProperties(result); + tracking::GetProperties(result); } extern "C" size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu) { @@ -1311,7 +1311,7 @@ inline void SetClassCapacity(const void* ptr, uint32_t cl, size_t* psize) { inline void SetPagesCapacity(const void*, size_t, std::nullptr_t) {} inline void SetPagesCapacity(const void* ptr, size_t size, size_t* psize) { if (ABSL_PREDICT_TRUE(ptr != nullptr)) { - *psize = BytesToLengthCeil(size).in_bytes(); + *psize = BytesToLengthCeil(size).in_bytes(); } else { *psize = 0; } @@ -1345,14 +1345,14 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreeSmall(void* ptr, size_t cl) { if (ABSL_PREDICT_FALSE(!GetThreadSampler()->IsOnFastPath())) { // Take the slow path. - invoke_delete_hooks_and_free<FreeSmallSlow, hooks_state>(ptr, cl); + invoke_delete_hooks_and_free<FreeSmallSlow, hooks_state>(ptr, cl); return; } #ifndef TCMALLOC_DEPRECATED_PERTHREAD // The CPU Cache is enabled, so we're able to take the fastpath. ASSERT(Static::CPUCacheActive()); - ASSERT(subtle::percpu::IsFastNoInit()); + ASSERT(subtle::percpu::IsFastNoInit()); Static::cpu_cache().Deallocate(ptr, cl); #else // TCMALLOC_DEPRECATED_PERTHREAD @@ -1381,7 +1381,7 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreeSmall(void* ptr, // function prologue/epilogue. 
ABSL_ATTRIBUTE_NOINLINE static void FreeSmallSlow(void* ptr, size_t cl) { - if (ABSL_PREDICT_TRUE(UsePerCpuCache())) { + if (ABSL_PREDICT_TRUE(UsePerCpuCache())) { Static::cpu_cache().Deallocate(ptr, cl); } else if (ThreadCache* cache = ThreadCache::GetCacheIfPresent()) { // TODO(b/134691947): If we reach this path from the ThreadCache fastpath, @@ -1391,7 +1391,7 @@ static void FreeSmallSlow(void* ptr, size_t cl) { } else { // This thread doesn't have thread-cache yet or already. Delete directly // into central cache. - Static::transfer_cache().InsertRange(cl, absl::Span<void*>(&ptr, 1)); + Static::transfer_cache().InsertRange(cl, absl::Span<void*>(&ptr, 1)); } } @@ -1459,10 +1459,10 @@ static void* SampleifyAllocation(size_t requested_size, size_t weight, allocated_size = Static::sizemap().class_to_size(cl); // If the caller didn't provide a span, allocate one: - Length num_pages = BytesToLengthCeil(allocated_size); + Length num_pages = BytesToLengthCeil(allocated_size); if ((guarded_alloc = TrySampleGuardedAllocation( requested_size, requested_alignment, num_pages))) { - ASSERT(IsSampledMemory(guarded_alloc)); + ASSERT(IsSampledMemory(guarded_alloc)); const PageId p = PageIdContaining(guarded_alloc); absl::base_internal::SpinLockHolder h(&pageheap_lock); span = Span::New(p, num_pages); @@ -1502,7 +1502,7 @@ static void* SampleifyAllocation(size_t requested_size, size_t weight, // Grab the stack trace outside the heap lock StackTrace tmp; tmp.proxy = proxy; - tmp.depth = absl::GetStackTrace(tmp.stack, kMaxStackDepth, 1); + tmp.depth = absl::GetStackTrace(tmp.stack, kMaxStackDepth, 1); tmp.requested_size = requested_size; tmp.requested_alignment = requested_alignment; tmp.allocated_size = allocated_size; @@ -1513,12 +1513,12 @@ static void* SampleifyAllocation(size_t requested_size, size_t weight, absl::base_internal::SpinLockHolder h(&pageheap_lock); // Allocate stack trace StackTrace* stack = Static::stacktrace_allocator().New(); - 
allocation_samples_.ReportMalloc(tmp); - *stack = tmp; - span->Sample(stack); + allocation_samples_.ReportMalloc(tmp); + *stack = tmp; + span->Sample(stack); } - Static::peak_heap_tracker().MaybeSaveSample(); + Static::peak_heap_tracker().MaybeSaveSample(); if (obj != nullptr) { #if TCMALLOC_HAVE_TRACKING @@ -1530,7 +1530,7 @@ static void* SampleifyAllocation(size_t requested_size, size_t weight, // TODO(b/158678747): As of cl/315283185, we may occasionally see a hit in // the TransferCache here. Prior to that CL, we always forced a miss. Both // of these may artificially skew our tracking data. - Static::transfer_cache().InsertRange(cl, absl::Span<void*>(&obj, 1)); + Static::transfer_cache().InsertRange(cl, absl::Span<void*>(&obj, 1)); #else // We are not maintaining precise statistics on malloc hit/miss rates at our // cache tiers. We can deallocate into our ordinary cache. @@ -1551,18 +1551,18 @@ inline size_t ShouldSampleAllocation(size_t size) { return GetThreadSampler()->RecordAllocation(size); } -template <typename Policy> -inline void* do_malloc_pages(Policy policy, size_t size) { +template <typename Policy> +inline void* do_malloc_pages(Policy policy, size_t size) { // Page allocator does not deal well with num_pages = 0. 
- Length num_pages = std::max<Length>(BytesToLengthCeil(size), Length(1)); + Length num_pages = std::max<Length>(BytesToLengthCeil(size), Length(1)); MemoryTag tag = MemoryTag::kNormal; - if (Static::numa_topology().numa_aware()) { - tag = NumaNormalTag(policy.numa_partition()); - } - const size_t alignment = policy.align(); + if (Static::numa_topology().numa_aware()) { + tag = NumaNormalTag(policy.numa_partition()); + } + const size_t alignment = policy.align(); Span* span = Static::page_allocator().NewAligned( - num_pages, BytesToLengthCeil(alignment), tag); + num_pages, BytesToLengthCeil(alignment), tag); if (span == nullptr) { return nullptr; @@ -1570,7 +1570,7 @@ inline void* do_malloc_pages(Policy policy, size_t size) { void* result = span->start_address(); ASSERT( - tag == GetMemoryTag(span->start_address())); + tag == GetMemoryTag(span->start_address())); if (size_t weight = ShouldSampleAllocation(size)) { CHECK_CONDITION(result == SampleifyAllocation(size, weight, alignment, 0, @@ -1587,7 +1587,7 @@ inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE AllocSmall(Policy policy, size_t cl, ASSERT(cl != 0); void* result; - if (UsePerCpuCache()) { + if (UsePerCpuCache()) { result = Static::cpu_cache().Allocate<Policy::handle_oom>(cl); } else { result = ThreadCache::GetCache()->Allocate<Policy::handle_oom>(cl); @@ -1622,8 +1622,8 @@ static void do_free_pages(void* ptr, const PageId p) { Span* span = Static::pagemap().GetExistingDescriptor(p); ASSERT(span != nullptr); - // Prefetch now to avoid a stall accessing *span while under the lock. - span->Prefetch(); + // Prefetch now to avoid a stall accessing *span while under the lock. 
+ span->Prefetch(); { absl::base_internal::SpinLockHolder h(&pageheap_lock); ASSERT(span->first_page() == p); @@ -1631,16 +1631,16 @@ static void do_free_pages(void* ptr, const PageId p) { proxy = st->proxy; size = st->allocated_size; if (proxy == nullptr && size <= kMaxSize) { - tracking::Report(kFreeMiss, - Static::sizemap().SizeClass( - CppPolicy().InSameNumaPartitionAs(ptr), size), - 1); + tracking::Report(kFreeMiss, + Static::sizemap().SizeClass( + CppPolicy().InSameNumaPartitionAs(ptr), size), + 1); } notify_sampled_alloc = true; Static::DestroySampleUserData(st->user_data); Static::stacktrace_allocator().Delete(st); } - if (IsSampledMemory(ptr)) { + if (IsSampledMemory(ptr)) { if (Static::guardedpage_allocator().PointerIsMine(ptr)) { // Release lock while calling Deallocate() since it does a system call. pageheap_lock.Unlock(); @@ -1651,9 +1651,9 @@ static void do_free_pages(void* ptr, const PageId p) { ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); Static::page_allocator().Delete(span, MemoryTag::kSampled); } - } else if (kNumaPartitions != 1) { - ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); - Static::page_allocator().Delete(span, GetMemoryTag(ptr)); + } else if (kNumaPartitions != 1) { + ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); + Static::page_allocator().Delete(span, GetMemoryTag(ptr)); } else { ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); Static::page_allocator().Delete(span, MemoryTag::kNormal); @@ -1664,8 +1664,8 @@ static void do_free_pages(void* ptr, const PageId p) { } if (proxy) { - const auto policy = CppPolicy().InSameNumaPartitionAs(proxy); - const size_t cl = Static::sizemap().SizeClass(policy, size); + const auto policy = CppPolicy().InSameNumaPartitionAs(proxy); + const size_t cl = Static::sizemap().SizeClass(policy, size); FreeSmall<Hooks::NO>(proxy, cl); } } @@ -1718,7 +1718,7 @@ inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_cl(void* ptr, size_t cl) { 
ASSERT(!Static::pagemap().GetExistingDescriptor(p)->sampled()); FreeSmall<hooks_state>(ptr, cl); } else { - invoke_delete_hooks_and_free<do_free_pages, hooks_state>(ptr, p); + invoke_delete_hooks_and_free<do_free_pages, hooks_state>(ptr, p); } } @@ -1737,7 +1737,7 @@ bool CorrectAlignment(void* ptr, std::align_val_t alignment); inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreePages(void* ptr) { const PageId p = PageIdContaining(ptr); - invoke_delete_hooks_and_free<do_free_pages, Hooks::RUN>(ptr, p); + invoke_delete_hooks_and_free<do_free_pages, Hooks::RUN>(ptr, p); } template <typename AlignPolicy> @@ -1754,7 +1754,7 @@ inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_size(void* ptr, // // The optimized path doesn't work with sampled objects, whose deletions // trigger more operations and require to visit metadata. - if (ABSL_PREDICT_FALSE(IsSampledMemory(ptr))) { + if (ABSL_PREDICT_FALSE(IsSampledMemory(ptr))) { // we don't know true class size of the ptr if (ptr == nullptr) return; return FreePages(ptr); @@ -1768,9 +1768,9 @@ inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_size(void* ptr, ASSERT(ptr != nullptr); uint32_t cl; - if (ABSL_PREDICT_FALSE(!Static::sizemap().GetSizeClass( - CppPolicy().AlignAs(align.align()).InSameNumaPartitionAs(ptr), size, - &cl))) { + if (ABSL_PREDICT_FALSE(!Static::sizemap().GetSizeClass( + CppPolicy().AlignAs(align.align()).InSameNumaPartitionAs(ptr), size, + &cl))) { // We couldn't calculate the size class, which means size > kMaxSize. ASSERT(size > kMaxSize || align.align() > alignof(std::max_align_t)); static_assert(kMaxSize >= kPageSize, "kMaxSize must be at least kPageSize"); @@ -1810,11 +1810,11 @@ bool CorrectSize(void* ptr, size_t size, AlignPolicy align) { // Round-up passed in size to how much tcmalloc allocates for that size. 
if (Static::guardedpage_allocator().PointerIsMine(ptr)) { size = Static::guardedpage_allocator().GetRequestedSize(ptr); - } else if (Static::sizemap().GetSizeClass(CppPolicy().AlignAs(align.align()), - size, &cl)) { + } else if (Static::sizemap().GetSizeClass(CppPolicy().AlignAs(align.align()), + size, &cl)) { size = Static::sizemap().class_to_size(cl); } else { - size = BytesToLengthCeil(size).in_bytes(); + size = BytesToLengthCeil(size).in_bytes(); } size_t actual = GetSize(ptr); if (ABSL_PREDICT_TRUE(actual == size)) return true; @@ -1825,7 +1825,7 @@ bool CorrectSize(void* ptr, size_t size, AlignPolicy align) { // Checks that an asserted object <ptr> has <align> alignment. bool CorrectAlignment(void* ptr, std::align_val_t alignment) { size_t align = static_cast<size_t>(alignment); - ASSERT(absl::has_single_bit(align)); + ASSERT(absl::has_single_bit(align)); return ((reinterpret_cast<uintptr_t>(ptr) & (align - 1)) == 0); } @@ -1837,7 +1837,7 @@ inline int do_mallopt(int cmd, int value) { return 1; // Indicates error } -#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO inline struct mallinfo do_mallinfo() { TCMallocStats stats; ExtractTCMallocStats(&stats, false); @@ -1857,33 +1857,33 @@ inline struct mallinfo do_mallinfo() { return info; } -#endif // TCMALLOC_HAVE_STRUCT_MALLINFO +#endif // TCMALLOC_HAVE_STRUCT_MALLINFO } // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -using tcmalloc::tcmalloc_internal::AllocSmall; -using tcmalloc::tcmalloc_internal::CppPolicy; -using tcmalloc::tcmalloc_internal::do_free_no_hooks; -#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO -using tcmalloc::tcmalloc_internal::do_mallinfo; -#endif -using tcmalloc::tcmalloc_internal::do_malloc_pages; -using tcmalloc::tcmalloc_internal::do_malloc_stats; -using tcmalloc::tcmalloc_internal::do_mallopt; -using tcmalloc::tcmalloc_internal::GetThreadSampler; -using tcmalloc::tcmalloc_internal::MallocPolicy; -using 
tcmalloc::tcmalloc_internal::SetClassCapacity; -using tcmalloc::tcmalloc_internal::SetPagesCapacity; -using tcmalloc::tcmalloc_internal::Static; -using tcmalloc::tcmalloc_internal::UsePerCpuCache; - -#ifdef TCMALLOC_DEPRECATED_PERTHREAD -using tcmalloc::tcmalloc_internal::ThreadCache; -#endif // TCMALLOC_DEPRECATED_PERTHREAD - +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +using tcmalloc::tcmalloc_internal::AllocSmall; +using tcmalloc::tcmalloc_internal::CppPolicy; +using tcmalloc::tcmalloc_internal::do_free_no_hooks; +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +using tcmalloc::tcmalloc_internal::do_mallinfo; +#endif +using tcmalloc::tcmalloc_internal::do_malloc_pages; +using tcmalloc::tcmalloc_internal::do_malloc_stats; +using tcmalloc::tcmalloc_internal::do_mallopt; +using tcmalloc::tcmalloc_internal::GetThreadSampler; +using tcmalloc::tcmalloc_internal::MallocPolicy; +using tcmalloc::tcmalloc_internal::SetClassCapacity; +using tcmalloc::tcmalloc_internal::SetPagesCapacity; +using tcmalloc::tcmalloc_internal::Static; +using tcmalloc::tcmalloc_internal::UsePerCpuCache; + +#ifdef TCMALLOC_DEPRECATED_PERTHREAD +using tcmalloc::tcmalloc_internal::ThreadCache; +#endif // TCMALLOC_DEPRECATED_PERTHREAD + // Slow path implementation. // This function is used by `fast_alloc` if the allocation requires page sized // allocations or some complex logic is required such as initialization, @@ -1898,11 +1898,11 @@ static void* ABSL_ATTRIBUTE_SECTION(google_malloc) GetThreadSampler()->UpdateFastPathState(); void* p; uint32_t cl; - bool is_small = Static::sizemap().GetSizeClass(policy, size, &cl); + bool is_small = Static::sizemap().GetSizeClass(policy, size, &cl); if (ABSL_PREDICT_TRUE(is_small)) { p = AllocSmall(policy, cl, size, capacity); } else { - p = do_malloc_pages(policy, size); + p = do_malloc_pages(policy, size); // Set capacity to the exact size for a page allocation. 
// This needs to be revisited if we introduce gwp-asan // sampling / guarded allocations to do_malloc_pages(). @@ -1925,7 +1925,7 @@ fast_alloc(Policy policy, size_t size, CapacityPtr capacity = nullptr) { // (regardless of size), but in this case should also delegate to the slow // path by the fast path check further down. uint32_t cl; - bool is_small = Static::sizemap().GetSizeClass(policy, size, &cl); + bool is_small = Static::sizemap().GetSizeClass(policy, size, &cl); if (ABSL_PREDICT_FALSE(!is_small)) { return slow_alloc(policy, size, capacity); } @@ -1971,12 +1971,12 @@ fast_alloc(Policy policy, size_t size, CapacityPtr capacity = nullptr) { return ret; } -using tcmalloc::tcmalloc_internal::GetOwnership; -using tcmalloc::tcmalloc_internal::GetSize; - +using tcmalloc::tcmalloc_internal::GetOwnership; +using tcmalloc::tcmalloc_internal::GetSize; + extern "C" size_t MallocExtension_Internal_GetAllocatedSize(const void* ptr) { - ASSERT(!ptr || - GetOwnership(ptr) != tcmalloc::MallocExtension::Ownership::kNotOwned); + ASSERT(!ptr || + GetOwnership(ptr) != tcmalloc::MallocExtension::Ownership::kNotOwned); return GetSize(ptr); } @@ -1985,7 +1985,7 @@ extern "C" void MallocExtension_Internal_MarkThreadBusy() { // invoking any hooks. Static::InitIfNecessary(); - if (UsePerCpuCache()) { + if (UsePerCpuCache()) { return; } @@ -1996,18 +1996,18 @@ extern "C" void MallocExtension_Internal_MarkThreadBusy() { // Exported routines //------------------------------------------------------------------- -using tcmalloc::tcmalloc_internal::AlignAsPolicy; -using tcmalloc::tcmalloc_internal::CorrectAlignment; -using tcmalloc::tcmalloc_internal::CorrectSize; -using tcmalloc::tcmalloc_internal::DefaultAlignPolicy; -using tcmalloc::tcmalloc_internal::do_free; -using tcmalloc::tcmalloc_internal::do_free_with_size; - -// depends on TCMALLOC_HAVE_STRUCT_MALLINFO, so needs to come after that. 
+using tcmalloc::tcmalloc_internal::AlignAsPolicy; +using tcmalloc::tcmalloc_internal::CorrectAlignment; +using tcmalloc::tcmalloc_internal::CorrectSize; +using tcmalloc::tcmalloc_internal::DefaultAlignPolicy; +using tcmalloc::tcmalloc_internal::do_free; +using tcmalloc::tcmalloc_internal::do_free_with_size; + +// depends on TCMALLOC_HAVE_STRUCT_MALLINFO, so needs to come after that. #include "tcmalloc/libc_override.h" extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalMalloc( - size_t size) noexcept { + size_t size) noexcept { // Use TCMallocInternalMemalign to avoid requiring size % // alignof(std::max_align_t) == 0. TCMallocInternalAlignedAlloc enforces this // property. @@ -2026,7 +2026,7 @@ extern "C" ABSL_ATTRIBUTE_SECTION(google_malloc) tcmalloc::sized_ptr_t } extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalMalloc_aligned( - size_t size, std::align_val_t alignment) noexcept { + size_t size, std::align_val_t alignment) noexcept { return fast_alloc(MallocPolicy().AlignAs(alignment), size); } @@ -2053,12 +2053,12 @@ extern "C" ABSL_ATTRIBUTE_SECTION( #endif // TCMALLOC_ALIAS extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalFree( - void* ptr) noexcept { + void* ptr) noexcept { do_free(ptr); } extern "C" void TCMallocInternalSdallocx(void* ptr, size_t size, - int flags) noexcept { + int flags) noexcept { size_t alignment = alignof(std::max_align_t); if (ABSL_PREDICT_FALSE(flags != 0)) { @@ -2066,10 +2066,10 @@ extern "C" void TCMallocInternalSdallocx(void* ptr, size_t size, alignment = static_cast<size_t>(1ull << (flags & 0x3f)); } - return do_free_with_size(ptr, size, AlignAsPolicy(alignment)); + return do_free_with_size(ptr, size, AlignAsPolicy(alignment)); } -extern "C" void* TCMallocInternalCalloc(size_t n, size_t elem_size) noexcept { +extern "C" void* TCMallocInternalCalloc(size_t n, size_t elem_size) noexcept { // Overflow check const size_t size = n * elem_size; if (elem_size != 0 && size / elem_size != n) { @@ -2085,7 +2085,7 @@ extern 
"C" void* TCMallocInternalCalloc(size_t n, size_t elem_size) noexcept { // Here and below we use TCMALLOC_ALIAS (if supported) to make // identical functions aliases. This saves space in L1 instruction // cache. As of now it saves ~9K. -extern "C" void TCMallocInternalCfree(void* ptr) noexcept +extern "C" void TCMallocInternalCfree(void* ptr) noexcept #ifdef TCMALLOC_ALIAS TCMALLOC_ALIAS(TCMallocInternalFree); #else @@ -2143,7 +2143,7 @@ static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* do_realloc(void* old_ptr, } extern "C" void* TCMallocInternalRealloc(void* old_ptr, - size_t new_size) noexcept { + size_t new_size) noexcept { if (old_ptr == NULL) { return fast_alloc(MallocPolicy(), new_size); } @@ -2155,7 +2155,7 @@ extern "C" void* TCMallocInternalRealloc(void* old_ptr, } extern "C" void* TCMallocInternalNewNothrow(size_t size, - const std::nothrow_t&) noexcept { + const std::nothrow_t&) noexcept { return fast_alloc(CppPolicy().Nothrow(), size); } @@ -2166,7 +2166,7 @@ extern "C" tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow( return {p, capacity}; } -extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDelete(void* p) noexcept +extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDelete(void* p) noexcept #ifdef TCMALLOC_ALIAS TCMALLOC_ALIAS(TCMallocInternalFree); #else @@ -2176,7 +2176,7 @@ extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDelete(void* p) noexcept #endif // TCMALLOC_ALIAS extern "C" void TCMallocInternalDeleteAligned( - void* p, std::align_val_t alignment) noexcept + void* p, std::align_val_t alignment) noexcept #if defined(TCMALLOC_ALIAS) && defined(NDEBUG) TCMALLOC_ALIAS(TCMallocInternalDelete); #else @@ -2190,27 +2190,27 @@ extern "C" void TCMallocInternalDeleteAligned( #endif extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDeleteSized( - void* p, size_t size) noexcept { - ASSERT(CorrectSize(p, size, DefaultAlignPolicy())); - do_free_with_size(p, size, DefaultAlignPolicy()); + void* p, size_t size) noexcept 
{ + ASSERT(CorrectSize(p, size, DefaultAlignPolicy())); + do_free_with_size(p, size, DefaultAlignPolicy()); } extern "C" void TCMallocInternalDeleteSizedAligned( - void* p, size_t t, std::align_val_t alignment) noexcept { - return do_free_with_size(p, t, AlignAsPolicy(alignment)); + void* p, size_t t, std::align_val_t alignment) noexcept { + return do_free_with_size(p, t, AlignAsPolicy(alignment)); } -extern "C" void TCMallocInternalDeleteArraySized(void* p, size_t size) noexcept +extern "C" void TCMallocInternalDeleteArraySized(void* p, size_t size) noexcept #ifdef TCMALLOC_ALIAS TCMALLOC_ALIAS(TCMallocInternalDeleteSized); #else { - do_free_with_size(p, size, DefaultAlignPolicy()); + do_free_with_size(p, size, DefaultAlignPolicy()); } #endif extern "C" void TCMallocInternalDeleteArraySizedAligned( - void* p, size_t t, std::align_val_t alignment) noexcept + void* p, size_t t, std::align_val_t alignment) noexcept #ifdef TCMALLOC_ALIAS TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned); #else @@ -2223,7 +2223,7 @@ extern "C" void TCMallocInternalDeleteArraySizedAligned( // (via ::operator delete(ptr, nothrow)). // But it's really the same as normal delete, so we just do the same thing. 
extern "C" void TCMallocInternalDeleteNothrow(void* p, - const std::nothrow_t&) noexcept + const std::nothrow_t&) noexcept #ifdef TCMALLOC_ALIAS TCMALLOC_ALIAS(TCMallocInternalFree); #else @@ -2234,7 +2234,7 @@ extern "C" void TCMallocInternalDeleteNothrow(void* p, #if defined(TCMALLOC_ALIAS) && defined(NDEBUG) extern "C" void TCMallocInternalDeleteAligned_nothrow( - void* p, std::align_val_t alignment, const std::nothrow_t& nt) noexcept + void* p, std::align_val_t alignment, const std::nothrow_t& nt) noexcept TCMALLOC_ALIAS(TCMallocInternalDelete); #else extern "C" ABSL_ATTRIBUTE_SECTION( @@ -2242,7 +2242,7 @@ extern "C" ABSL_ATTRIBUTE_SECTION( std::align_val_t alignment, const std::nothrow_t& - nt) noexcept { + nt) noexcept { ASSERT(CorrectAlignment(p, alignment)); return TCMallocInternalDelete(p); } @@ -2253,7 +2253,7 @@ extern "C" void* TCMallocInternalNewArray(size_t size) TCMALLOC_ALIAS(TCMallocInternalNew); #else { - return fast_alloc(CppPolicy().WithoutHooks(), size); + return fast_alloc(CppPolicy().WithoutHooks(), size); } #endif // TCMALLOC_ALIAS @@ -2268,7 +2268,7 @@ extern "C" void* TCMallocInternalNewArrayAligned(size_t size, #endif extern "C" void* TCMallocInternalNewArrayNothrow(size_t size, - const std::nothrow_t&) noexcept + const std::nothrow_t&) noexcept #ifdef TCMALLOC_ALIAS TCMALLOC_ALIAS(TCMallocInternalNewNothrow); #else @@ -2294,7 +2294,7 @@ extern "C" ABSL_ATTRIBUTE_SECTION( } #endif -extern "C" void TCMallocInternalDeleteArray(void* p) noexcept +extern "C" void TCMallocInternalDeleteArray(void* p) noexcept #ifdef TCMALLOC_ALIAS TCMALLOC_ALIAS(TCMallocInternalFree); #else @@ -2304,7 +2304,7 @@ extern "C" void TCMallocInternalDeleteArray(void* p) noexcept #endif // TCMALLOC_ALIAS extern "C" void TCMallocInternalDeleteArrayAligned( - void* p, std::align_val_t alignment) noexcept + void* p, std::align_val_t alignment) noexcept #if defined(TCMALLOC_ALIAS) && defined(NDEBUG) TCMALLOC_ALIAS(TCMallocInternalDelete); #else @@ -2315,7 +2315,7 @@ 
extern "C" void TCMallocInternalDeleteArrayAligned( #endif extern "C" void TCMallocInternalDeleteArrayNothrow( - void* p, const std::nothrow_t&) noexcept + void* p, const std::nothrow_t&) noexcept #ifdef TCMALLOC_ALIAS TCMALLOC_ALIAS(TCMallocInternalFree); #else @@ -2326,7 +2326,7 @@ extern "C" void TCMallocInternalDeleteArrayNothrow( #if defined(TCMALLOC_ALIAS) && defined(NDEBUG) extern "C" void TCMallocInternalDeleteArrayAligned_nothrow( - void* p, std::align_val_t alignment, const std::nothrow_t&) noexcept + void* p, std::align_val_t alignment, const std::nothrow_t&) noexcept TCMALLOC_ALIAS(TCMallocInternalDelete); #else extern "C" ABSL_ATTRIBUTE_SECTION( @@ -2334,19 +2334,19 @@ extern "C" ABSL_ATTRIBUTE_SECTION( std::align_val_t alignment, const std:: - nothrow_t&) noexcept { + nothrow_t&) noexcept { ASSERT(CorrectAlignment(p, alignment)); return TCMallocInternalDelete(p); } #endif -extern "C" void* TCMallocInternalMemalign(size_t align, size_t size) noexcept { - ASSERT(absl::has_single_bit(align)); +extern "C" void* TCMallocInternalMemalign(size_t align, size_t size) noexcept { + ASSERT(absl::has_single_bit(align)); return fast_alloc(MallocPolicy().AlignAs(align), size); } extern "C" void* TCMallocInternalAlignedAlloc(size_t align, - size_t size) noexcept + size_t size) noexcept #if defined(TCMALLOC_ALIAS) && defined(NDEBUG) TCMALLOC_ALIAS(TCMallocInternalMemalign); #else @@ -2362,8 +2362,8 @@ extern "C" void* TCMallocInternalAlignedAlloc(size_t align, #endif extern "C" int TCMallocInternalPosixMemalign(void** result_ptr, size_t align, - size_t size) noexcept { - if (((align % sizeof(void*)) != 0) || !absl::has_single_bit(align)) { + size_t size) noexcept { + if (((align % sizeof(void*)) != 0) || !absl::has_single_bit(align)) { return EINVAL; } void* result = fast_alloc(MallocPolicy().Nothrow().AlignAs(align), size); @@ -2377,13 +2377,13 @@ extern "C" int TCMallocInternalPosixMemalign(void** result_ptr, size_t align, static size_t pagesize = 0; -extern "C" 
void* TCMallocInternalValloc(size_t size) noexcept { +extern "C" void* TCMallocInternalValloc(size_t size) noexcept { // Allocate page-aligned object of length >= size bytes if (pagesize == 0) pagesize = getpagesize(); return fast_alloc(MallocPolicy().Nothrow().AlignAs(pagesize), size); } -extern "C" void* TCMallocInternalPvalloc(size_t size) noexcept { +extern "C" void* TCMallocInternalPvalloc(size_t size) noexcept { // Round up size to a multiple of pagesize if (pagesize == 0) pagesize = getpagesize(); if (size == 0) { // pvalloc(0) should allocate one page, according to @@ -2393,30 +2393,30 @@ extern "C" void* TCMallocInternalPvalloc(size_t size) noexcept { return fast_alloc(MallocPolicy().Nothrow().AlignAs(pagesize), size); } -extern "C" void TCMallocInternalMallocStats(void) noexcept { +extern "C" void TCMallocInternalMallocStats(void) noexcept { do_malloc_stats(); } -extern "C" int TCMallocInternalMallOpt(int cmd, int value) noexcept { +extern "C" int TCMallocInternalMallOpt(int cmd, int value) noexcept { return do_mallopt(cmd, value); } -#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO -extern "C" struct mallinfo TCMallocInternalMallocInfo(void) noexcept { +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +extern "C" struct mallinfo TCMallocInternalMallocInfo(void) noexcept { return do_mallinfo(); } #endif -extern "C" size_t TCMallocInternalMallocSize(void* ptr) noexcept { - ASSERT(GetOwnership(ptr) != tcmalloc::MallocExtension::Ownership::kNotOwned); +extern "C" size_t TCMallocInternalMallocSize(void* ptr) noexcept { + ASSERT(GetOwnership(ptr) != tcmalloc::MallocExtension::Ownership::kNotOwned); return GetSize(ptr); } -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { -namespace { - +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + // The constructor allocates an object to ensure that initialization // runs before main(), and therefore we do not have a chance to become // multi-threaded before 
initialization. We also create the TSD key @@ -2434,8 +2434,8 @@ class TCMallocGuard { }; static TCMallocGuard module_enter_exit_hook; - -} // namespace -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h index 1a8eeb4157..e17bd43ba0 100644 --- a/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h +++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h @@ -26,14 +26,14 @@ #include <stddef.h> #include "absl/base/attributes.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" #include "tcmalloc/internal/declarations.h" // __THROW is defined in glibc systems. It means, counter-intuitively, // "This function will never throw an exception." It's an optional // optimization tool, but we may need to use it to match glibc prototypes. #ifndef __THROW // I guess we're not on a glibc system -#define __THROW __attribute__((__nothrow__)) +#define __THROW __attribute__((__nothrow__)) #endif #ifdef __cplusplus @@ -68,7 +68,7 @@ void TCMallocInternalMallocStats(void) __THROW ABSL_ATTRIBUTE_SECTION(google_malloc); int TCMallocInternalMallOpt(int cmd, int value) __THROW ABSL_ATTRIBUTE_SECTION(google_malloc); -#if defined(TCMALLOC_HAVE_STRUCT_MALLINFO) +#if defined(TCMALLOC_HAVE_STRUCT_MALLINFO) struct mallinfo TCMallocInternalMallocInfo(void) __THROW ABSL_ATTRIBUTE_SECTION(google_malloc); #endif @@ -120,7 +120,7 @@ void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW } #endif -void TCMallocInternalAcquireLocks(); -void TCMallocInternalReleaseLocks(); - +void TCMallocInternalAcquireLocks(); +void TCMallocInternalReleaseLocks(); + #endif // TCMALLOC_TCMALLOC_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc index f940120f46..fac4c5bb56 
100644 --- a/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc @@ -25,14 +25,14 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/container/flat_hash_set.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/node_hash_set.h" #include "tcmalloc/common.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/malloc_extension.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { // Alloc a size that should always fail. @@ -142,7 +142,7 @@ class LargeAllocationTest : public ::testing::Test { TEST_F(LargeAllocationTest, UniqueAddresses) { constexpr int kZeroTimes = 1024; - absl::flat_hash_set<void*> ptrs; + absl::flat_hash_set<void*> ptrs; for (int i = 0; i < kZeroTimes; ++i) { void* p = malloc(1); ASSERT_NE(p, nullptr); @@ -200,5 +200,5 @@ TEST_F(LargeAllocationTest, NearMaxAddressBits) { } } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h index d81f8f3be0..111ac66829 100644 --- a/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h +++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h @@ -14,7 +14,7 @@ // // This file defines policies used when allocation memory. // -// An allocation policy encapsulates four policies: +// An allocation policy encapsulates four policies: // // - Out of memory policy. // Dictates how to handle OOM conditions. @@ -43,20 +43,20 @@ // // Returns true if allocation hooks must be invoked. // static bool invoke_hooks(); // }; -// -// - NUMA partition policy -// When NUMA awareness is enabled this dictates which NUMA partition we will -// allocate memory from. Must be trivially copyable. -// -// struct NumaPartitionPolicyTemplate { -// // Returns the NUMA partition to allocate from. 
-// size_t partition() const; -// -// // Returns the NUMA partition to allocate from multiplied by -// // kNumBaseClasses - i.e. the first size class that corresponds to the -// // NUMA partition to allocate from. -// size_t scaled_partition() const; -// }; +// +// - NUMA partition policy +// When NUMA awareness is enabled this dictates which NUMA partition we will +// allocate memory from. Must be trivially copyable. +// +// struct NumaPartitionPolicyTemplate { +// // Returns the NUMA partition to allocate from. +// size_t partition() const; +// +// // Returns the NUMA partition to allocate from multiplied by +// // kNumBaseClasses - i.e. the first size class that corresponds to the +// // NUMA partition to allocate from. +// size_t scaled_partition() const; +// }; #ifndef TCMALLOC_TCMALLOC_POLICY_H_ #define TCMALLOC_TCMALLOC_POLICY_H_ @@ -68,13 +68,13 @@ #include <cstddef> #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/numa.h" -#include "tcmalloc/internal/percpu.h" -#include "tcmalloc/static_vars.h" +#include "tcmalloc/internal/numa.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/static_vars.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // NullOomPolicy: returns nullptr struct NullOomPolicy { @@ -143,47 +143,47 @@ struct NoHooksPolicy { static constexpr bool invoke_hooks() { return false; } }; -// Use a fixed NUMA partition. -class FixedNumaPartitionPolicy { - public: - explicit constexpr FixedNumaPartitionPolicy(size_t partition) - : partition_(partition) {} - - size_t constexpr partition() const { return partition_; } - - size_t constexpr scaled_partition() const { - return partition_ * kNumBaseClasses; - } - - private: - size_t partition_; -}; - -// Use the NUMA partition which the executing CPU is local to. 
-struct LocalNumaPartitionPolicy { - // Note that the partition returned may change between calls if the executing - // thread migrates between NUMA nodes & partitions. Users of this function - // should not rely upon multiple invocations returning the same partition. - size_t partition() const { - return Static::numa_topology().GetCurrentPartition(); - } - size_t scaled_partition() const { - return Static::numa_topology().GetCurrentScaledPartition(); - } -}; - +// Use a fixed NUMA partition. +class FixedNumaPartitionPolicy { + public: + explicit constexpr FixedNumaPartitionPolicy(size_t partition) + : partition_(partition) {} + + size_t constexpr partition() const { return partition_; } + + size_t constexpr scaled_partition() const { + return partition_ * kNumBaseClasses; + } + + private: + size_t partition_; +}; + +// Use the NUMA partition which the executing CPU is local to. +struct LocalNumaPartitionPolicy { + // Note that the partition returned may change between calls if the executing + // thread migrates between NUMA nodes & partitions. Users of this function + // should not rely upon multiple invocations returning the same partition. + size_t partition() const { + return Static::numa_topology().GetCurrentPartition(); + } + size_t scaled_partition() const { + return Static::numa_topology().GetCurrentScaledPartition(); + } +}; + // TCMallocPolicy defines the compound policy object containing // the OOM, alignment and hooks policies. // Is trivially constructible, copyable and destructible. 
template <typename OomPolicy = CppOomPolicy, typename AlignPolicy = DefaultAlignPolicy, - typename HooksPolicy = InvokeHooksPolicy, - typename NumaPolicy = LocalNumaPartitionPolicy> + typename HooksPolicy = InvokeHooksPolicy, + typename NumaPolicy = LocalNumaPartitionPolicy> class TCMallocPolicy { public: constexpr TCMallocPolicy() = default; - explicit constexpr TCMallocPolicy(AlignPolicy align, NumaPolicy numa) - : align_(align), numa_(numa) {} + explicit constexpr TCMallocPolicy(AlignPolicy align, NumaPolicy numa) + : align_(align), numa_(numa) {} // OOM policy static void* handle_oom(size_t size) { return OomPolicy::handle_oom(size); } @@ -191,70 +191,70 @@ class TCMallocPolicy { // Alignment policy constexpr size_t align() const { return align_.align(); } - // NUMA partition - constexpr size_t numa_partition() const { return numa_.partition(); } - - // NUMA partition multiplied by kNumBaseClasses - constexpr size_t scaled_numa_partition() const { - return numa_.scaled_partition(); - } - + // NUMA partition + constexpr size_t numa_partition() const { return numa_.partition(); } + + // NUMA partition multiplied by kNumBaseClasses + constexpr size_t scaled_numa_partition() const { + return numa_.scaled_partition(); + } + // Hooks policy static constexpr bool invoke_hooks() { return HooksPolicy::invoke_hooks(); } // Returns this policy aligned as 'align' template <typename align_t> - constexpr TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy, NumaPolicy> - AlignAs( + constexpr TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy, NumaPolicy> + AlignAs( align_t align) const { - return TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy, NumaPolicy>( - AlignAsPolicy{align}, numa_); + return TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy, NumaPolicy>( + AlignAsPolicy{align}, numa_); } // Returns this policy with a nullptr OOM policy. 
- constexpr TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy, - NumaPolicy> Nothrow() + constexpr TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy, + NumaPolicy> Nothrow() const { - return TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy, - NumaPolicy>(align_, numa_); + return TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy, + NumaPolicy>(align_, numa_); } // Returns this policy with NewAllocHook invocations disabled. - constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, NumaPolicy> + constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, NumaPolicy> WithoutHooks() const { - return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, - NumaPolicy>(align_, numa_); - } - - // Returns this policy with a fixed NUMA partition. - constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, - FixedNumaPartitionPolicy> InNumaPartition(size_t partition) const { - return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, - FixedNumaPartitionPolicy>( - align_, FixedNumaPartitionPolicy{partition}); - } - - // Returns this policy with a fixed NUMA partition matching that of the - // previously allocated `ptr`. - constexpr auto InSameNumaPartitionAs(void* ptr) const { - return InNumaPartition(NumaPartitionFromPointer(ptr)); + return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, + NumaPolicy>(align_, numa_); } + // Returns this policy with a fixed NUMA partition. + constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, + FixedNumaPartitionPolicy> InNumaPartition(size_t partition) const { + return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, + FixedNumaPartitionPolicy>( + align_, FixedNumaPartitionPolicy{partition}); + } + + // Returns this policy with a fixed NUMA partition matching that of the + // previously allocated `ptr`. 
+ constexpr auto InSameNumaPartitionAs(void* ptr) const { + return InNumaPartition(NumaPartitionFromPointer(ptr)); + } + static constexpr bool can_return_nullptr() { return OomPolicy::can_return_nullptr(); } private: AlignPolicy align_; - NumaPolicy numa_; + NumaPolicy numa_; }; using CppPolicy = TCMallocPolicy<CppOomPolicy, DefaultAlignPolicy>; using MallocPolicy = TCMallocPolicy<MallocOomPolicy, MallocAlignPolicy>; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_TCMALLOC_POLICY_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc b/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc index 89cc779af1..0469e9a49e 100644 --- a/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc +++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc @@ -20,9 +20,9 @@ #include "absl/base/macros.h" #include "tcmalloc/transfer_cache.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; @@ -148,14 +148,14 @@ void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) { src->PopBatch(batch_size, batch); static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, "not enough space in batch"); - Static::transfer_cache().InsertRange(cl, - absl::Span<void*>(batch, batch_size)); + Static::transfer_cache().InsertRange(cl, + absl::Span<void*>(batch, batch_size)); N -= batch_size; } src->PopBatch(N, batch); static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, "not enough space in batch"); - Static::transfer_cache().InsertRange(cl, absl::Span<void*>(batch, N)); + Static::transfer_cache().InsertRange(cl, absl::Span<void*>(batch, N)); size_ -= delta_bytes; } @@ -412,6 +412,6 @@ void ThreadCache::set_overall_thread_cache_size(size_t 
new_size) { RecomputePerThreadCacheSize(); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache.h b/contrib/libs/tcmalloc/tcmalloc/thread_cache.h index ae6cef869f..48d89d61b6 100644 --- a/contrib/libs/tcmalloc/tcmalloc/thread_cache.h +++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache.h @@ -32,9 +32,9 @@ #include "tcmalloc/static_vars.h" #include "tcmalloc/tracking.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { //------------------------------------------------------------------- // Data kept per thread @@ -338,8 +338,8 @@ inline ThreadCache* ThreadCache::GetCache() { return (ABSL_PREDICT_TRUE(tc != nullptr)) ? tc : CreateCacheIfNecessary(); } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_THREAD_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc index 5b2d10b2ac..f2231b3183 100644 --- a/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc @@ -25,7 +25,7 @@ #include "gtest/gtest.h" #include "absl/strings/str_cat.h" -#include "benchmark/benchmark.h" +#include "benchmark/benchmark.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/internal/memory_stats.h" #include "tcmalloc/internal/parameter_accessors.h" diff --git a/contrib/libs/tcmalloc/tcmalloc/tracking.h b/contrib/libs/tcmalloc/tcmalloc/tracking.h index 68d4c59b9c..43ec993b86 100644 --- a/contrib/libs/tcmalloc/tcmalloc/tracking.h +++ b/contrib/libs/tcmalloc/tcmalloc/tracking.h @@ -47,9 +47,9 @@ #ifndef TCMALLOC_TRACK_ALLOCS // #define TCMALLOC_TRACK_ALLOCS #endif -GOOGLE_MALLOC_SECTION_BEGIN 
+GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { #if 1 #define TCMALLOC_HAVE_TRACKING 0 @@ -72,8 +72,8 @@ enum TrackingStat { kTCInsertMiss = 7, // # of times the object list misses the transfer cache. kTCRemoveHit = 8, // # of times object list fetching hits transfer cache. kTCRemoveMiss = 9, // # of times object list fetching misses transfer cache. - kTCElementsPlunder = 10, // # of elements plundered from the transfer cache. - kNumTrackingStats = 11, + kTCElementsPlunder = 10, // # of elements plundered from the transfer cache. + kNumTrackingStats = 11, }; namespace tracking { @@ -83,7 +83,7 @@ void Report(TrackingStat stat, size_t cl, ssize_t count); // Dump all tracking data to <out>. We could support various other // mechanisms for data delivery without too much trouble... -void Print(Printer* out); +void Print(Printer* out); // Call on startup during tcmalloc initialization. void Init(); @@ -95,15 +95,15 @@ void GetProperties(std::map<std::string, MallocExtension::Property>* result); #if !TCMALLOC_HAVE_TRACKING // no tracking, these are all no-ops inline void Report(TrackingStat stat, size_t cl, ssize_t count) {} -inline void Print(Printer* out) {} +inline void Print(Printer* out) {} inline void Init() {} inline void GetProperties( std::map<std::string, MallocExtension::Property>* result) {} #endif } // namespace tracking -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_TRACKING_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc index efde485288..9138af43f8 100644 --- a/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc @@ -14,7 +14,7 @@ #include "tcmalloc/transfer_cache.h" -#include <fcntl.h> +#include <fcntl.h> #include <string.h> #include <algorithm> @@ -24,128 
+24,128 @@ #include "tcmalloc/common.h" #include "tcmalloc/experiment.h" #include "tcmalloc/guarded_page_allocator.h" -#include "tcmalloc/internal/cache_topology.h" -#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/cache_topology.h" +#include "tcmalloc/internal/environment.h" #include "tcmalloc/internal/linked_list.h" #include "tcmalloc/internal/logging.h" -#include "tcmalloc/internal/optimization.h" -#include "tcmalloc/internal/util.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/util.h" #include "tcmalloc/static_vars.h" #include "tcmalloc/tracking.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { - -absl::string_view TransferCacheImplementationToLabel( - TransferCacheImplementation type) { - switch (type) { - case TransferCacheImplementation::Legacy: - return "LEGACY"; - case TransferCacheImplementation::None: - return "NO_TRANSFERCACHE"; - case TransferCacheImplementation::Ring: - return "RING"; - default: - ASSUME(false); - } -} - +namespace tcmalloc_internal { + +absl::string_view TransferCacheImplementationToLabel( + TransferCacheImplementation type) { + switch (type) { + case TransferCacheImplementation::Legacy: + return "LEGACY"; + case TransferCacheImplementation::None: + return "NO_TRANSFERCACHE"; + case TransferCacheImplementation::Ring: + return "RING"; + default: + ASSUME(false); + } +} + #ifndef TCMALLOC_SMALL_BUT_SLOW -size_t StaticForwarder::class_to_size(int size_class) { +size_t StaticForwarder::class_to_size(int size_class) { return Static::sizemap().class_to_size(size_class); } -size_t StaticForwarder::num_objects_to_move(int size_class) { +size_t StaticForwarder::num_objects_to_move(int size_class) { return Static::sizemap().num_objects_to_move(size_class); } -void *StaticForwarder::Alloc(size_t size, int alignment) { - return Static::arena().Alloc(size, alignment); -} - -void ShardedTransferCacheManager::Init() { - if 
(!IsExperimentActive( - Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE)) { - return; - } - num_shards_ = BuildCpuToL3CacheMap(l3_cache_index_); - cache_ = reinterpret_cast<Cache *>(Static::arena().Alloc( - sizeof(Cache) * kNumClasses * num_shards_, ABSL_CACHELINE_SIZE)); - ASSERT(cache_ != nullptr); - for (int shard = 0; shard < num_shards_; ++shard) { - for (int cl = 0; cl < kNumClasses; ++cl) { - const int index = shard * kNumClasses + cl; - const int size_per_object = Static::sizemap().class_to_size(cl); - static constexpr int k12MB = 12 << 20; - static constexpr int min_size = 4096; - const int use_this_size_class = size_per_object >= min_size; - const int capacity = use_this_size_class ? k12MB / size_per_object : 0; - active_for_class_[cl] = use_this_size_class; - new (&cache_[index].tc) - TransferCache(nullptr, capacity > 0 ? cl : 0, {capacity, capacity}); - cache_[index].tc.freelist().Init(cl); - } - } -} - -size_t ShardedTransferCacheManager::TotalBytes() { - if (cache_ == nullptr) return 0; - size_t out = 0; - for (int shard = 0; shard < num_shards_; ++shard) { - for (int cl = 0; cl < kNumClasses; ++cl) { - const int bytes_per_entry = Static::sizemap().class_to_size(cl); - if (bytes_per_entry <= 0) continue; - const int index = shard * kNumClasses + cl; - out += cache_[index].tc.tc_length() * bytes_per_entry; - } - } - return out; -} - -void ShardedTransferCacheManager::BackingTransferCache::InsertRange( - absl::Span<void *> batch) const { - Static::transfer_cache().InsertRange(size_class_, batch); -} - -ABSL_MUST_USE_RESULT int -ShardedTransferCacheManager::BackingTransferCache::RemoveRange(void **batch, - int n) const { - return Static::transfer_cache().RemoveRange(size_class_, batch, n); -} - -TransferCacheImplementation TransferCacheManager::ChooseImplementation() { - // Prefer ring, if we're forcing it on. 
- if (IsExperimentActive( - Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE)) { - return TransferCacheImplementation::Ring; - } - - // Consider opt-outs - const char *e = thread_safe_getenv("TCMALLOC_INTERNAL_TRANSFERCACHE_CONTROL"); - if (e) { - if (e[0] == '0') { - return TransferCacheImplementation::Legacy; - } - if (e[0] == '1') { - return TransferCacheImplementation::Ring; - } - Crash(kCrash, __FILE__, __LINE__, "bad env var", e); - } - - // Otherwise, default to ring. - return TransferCacheImplementation::Ring; +void *StaticForwarder::Alloc(size_t size, int alignment) { + return Static::arena().Alloc(size, alignment); } +void ShardedTransferCacheManager::Init() { + if (!IsExperimentActive( + Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE)) { + return; + } + num_shards_ = BuildCpuToL3CacheMap(l3_cache_index_); + cache_ = reinterpret_cast<Cache *>(Static::arena().Alloc( + sizeof(Cache) * kNumClasses * num_shards_, ABSL_CACHELINE_SIZE)); + ASSERT(cache_ != nullptr); + for (int shard = 0; shard < num_shards_; ++shard) { + for (int cl = 0; cl < kNumClasses; ++cl) { + const int index = shard * kNumClasses + cl; + const int size_per_object = Static::sizemap().class_to_size(cl); + static constexpr int k12MB = 12 << 20; + static constexpr int min_size = 4096; + const int use_this_size_class = size_per_object >= min_size; + const int capacity = use_this_size_class ? k12MB / size_per_object : 0; + active_for_class_[cl] = use_this_size_class; + new (&cache_[index].tc) + TransferCache(nullptr, capacity > 0 ? 
cl : 0, {capacity, capacity}); + cache_[index].tc.freelist().Init(cl); + } + } +} + +size_t ShardedTransferCacheManager::TotalBytes() { + if (cache_ == nullptr) return 0; + size_t out = 0; + for (int shard = 0; shard < num_shards_; ++shard) { + for (int cl = 0; cl < kNumClasses; ++cl) { + const int bytes_per_entry = Static::sizemap().class_to_size(cl); + if (bytes_per_entry <= 0) continue; + const int index = shard * kNumClasses + cl; + out += cache_[index].tc.tc_length() * bytes_per_entry; + } + } + return out; +} + +void ShardedTransferCacheManager::BackingTransferCache::InsertRange( + absl::Span<void *> batch) const { + Static::transfer_cache().InsertRange(size_class_, batch); +} + +ABSL_MUST_USE_RESULT int +ShardedTransferCacheManager::BackingTransferCache::RemoveRange(void **batch, + int n) const { + return Static::transfer_cache().RemoveRange(size_class_, batch, n); +} + +TransferCacheImplementation TransferCacheManager::ChooseImplementation() { + // Prefer ring, if we're forcing it on. + if (IsExperimentActive( + Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE)) { + return TransferCacheImplementation::Ring; + } + + // Consider opt-outs + const char *e = thread_safe_getenv("TCMALLOC_INTERNAL_TRANSFERCACHE_CONTROL"); + if (e) { + if (e[0] == '0') { + return TransferCacheImplementation::Legacy; + } + if (e[0] == '1') { + return TransferCacheImplementation::Ring; + } + Crash(kCrash, __FILE__, __LINE__, "bad env var", e); + } + + // Otherwise, default to ring. + return TransferCacheImplementation::Ring; +} + int TransferCacheManager::DetermineSizeClassToEvict() { int t = next_to_evict_.load(std::memory_order_relaxed); if (t >= kNumClasses) t = 1; next_to_evict_.store(t + 1, std::memory_order_relaxed); // Ask nicely first. 
- if (implementation_ == TransferCacheImplementation::Ring) { - if (cache_[t].rbtc.HasSpareCapacity(t)) return t; + if (implementation_ == TransferCacheImplementation::Ring) { + if (cache_[t].rbtc.HasSpareCapacity(t)) return t; } else { - if (cache_[t].tc.HasSpareCapacity(t)) return t; + if (cache_[t].tc.HasSpareCapacity(t)) return t; } // But insist on the second try. @@ -156,7 +156,7 @@ int TransferCacheManager::DetermineSizeClassToEvict() { } #endif - -} // namespace tcmalloc_internal + +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h index 8b47eefafb..a63fb23beb 100644 --- a/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h @@ -19,7 +19,7 @@ #include <stdint.h> #include <atomic> -#include <limits> +#include <limits> #include <utility> #include "absl/base/attributes.h" @@ -30,249 +30,249 @@ #include "absl/types/span.h" #include "tcmalloc/central_freelist.h" #include "tcmalloc/common.h" -#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/logging.h" #include "tcmalloc/transfer_cache_stats.h" #ifndef TCMALLOC_SMALL_BUT_SLOW #include "tcmalloc/transfer_cache_internals.h" #endif -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { - -enum class TransferCacheImplementation { - Legacy, - None, - Ring, -}; - -absl::string_view TransferCacheImplementationToLabel( - TransferCacheImplementation type); - +namespace tcmalloc_internal { + +enum class TransferCacheImplementation { + Legacy, + None, + Ring, +}; + +absl::string_view TransferCacheImplementationToLabel( + TransferCacheImplementation type); + #ifndef TCMALLOC_SMALL_BUT_SLOW -class StaticForwarder { - public: - static size_t class_to_size(int size_class); - static size_t num_objects_to_move(int size_class); - static void 
*Alloc(size_t size, int alignment = kAlignment) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); -}; - -// This transfer-cache is set up to be sharded per L3 cache. It is backed by -// the non-sharded "normal" TransferCacheManager. -class ShardedTransferCacheManager { - public: - constexpr ShardedTransferCacheManager() {} - - void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); - - bool should_use(int cl) const { return active_for_class_[cl]; } - - size_t TotalBytes(); - - void *Pop(int cl) { - void *batch[1]; - const int got = cache_[get_index(cl)].tc.RemoveRange(cl, batch, 1); - return got == 1 ? batch[0] : nullptr; - } - - void Push(int cl, void *ptr) { - cache_[get_index(cl)].tc.InsertRange(cl, {&ptr, 1}); - } - - // All caches not touched since last attempt will return all objects - // to the non-sharded TransferCache. - void Plunder() { - if (cache_ == nullptr || num_shards_ == 0) return; - for (int i = 0; i < num_shards_ * kNumClasses; ++i) { - cache_[i].tc.TryPlunder(cache_[i].tc.freelist().size_class()); - } - } - - private: - // The Manager is set up so that stealing is disabled for this TransferCache. - class Manager : public StaticForwarder { - public: - static constexpr int DetermineSizeClassToEvict() { return -1; } - static constexpr bool MakeCacheSpace(int) { return false; } - static constexpr bool ShrinkCache(int) { return false; } - }; - - // Forwards calls to the unsharded TransferCache. 
- class BackingTransferCache { - public: - void Init(int cl) { size_class_ = cl; } - void InsertRange(absl::Span<void *> batch) const; - ABSL_MUST_USE_RESULT int RemoveRange(void **batch, int n) const; - int size_class() const { return size_class_; } - - private: - int size_class_ = -1; - }; - - using TransferCache = - internal_transfer_cache::RingBufferTransferCache<BackingTransferCache, - Manager>; - - union Cache { - constexpr Cache() : dummy(false) {} - ~Cache() {} - TransferCache tc; - bool dummy; - }; - - int get_index(int cl) { - const int cpu = tcmalloc::tcmalloc_internal::subtle::percpu::RseqCpuId(); - ASSERT(cpu < 256); - ASSERT(cpu >= 0); - return get_index(cpu, cl); - } - - int get_index(int cpu, int cl) { - const int shard = l3_cache_index_[cpu]; - ASSERT(shard < num_shards_); - const int index = shard * kNumClasses + cl; - ASSERT(index < num_shards_ * kNumClasses); - return index; - } - - // Mapping from cpu to the L3 cache used. - uint8_t l3_cache_index_[CPU_SETSIZE] = {0}; - - Cache *cache_ = nullptr; - int num_shards_ = 0; - bool active_for_class_[kNumClasses] = {false}; -}; - -class TransferCacheManager : public StaticForwarder { +class StaticForwarder { + public: + static size_t class_to_size(int size_class); + static size_t num_objects_to_move(int size_class); + static void *Alloc(size_t size, int alignment = kAlignment) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); +}; + +// This transfer-cache is set up to be sharded per L3 cache. It is backed by +// the non-sharded "normal" TransferCacheManager. +class ShardedTransferCacheManager { + public: + constexpr ShardedTransferCacheManager() {} + + void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + bool should_use(int cl) const { return active_for_class_[cl]; } + + size_t TotalBytes(); + + void *Pop(int cl) { + void *batch[1]; + const int got = cache_[get_index(cl)].tc.RemoveRange(cl, batch, 1); + return got == 1 ? 
batch[0] : nullptr; + } + + void Push(int cl, void *ptr) { + cache_[get_index(cl)].tc.InsertRange(cl, {&ptr, 1}); + } + + // All caches not touched since last attempt will return all objects + // to the non-sharded TransferCache. + void Plunder() { + if (cache_ == nullptr || num_shards_ == 0) return; + for (int i = 0; i < num_shards_ * kNumClasses; ++i) { + cache_[i].tc.TryPlunder(cache_[i].tc.freelist().size_class()); + } + } + + private: + // The Manager is set up so that stealing is disabled for this TransferCache. + class Manager : public StaticForwarder { + public: + static constexpr int DetermineSizeClassToEvict() { return -1; } + static constexpr bool MakeCacheSpace(int) { return false; } + static constexpr bool ShrinkCache(int) { return false; } + }; + + // Forwards calls to the unsharded TransferCache. + class BackingTransferCache { + public: + void Init(int cl) { size_class_ = cl; } + void InsertRange(absl::Span<void *> batch) const; + ABSL_MUST_USE_RESULT int RemoveRange(void **batch, int n) const; + int size_class() const { return size_class_; } + + private: + int size_class_ = -1; + }; + + using TransferCache = + internal_transfer_cache::RingBufferTransferCache<BackingTransferCache, + Manager>; + + union Cache { + constexpr Cache() : dummy(false) {} + ~Cache() {} + TransferCache tc; + bool dummy; + }; + + int get_index(int cl) { + const int cpu = tcmalloc::tcmalloc_internal::subtle::percpu::RseqCpuId(); + ASSERT(cpu < 256); + ASSERT(cpu >= 0); + return get_index(cpu, cl); + } + + int get_index(int cpu, int cl) { + const int shard = l3_cache_index_[cpu]; + ASSERT(shard < num_shards_); + const int index = shard * kNumClasses + cl; + ASSERT(index < num_shards_ * kNumClasses); + return index; + } + + // Mapping from cpu to the L3 cache used. 
+ uint8_t l3_cache_index_[CPU_SETSIZE] = {0}; + + Cache *cache_ = nullptr; + int num_shards_ = 0; + bool active_for_class_[kNumClasses] = {false}; +}; + +class TransferCacheManager : public StaticForwarder { template <typename CentralFreeList, typename Manager> friend class internal_transfer_cache::TransferCache; using TransferCache = - internal_transfer_cache::TransferCache<tcmalloc_internal::CentralFreeList, + internal_transfer_cache::TransferCache<tcmalloc_internal::CentralFreeList, TransferCacheManager>; template <typename CentralFreeList, typename Manager> - friend class internal_transfer_cache::RingBufferTransferCache; - using RingBufferTransferCache = - internal_transfer_cache::RingBufferTransferCache< - tcmalloc_internal::CentralFreeList, TransferCacheManager>; + friend class internal_transfer_cache::RingBufferTransferCache; + using RingBufferTransferCache = + internal_transfer_cache::RingBufferTransferCache< + tcmalloc_internal::CentralFreeList, TransferCacheManager>; public: - constexpr TransferCacheManager() : next_to_evict_(1) {} + constexpr TransferCacheManager() : next_to_evict_(1) {} TransferCacheManager(const TransferCacheManager &) = delete; TransferCacheManager &operator=(const TransferCacheManager &) = delete; void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { - implementation_ = ChooseImplementation(); - for (int i = 0; i < kNumClasses; ++i) { - if (implementation_ == TransferCacheImplementation::Ring) { - new (&cache_[i].rbtc) RingBufferTransferCache(this, i); - } else { - new (&cache_[i].tc) TransferCache(this, i); - } - } - } - - void AcquireInternalLocks() { + implementation_ = ChooseImplementation(); for (int i = 0; i < kNumClasses; ++i) { - if (implementation_ == TransferCacheImplementation::Ring) { - cache_[i].rbtc.AcquireInternalLocks(); + if (implementation_ == TransferCacheImplementation::Ring) { + new (&cache_[i].rbtc) RingBufferTransferCache(this, i); } else { - cache_[i].tc.AcquireInternalLocks(); + new (&cache_[i].tc) 
TransferCache(this, i); } } } - void ReleaseInternalLocks() { - for (int i = 0; i < kNumClasses; ++i) { - if (implementation_ == TransferCacheImplementation::Ring) { - cache_[i].rbtc.ReleaseInternalLocks(); - } else { - cache_[i].tc.ReleaseInternalLocks(); - } - } - } - - void InsertRange(int size_class, absl::Span<void *> batch) { - if (implementation_ == TransferCacheImplementation::Ring) { - cache_[size_class].rbtc.InsertRange(size_class, batch); - } else { - cache_[size_class].tc.InsertRange(size_class, batch); - } + void AcquireInternalLocks() { + for (int i = 0; i < kNumClasses; ++i) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[i].rbtc.AcquireInternalLocks(); + } else { + cache_[i].tc.AcquireInternalLocks(); + } + } + } + + void ReleaseInternalLocks() { + for (int i = 0; i < kNumClasses; ++i) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[i].rbtc.ReleaseInternalLocks(); + } else { + cache_[i].tc.ReleaseInternalLocks(); + } + } + } + + void InsertRange(int size_class, absl::Span<void *> batch) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[size_class].rbtc.InsertRange(size_class, batch); + } else { + cache_[size_class].tc.InsertRange(size_class, batch); + } } ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int n) { - if (implementation_ == TransferCacheImplementation::Ring) { - return cache_[size_class].rbtc.RemoveRange(size_class, batch, n); - } else { - return cache_[size_class].tc.RemoveRange(size_class, batch, n); - } + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.RemoveRange(size_class, batch, n); + } else { + return cache_[size_class].tc.RemoveRange(size_class, batch, n); + } } - // All caches which have not been modified since the last time this method has - // been called will return all objects to the freelist. 
- void Plunder() { - for (int i = 0; i < kNumClasses; ++i) { - if (implementation_ == TransferCacheImplementation::Ring) { - cache_[i].rbtc.TryPlunder(i); - } else { - cache_[i].tc.TryPlunder(i); - } - } + // All caches which have not been modified since the last time this method has + // been called will return all objects to the freelist. + void Plunder() { + for (int i = 0; i < kNumClasses; ++i) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[i].rbtc.TryPlunder(i); + } else { + cache_[i].tc.TryPlunder(i); + } + } } - // This is not const because the underlying ring-buffer transfer cache - // function requires acquiring a lock. + // This is not const because the underlying ring-buffer transfer cache + // function requires acquiring a lock. size_t tc_length(int size_class) { - if (implementation_ == TransferCacheImplementation::Ring) { - return cache_[size_class].rbtc.tc_length(); - } else { - return cache_[size_class].tc.tc_length(); - } + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.tc_length(); + } else { + return cache_[size_class].tc.tc_length(); + } } - TransferCacheStats GetHitRateStats(int size_class) const { - if (implementation_ == TransferCacheImplementation::Ring) { - return cache_[size_class].rbtc.GetHitRateStats(); - } else { - return cache_[size_class].tc.GetHitRateStats(); - } + TransferCacheStats GetHitRateStats(int size_class) const { + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.GetHitRateStats(); + } else { + return cache_[size_class].tc.GetHitRateStats(); + } } - const CentralFreeList &central_freelist(int size_class) const { - if (implementation_ == TransferCacheImplementation::Ring) { - return cache_[size_class].rbtc.freelist(); - } else { - return cache_[size_class].tc.freelist(); - } + const CentralFreeList &central_freelist(int size_class) const { + if (implementation_ == TransferCacheImplementation::Ring) { + return
cache_[size_class].rbtc.freelist(); + } else { + return cache_[size_class].tc.freelist(); + } } - TransferCacheImplementation implementation() const { return implementation_; } - + TransferCacheImplementation implementation() const { return implementation_; } + private: - static TransferCacheImplementation ChooseImplementation(); - + static TransferCacheImplementation ChooseImplementation(); + int DetermineSizeClassToEvict(); bool ShrinkCache(int size_class) { - if (implementation_ == TransferCacheImplementation::Ring) { - return cache_[size_class].rbtc.ShrinkCache(size_class); - } else { - return cache_[size_class].tc.ShrinkCache(size_class); - } + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.ShrinkCache(size_class); + } else { + return cache_[size_class].tc.ShrinkCache(size_class); + } } - TransferCacheImplementation implementation_ = - TransferCacheImplementation::Legacy; + TransferCacheImplementation implementation_ = + TransferCacheImplementation::Legacy; std::atomic<int32_t> next_to_evict_; union Cache { constexpr Cache() : dummy(false) {} ~Cache() {} - TransferCache tc; - RingBufferTransferCache rbtc; + TransferCache tc; + RingBufferTransferCache rbtc; bool dummy; }; Cache cache_[kNumClasses]; @@ -293,49 +293,49 @@ class TransferCacheManager { } } - void InsertRange(int size_class, absl::Span<void *> batch) { - freelist_[size_class].InsertRange(batch); + void InsertRange(int size_class, absl::Span<void *> batch) { + freelist_[size_class].InsertRange(batch); } ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int n) { return freelist_[size_class].RemoveRange(batch, n); } - static constexpr size_t tc_length(int size_class) { return 0; } - - static constexpr TransferCacheStats GetHitRateStats(int size_class) { - return {0, 0, 0, 0}; - } + static constexpr size_t tc_length(int size_class) { return 0; } - const CentralFreeList &central_freelist(int size_class) const { - return freelist_[size_class]; +
static constexpr TransferCacheStats GetHitRateStats(int size_class) { + return {0, 0, 0, 0}; } - TransferCacheImplementation implementation() const { - return TransferCacheImplementation::None; + const CentralFreeList ¢ral_freelist(int size_class) const { + return freelist_[size_class]; } - void AcquireInternalLocks() {} - void ReleaseInternalLocks() {} - + TransferCacheImplementation implementation() const { + return TransferCacheImplementation::None; + } + + void AcquireInternalLocks() {} + void ReleaseInternalLocks() {} + private: CentralFreeList freelist_[kNumClasses]; } ABSL_CACHELINE_ALIGNED; -// A trivial no-op implementation. -struct ShardedTransferCacheManager { - static constexpr void Init() {} - static constexpr bool should_use(int cl) { return false; } - static constexpr void *Pop(int cl) { return nullptr; } - static constexpr void Push(int cl, void *ptr) {} - static constexpr size_t TotalBytes() { return 0; } - static constexpr void Plunder() {} -}; - +// A trivial no-op implementation. 
+struct ShardedTransferCacheManager { + static constexpr void Init() {} + static constexpr bool should_use(int cl) { return false; } + static constexpr void *Pop(int cl) { return nullptr; } + static constexpr void Push(int cl, void *ptr) {} + static constexpr size_t TotalBytes() { return 0; } + static constexpr void Plunder() {} +}; + #endif - -} // namespace tcmalloc_internal + +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_TRANSFER_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc index 70b1dcffc1..365366cca8 100644 --- a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc @@ -23,18 +23,18 @@ #include "tcmalloc/transfer_cache_internals.h" #include "tcmalloc/transfer_cache_stats.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { using TransferCacheEnv = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache< MinimalFakeCentralFreeList, FakeTransferCacheManager>>; -using RingBufferTransferCacheEnv = FakeTransferCacheEnvironment< - internal_transfer_cache::RingBufferTransferCache<MinimalFakeCentralFreeList, - FakeTransferCacheManager>>; -static constexpr int kSizeClass = 0; +using RingBufferTransferCacheEnv = FakeTransferCacheEnvironment< + internal_transfer_cache::RingBufferTransferCache<MinimalFakeCentralFreeList, + FakeTransferCacheManager>>; +static constexpr int kSizeClass = 0; template <typename Env> void BM_CrossThread(benchmark::State& state) { @@ -44,21 +44,21 @@ void BM_CrossThread(benchmark::State& state) { void* batch[kMaxObjectsToMove]; struct CrossThreadState { - CrossThreadState() : m{}, c{Cache(&m, 1), Cache(&m, 1)} {} - FakeTransferCacheManager m; + CrossThreadState() : m{}, c{Cache(&m, 1), 
Cache(&m, 1)} {} + FakeTransferCacheManager m; Cache c[2]; }; static CrossThreadState* s = nullptr; if (state.thread_index == 0) { s = new CrossThreadState(); - for (int i = 0; i < ::tcmalloc::tcmalloc_internal::internal_transfer_cache:: - kInitialCapacityInBatches / - 2; - ++i) { + for (int i = 0; i < ::tcmalloc::tcmalloc_internal::internal_transfer_cache:: + kInitialCapacityInBatches / + 2; + ++i) { for (Cache& c : s->c) { c.freelist().AllocateBatch(batch, kBatchSize); - c.InsertRange(kSizeClass, {batch, kBatchSize}); + c.InsertRange(kSizeClass, {batch, kBatchSize}); } } } @@ -67,9 +67,9 @@ void BM_CrossThread(benchmark::State& state) { int dst = (src + 1) % 2; for (auto iter : state) { benchmark::DoNotOptimize(batch); - (void)s->c[src].RemoveRange(kSizeClass, batch, kBatchSize); + (void)s->c[src].RemoveRange(kSizeClass, batch, kBatchSize); benchmark::DoNotOptimize(batch); - s->c[dst].InsertRange(kSizeClass, {batch, kBatchSize}); + s->c[dst].InsertRange(kSizeClass, {batch, kBatchSize}); benchmark::DoNotOptimize(batch); } if (state.thread_index == 0) { @@ -110,7 +110,7 @@ void BM_InsertRange(benchmark::State& state) { benchmark::DoNotOptimize(batch); state.ResumeTiming(); - e->transfer_cache().InsertRange(kSizeClass, {batch, kBatchSize}); + e->transfer_cache().InsertRange(kSizeClass, {batch, kBatchSize}); } } @@ -130,20 +130,20 @@ void BM_RemoveRange(benchmark::State& state) { benchmark::DoNotOptimize(e); state.ResumeTiming(); - (void)e->transfer_cache().RemoveRange(kSizeClass, batch, kBatchSize); + (void)e->transfer_cache().RemoveRange(kSizeClass, batch, kBatchSize); benchmark::DoNotOptimize(batch); } } BENCHMARK_TEMPLATE(BM_CrossThread, TransferCacheEnv)->ThreadRange(2, 64); -BENCHMARK_TEMPLATE(BM_CrossThread, RingBufferTransferCacheEnv) - ->ThreadRange(2, 64); +BENCHMARK_TEMPLATE(BM_CrossThread, RingBufferTransferCacheEnv) + ->ThreadRange(2, 64); BENCHMARK_TEMPLATE(BM_InsertRange, TransferCacheEnv); -BENCHMARK_TEMPLATE(BM_InsertRange, 
RingBufferTransferCacheEnv); +BENCHMARK_TEMPLATE(BM_InsertRange, RingBufferTransferCacheEnv); BENCHMARK_TEMPLATE(BM_RemoveRange, TransferCacheEnv); -BENCHMARK_TEMPLATE(BM_RemoveRange, RingBufferTransferCacheEnv); +BENCHMARK_TEMPLATE(BM_RemoveRange, RingBufferTransferCacheEnv); } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc index a31b06135e..5b5364ccb1 100644 --- a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc @@ -19,26 +19,26 @@ #include "tcmalloc/mock_transfer_cache.h" #include "tcmalloc/transfer_cache_internals.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { namespace { -using TransferCache = tcmalloc_internal::internal_transfer_cache::TransferCache< - tcmalloc_internal::MockCentralFreeList, - tcmalloc_internal::MockTransferCacheManager>; -using TransferCacheEnv = - tcmalloc_internal::FakeTransferCacheEnvironment<TransferCache>; +using TransferCache = tcmalloc_internal::internal_transfer_cache::TransferCache< + tcmalloc_internal::MockCentralFreeList, + tcmalloc_internal::MockTransferCacheManager>; +using TransferCacheEnv = + tcmalloc_internal::FakeTransferCacheEnvironment<TransferCache>; -using RingBufferTransferCache = - tcmalloc_internal::internal_transfer_cache::RingBufferTransferCache< - tcmalloc_internal::MockCentralFreeList, - tcmalloc_internal::MockTransferCacheManager>; -using RingBufferTransferCacheEnv = - tcmalloc_internal::FakeTransferCacheEnvironment<RingBufferTransferCache>; +using RingBufferTransferCache = + tcmalloc_internal::internal_transfer_cache::RingBufferTransferCache< + tcmalloc_internal::MockCentralFreeList, + tcmalloc_internal::MockTransferCacheManager>; +using RingBufferTransferCacheEnv = + 
tcmalloc_internal::FakeTransferCacheEnvironment<RingBufferTransferCache>; -template <typename Env> -int RunFuzzer(const uint8_t *data, size_t size) { - Env env; +template <typename Env> +int RunFuzzer(const uint8_t *data, size_t size) { + Env env; for (int i = 0; i < size; ++i) { switch (data[i] % 10) { case 0: @@ -61,13 +61,13 @@ int RunFuzzer(const uint8_t *data, size_t size) { } return 0; } - -} // namespace -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END - -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - tcmalloc::RunFuzzer<tcmalloc::TransferCacheEnv>(data, size); - tcmalloc::RunFuzzer<tcmalloc::RingBufferTransferCacheEnv>(data, size); - return 0; -} + +} // namespace +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + tcmalloc::RunFuzzer<tcmalloc::TransferCacheEnv>(data, size); + tcmalloc::RunFuzzer<tcmalloc::RingBufferTransferCacheEnv>(data, size); + return 0; +} diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h index 26d18fd99d..41b017e4ed 100644 --- a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h @@ -19,12 +19,12 @@ #include <stddef.h> #include <stdint.h> -#include <cmath> +#include <cmath> #include <limits> -#include "absl/numeric/bits.h" -#include "tcmalloc/internal/config.h" - +#include "absl/numeric/bits.h" +#include "tcmalloc/internal/config.h" + #ifdef __x86_64__ #include <emmintrin.h> #include <xmmintrin.h> @@ -48,20 +48,20 @@ #include "tcmalloc/central_freelist.h" #include "tcmalloc/common.h" #include "tcmalloc/experiment.h" -#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/atomic_stats_counter.h" #include "tcmalloc/internal/logging.h" #include "tcmalloc/tracking.h" #include "tcmalloc/transfer_cache_stats.h" -GOOGLE_MALLOC_SECTION_BEGIN -namespace 
tcmalloc::tcmalloc_internal::internal_transfer_cache { +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal::internal_transfer_cache { struct alignas(8) SizeInfo { int32_t used; int32_t capacity; }; -static constexpr int kMaxCapacityInBatches = 64; -static constexpr int kInitialCapacityInBatches = 16; +static constexpr int kMaxCapacityInBatches = 64; +static constexpr int kInitialCapacityInBatches = 16; // TransferCache is used to cache transfers of // sizemap.num_objects_to_move(size_class) back and forth between @@ -72,198 +72,198 @@ class TransferCache { using Manager = TransferCacheManager; using FreeList = CentralFreeList; - TransferCache(Manager *owner, int cl) - : TransferCache(owner, cl, CapacityNeeded(cl)) {} - - struct Capacity { - int capacity; - int max_capacity; - }; + TransferCache(Manager *owner, int cl) + : TransferCache(owner, cl, CapacityNeeded(cl)) {} - TransferCache(Manager *owner, int cl, Capacity capacity) + struct Capacity { + int capacity; + int max_capacity; + }; + + TransferCache(Manager *owner, int cl, Capacity capacity) : owner_(owner), lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY), - max_capacity_(capacity.max_capacity), - slot_info_(SizeInfo({0, capacity.capacity})), - low_water_mark_(std::numeric_limits<int>::max()), + max_capacity_(capacity.max_capacity), + slot_info_(SizeInfo({0, capacity.capacity})), + low_water_mark_(std::numeric_limits<int>::max()), slots_(nullptr), - freelist_do_not_access_directly_() { - freelist().Init(cl); - slots_ = max_capacity_ != 0 ? reinterpret_cast<void **>(owner_->Alloc( - max_capacity_ * sizeof(void *))) - : nullptr; - } + freelist_do_not_access_directly_() { + freelist().Init(cl); + slots_ = max_capacity_ != 0 ? 
reinterpret_cast<void **>(owner_->Alloc( + max_capacity_ * sizeof(void *))) + : nullptr; + } TransferCache(const TransferCache &) = delete; TransferCache &operator=(const TransferCache &) = delete; - // Compute initial and max capacity that we should configure this cache for. - static Capacity CapacityNeeded(size_t cl) { + // Compute initial and max capacity that we should configure this cache for. + static Capacity CapacityNeeded(size_t cl) { // We need at least 2 slots to store list head and tail. - static_assert(kMinObjectsToMove >= 2); - - const size_t bytes = Manager::class_to_size(cl); - if (cl <= 0 || bytes <= 0) return {0, 0}; - - // Limit the maximum size of the cache based on the size class. If this - // is not done, large size class objects will consume a lot of memory if - // they just sit in the transfer cache. - const size_t objs_to_move = Manager::num_objects_to_move(cl); - ASSERT(objs_to_move > 0); - - // Starting point for the maximum number of entries in the transfer cache. - // This actual maximum for a given size class may be lower than this - // maximum value. - int max_capacity = kMaxCapacityInBatches * objs_to_move; - // A transfer cache freelist can have anywhere from 0 to - // max_capacity_ slots to put link list chains into. - int capacity = kInitialCapacityInBatches * objs_to_move; - - // Limit each size class cache to at most 1MB of objects or one entry, - // whichever is greater. Total transfer cache memory used across all - // size classes then can't be greater than approximately - // 1MB * kMaxNumTransferEntries. - max_capacity = std::min<int>( - max_capacity, - std::max<int>(objs_to_move, - (1024 * 1024) / (bytes * objs_to_move) * objs_to_move)); - capacity = std::min(capacity, max_capacity); - - return {capacity, max_capacity}; - } - - // This transfercache implementation does not deal well with non-batch sized - // inserts and removes. 
- static constexpr bool IsFlexible() { return false; } - + static_assert(kMinObjectsToMove >= 2); + + const size_t bytes = Manager::class_to_size(cl); + if (cl <= 0 || bytes <= 0) return {0, 0}; + + // Limit the maximum size of the cache based on the size class. If this + // is not done, large size class objects will consume a lot of memory if + // they just sit in the transfer cache. + const size_t objs_to_move = Manager::num_objects_to_move(cl); + ASSERT(objs_to_move > 0); + + // Starting point for the maximum number of entries in the transfer cache. + // This actual maximum for a given size class may be lower than this + // maximum value. + int max_capacity = kMaxCapacityInBatches * objs_to_move; + // A transfer cache freelist can have anywhere from 0 to + // max_capacity_ slots to put link list chains into. + int capacity = kInitialCapacityInBatches * objs_to_move; + + // Limit each size class cache to at most 1MB of objects or one entry, + // whichever is greater. Total transfer cache memory used across all + // size classes then can't be greater than approximately + // 1MB * kMaxNumTransferEntries. + max_capacity = std::min<int>( + max_capacity, + std::max<int>(objs_to_move, + (1024 * 1024) / (bytes * objs_to_move) * objs_to_move)); + capacity = std::min(capacity, max_capacity); + + return {capacity, max_capacity}; + } + + // This transfercache implementation does not deal well with non-batch sized + // inserts and removes. + static constexpr bool IsFlexible() { return false; } + // These methods all do internal locking. // Insert the specified batch into the transfer cache. N is the number of // elements in the range. RemoveRange() is the opposite operation. 
- void InsertRange(int size_class, absl::Span<void *> batch) - ABSL_LOCKS_EXCLUDED(lock_) { - const int N = batch.size(); - const int B = Manager::num_objects_to_move(size_class); + void InsertRange(int size_class, absl::Span<void *> batch) + ABSL_LOCKS_EXCLUDED(lock_) { + const int N = batch.size(); + const int B = Manager::num_objects_to_move(size_class); ASSERT(0 < N && N <= B); auto info = slot_info_.load(std::memory_order_relaxed); - if (N == B) { - if (info.used + N <= max_capacity_) { - absl::base_internal::SpinLockHolder h(&lock_); - if (MakeCacheSpace(size_class, N)) { - // MakeCacheSpace can drop the lock, so refetch - info = slot_info_.load(std::memory_order_relaxed); - info.used += N; - SetSlotInfo(info); - - void **entry = GetSlot(info.used - N); - memcpy(entry, batch.data(), sizeof(void *) * N); - tracking::Report(kTCInsertHit, size_class, 1); - insert_hits_.LossyAdd(1); - return; - } + if (N == B) { + if (info.used + N <= max_capacity_) { + absl::base_internal::SpinLockHolder h(&lock_); + if (MakeCacheSpace(size_class, N)) { + // MakeCacheSpace can drop the lock, so refetch + info = slot_info_.load(std::memory_order_relaxed); + info.used += N; + SetSlotInfo(info); + + void **entry = GetSlot(info.used - N); + memcpy(entry, batch.data(), sizeof(void *) * N); + tracking::Report(kTCInsertHit, size_class, 1); + insert_hits_.LossyAdd(1); + return; + } } - insert_misses_.Add(1); - } else { - insert_non_batch_misses_.Add(1); + insert_misses_.Add(1); + } else { + insert_non_batch_misses_.Add(1); } - - tracking::Report(kTCInsertMiss, size_class, 1); - freelist().InsertRange(batch); + + tracking::Report(kTCInsertMiss, size_class, 1); + freelist().InsertRange(batch); } // Returns the actual number of fetched elements and stores elements in the // batch. 
- ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int N) + ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int N) ABSL_LOCKS_EXCLUDED(lock_) { ASSERT(N > 0); - const int B = Manager::num_objects_to_move(size_class); + const int B = Manager::num_objects_to_move(size_class); auto info = slot_info_.load(std::memory_order_relaxed); - if (N == B) { + if (N == B) { if (info.used >= N) { - absl::base_internal::SpinLockHolder h(&lock_); - // Refetch with the lock - info = slot_info_.load(std::memory_order_relaxed); - if (info.used >= N) { - info.used -= N; - SetSlotInfo(info); - void **entry = GetSlot(info.used); - memcpy(batch, entry, sizeof(void *) * N); - tracking::Report(kTCRemoveHit, size_class, 1); - remove_hits_.LossyAdd(1); - low_water_mark_.store( - std::min(low_water_mark_.load(std::memory_order_acquire), - info.used), - std::memory_order_release); - return N; - } + absl::base_internal::SpinLockHolder h(&lock_); + // Refetch with the lock + info = slot_info_.load(std::memory_order_relaxed); + if (info.used >= N) { + info.used -= N; + SetSlotInfo(info); + void **entry = GetSlot(info.used); + memcpy(batch, entry, sizeof(void *) * N); + tracking::Report(kTCRemoveHit, size_class, 1); + remove_hits_.LossyAdd(1); + low_water_mark_.store( + std::min(low_water_mark_.load(std::memory_order_acquire), + info.used), + std::memory_order_release); + return N; + } } - remove_misses_.Add(1); - } else { - remove_non_batch_misses_.Add(1); - } - low_water_mark_.store(0, std::memory_order_release); - - tracking::Report(kTCRemoveMiss, size_class, 1); - return freelist().RemoveRange(batch, N); - } - - // If this object has not been touched since the last attempt, then - // return all objects to 'freelist()'. 
- void TryPlunder(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { - if (max_capacity_ == 0) return; - int low_water_mark = low_water_mark_.load(std::memory_order_acquire); - low_water_mark_.store(std::numeric_limits<int>::max(), - std::memory_order_release); - while (low_water_mark > 0) { - if (!lock_.TryLock()) return; - if (low_water_mark_.load(std::memory_order_acquire) != - std::numeric_limits<int>::max()) { - lock_.Unlock(); - return; - } - const int B = Manager::num_objects_to_move(size_class); - SizeInfo info = GetSlotInfo(); - if (info.used == 0) { - lock_.Unlock(); - return; - } - const size_t num_to_move = std::min(B, info.used); - void *buf[kMaxObjectsToMove]; - void **const entry = GetSlot(info.used - B); - memcpy(buf, entry, sizeof(void *) * B); - info.used -= num_to_move; - low_water_mark -= num_to_move; + remove_misses_.Add(1); + } else { + remove_non_batch_misses_.Add(1); + } + low_water_mark_.store(0, std::memory_order_release); + + tracking::Report(kTCRemoveMiss, size_class, 1); + return freelist().RemoveRange(batch, N); + } + + // If this object has not been touched since the last attempt, then + // return all objects to 'freelist()'. 
+ void TryPlunder(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + if (max_capacity_ == 0) return; + int low_water_mark = low_water_mark_.load(std::memory_order_acquire); + low_water_mark_.store(std::numeric_limits<int>::max(), + std::memory_order_release); + while (low_water_mark > 0) { + if (!lock_.TryLock()) return; + if (low_water_mark_.load(std::memory_order_acquire) != + std::numeric_limits<int>::max()) { + lock_.Unlock(); + return; + } + const int B = Manager::num_objects_to_move(size_class); + SizeInfo info = GetSlotInfo(); + if (info.used == 0) { + lock_.Unlock(); + return; + } + const size_t num_to_move = std::min(B, info.used); + void *buf[kMaxObjectsToMove]; + void **const entry = GetSlot(info.used - B); + memcpy(buf, entry, sizeof(void *) * B); + info.used -= num_to_move; + low_water_mark -= num_to_move; SetSlotInfo(info); - lock_.Unlock(); - tracking::Report(kTCElementsPlunder, size_class, num_to_move); - freelist().InsertRange({buf, num_to_move}); + lock_.Unlock(); + tracking::Report(kTCElementsPlunder, size_class, num_to_move); + freelist().InsertRange({buf, num_to_move}); } } // Returns the number of free objects in the transfer cache. - size_t tc_length() const { + size_t tc_length() const { return static_cast<size_t>(slot_info_.load(std::memory_order_relaxed).used); } // Returns the number of transfer cache insert/remove hits/misses. - TransferCacheStats GetHitRateStats() const ABSL_LOCKS_EXCLUDED(lock_) { + TransferCacheStats GetHitRateStats() const ABSL_LOCKS_EXCLUDED(lock_) { TransferCacheStats stats; - - stats.insert_hits = insert_hits_.value(); - stats.remove_hits = remove_hits_.value(); - stats.insert_misses = insert_misses_.value(); - stats.insert_non_batch_misses = insert_non_batch_misses_.value(); - stats.remove_misses = remove_misses_.value(); - stats.remove_non_batch_misses = remove_non_batch_misses_.value(); - - // For performance reasons, we only update a single atomic as part of the - // actual allocation operation. 
For reporting, we keep reporting all - // misses together and separately break-out how many of those misses were - // non-batch sized. - stats.insert_misses += stats.insert_non_batch_misses; - stats.remove_misses += stats.remove_non_batch_misses; - + + stats.insert_hits = insert_hits_.value(); + stats.remove_hits = remove_hits_.value(); + stats.insert_misses = insert_misses_.value(); + stats.insert_non_batch_misses = insert_non_batch_misses_.value(); + stats.remove_misses = remove_misses_.value(); + stats.remove_non_batch_misses = remove_non_batch_misses_.value(); + + // For performance reasons, we only update a single atomic as part of the + // actual allocation operation. For reporting, we keep reporting all + // misses together and separately break-out how many of those misses were + // non-batch sized. + stats.insert_misses += stats.insert_non_batch_misses; + stats.remove_misses += stats.remove_non_batch_misses; + return stats; } @@ -272,11 +272,11 @@ class TransferCache { } // REQUIRES: lock is held. - // Tries to make room for N elements. If the cache is full it will try to - // expand it at the cost of some other cache size. Return false if there is - // no space. - bool MakeCacheSpace(int size_class, int N) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + // Tries to make room for N elements. If the cache is full it will try to + // expand it at the cost of some other cache size. Return false if there is + // no space. + bool MakeCacheSpace(int size_class, int N) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { auto info = slot_info_.load(std::memory_order_relaxed); // Is there room in the cache? if (info.used + N <= info.capacity) return true; @@ -284,7 +284,7 @@ class TransferCache { if (info.capacity + N > max_capacity_) return false; int to_evict = owner_->DetermineSizeClassToEvict(); - if (to_evict == size_class) return false; + if (to_evict == size_class) return false; // Release the held lock before the other instance tries to grab its lock. 
lock_.Unlock(); @@ -304,24 +304,24 @@ class TransferCache { return true; } - bool HasSpareCapacity(int size_class) const { - int n = Manager::num_objects_to_move(size_class); + bool HasSpareCapacity(int size_class) const { + int n = Manager::num_objects_to_move(size_class); auto info = GetSlotInfo(); return info.capacity - info.used >= n; } // Takes lock_ and invokes MakeCacheSpace() on this cache. Returns true if it - // succeeded at growing the cache by a batch size. - bool GrowCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + // succeeded at growing the cache by a batch size. + bool GrowCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { absl::base_internal::SpinLockHolder h(&lock_); - return MakeCacheSpace(size_class, Manager::num_objects_to_move(size_class)); + return MakeCacheSpace(size_class, Manager::num_objects_to_move(size_class)); } // REQUIRES: lock_ is *not* held. // Tries to shrink the Cache. Return false if it failed to shrink the cache. // Decreases cache_slots_ on success. - bool ShrinkCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { - int N = Manager::num_objects_to_move(size_class); + bool ShrinkCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + int N = Manager::num_objects_to_move(size_class); void *to_free[kMaxObjectsToMove]; int num_to_free; @@ -329,7 +329,7 @@ class TransferCache { absl::base_internal::SpinLockHolder h(&lock_); auto info = slot_info_.load(std::memory_order_relaxed); if (info.capacity == 0) return false; - if (info.capacity < N) return false; + if (info.capacity < N) return false; N = std::min(N, info.capacity); int unused = info.capacity - info.used; @@ -350,7 +350,7 @@ class TransferCache { } // Access the freelist without holding the lock. 
- freelist().InsertRange({to_free, static_cast<uint64_t>(num_to_free)}); + freelist().InsertRange({to_free, static_cast<uint64_t>(num_to_free)}); return true; } @@ -366,18 +366,18 @@ class TransferCache { return freelist_do_not_access_directly_; } - void AcquireInternalLocks() - { - freelist().AcquireInternalLocks(); - lock_.Lock(); - } - - void ReleaseInternalLocks() - { - lock_.Unlock(); - freelist().ReleaseInternalLocks(); - } - + void AcquireInternalLocks() + { + freelist().AcquireInternalLocks(); + lock_.Lock(); + } + + void ReleaseInternalLocks() + { + lock_.Unlock(); + freelist().ReleaseInternalLocks(); + } + private: // Returns first object of the i-th slot. void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { @@ -397,30 +397,30 @@ class TransferCache { // may be looked at without holding the lock. absl::base_internal::SpinLock lock_; - // Maximum size of the cache. - const int32_t max_capacity_; - - // insert_hits_ and remove_hits_ are logically guarded by lock_ for mutations - // and use LossyAdd, but the thread annotations cannot indicate that we do not - // need a lock for reads. - StatsCounter insert_hits_; - StatsCounter remove_hits_; - // Miss counters do not hold lock_, so they use Add. - StatsCounter insert_misses_; - StatsCounter insert_non_batch_misses_; - StatsCounter remove_misses_; - StatsCounter remove_non_batch_misses_; - - // Number of currently used and available cached entries in slots_. This + // Maximum size of the cache. + const int32_t max_capacity_; + + // insert_hits_ and remove_hits_ are logically guarded by lock_ for mutations + // and use LossyAdd, but the thread annotations cannot indicate that we do not + // need a lock for reads. + StatsCounter insert_hits_; + StatsCounter remove_hits_; + // Miss counters do not hold lock_, so they use Add. 
+ StatsCounter insert_misses_; + StatsCounter insert_non_batch_misses_; + StatsCounter remove_misses_; + StatsCounter remove_non_batch_misses_; + + // Number of currently used and available cached entries in slots_. This // variable is updated under a lock but can be read without one. // INVARIANT: [0 <= slot_info_.used <= slot_info.capacity <= max_cache_slots_] std::atomic<SizeInfo> slot_info_; - // Lowest value of "slot_info_.used" since last call to TryPlunder. All - // elements not used for a full cycle (2 seconds) are unlikely to get used - // again. - std::atomic<int> low_water_mark_; - + // Lowest value of "slot_info_.used" since last call to TryPlunder. All + // elements not used for a full cycle (2 seconds) are unlikely to get used + // again. + std::atomic<int> low_water_mark_; + // Pointer to array of free objects. Use GetSlot() to get pointers to // entries. void **slots_ ABSL_GUARDED_BY(lock_); @@ -428,469 +428,469 @@ class TransferCache { FreeList freelist_do_not_access_directly_; } ABSL_CACHELINE_ALIGNED; -struct RingBufferSizeInfo { - // The starting index of data stored in the ring buffer. - int32_t start; - // How many elements are stored. - int32_t used; - // How many elements are allowed to be stored at most. - int32_t capacity; -}; - -// RingBufferTransferCache is a transfer cache which stores cache entries in a -// ring buffer instead of a stack. +struct RingBufferSizeInfo { + // The starting index of data stored in the ring buffer. + int32_t start; + // How many elements are stored. + int32_t used; + // How many elements are allowed to be stored at most. + int32_t capacity; +}; + +// RingBufferTransferCache is a transfer cache which stores cache entries in a +// ring buffer instead of a stack. 
template <typename CentralFreeList, typename TransferCacheManager> -class RingBufferTransferCache { +class RingBufferTransferCache { public: using Manager = TransferCacheManager; using FreeList = CentralFreeList; - - RingBufferTransferCache(Manager *owner, int cl) - : RingBufferTransferCache(owner, cl, CapacityNeeded(cl)) {} - - RingBufferTransferCache( - Manager *owner, int cl, - typename TransferCache<CentralFreeList, TransferCacheManager>::Capacity - capacity) - : lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY), - slot_info_(RingBufferSizeInfo({0, 0, capacity.capacity})), - max_capacity_(capacity.max_capacity), - freelist_do_not_access_directly_(), - owner_(owner) { - freelist().Init(cl); - if (max_capacity_ == 0) { - // We don't allocate a buffer. Set slots_bitmask_ to 0 to prevent UB. - slots_bitmask_ = 0; - } else { - const size_t slots_size = absl::bit_ceil<size_t>(max_capacity_); - ASSERT(slots_size >= max_capacity_); - ASSERT(slots_size < max_capacity_ * 2); - slots_ = - reinterpret_cast<void **>(owner_->Alloc(slots_size * sizeof(void *))); - slots_bitmask_ = slots_size - 1; - } - } - - RingBufferTransferCache(const RingBufferTransferCache &) = delete; - RingBufferTransferCache &operator=(const RingBufferTransferCache &) = delete; - - // This transfercache implementation handles non-batch sized - // inserts and removes efficiently. - static constexpr bool IsFlexible() { return true; } - - // These methods all do internal locking. - - void AcquireInternalLocks() - { - freelist().AcquireInternalLocks(); - lock_.Lock(); - } - - void ReleaseInternalLocks() - { - lock_.Unlock(); - freelist().ReleaseInternalLocks(); - } - - // Insert the specified batch into the transfer cache. N is the number of - // elements in the range. RemoveRange() is the opposite operation. 
- void InsertRange(int size_class, absl::Span<void *> batch) - ABSL_LOCKS_EXCLUDED(lock_) { - const int N = batch.size(); - const int B = Manager::num_objects_to_move(size_class); - ASSERT(0 < N && N <= B); - void *to_free_buf[kMaxObjectsToMove]; - int to_free_num = 0; - - { - absl::base_internal::SpinLockHolder h(&lock_); - RingBufferSizeInfo info = GetSlotInfo(); - if (info.used + N <= max_capacity_) { - const bool cache_grown = MakeCacheSpace(size_class, N); - // MakeCacheSpace can drop the lock, so refetch - info = GetSlotInfo(); - if (cache_grown) { - CopyIntoEnd(batch.data(), N, info); - SetSlotInfo(info); - tracking::Report(kTCInsertHit, size_class, 1); - insert_hits_.LossyAdd(1); - return; - } - } - - // If we arrive here, this means that there is not enough capacity in the - // current cache to include the new items, and we cannot grow it. - - // We want to return up to `B` items from the transfer cache and currently - // inserted items. - const int returned_from_cache = std::min<int>(B, info.used); - if (returned_from_cache > 0) { - CopyOutOfStart(to_free_buf, returned_from_cache, info); - } - to_free_num = returned_from_cache; - if (info.used > 0) { - // We didn't have to return the whole cache. This means we can copy - // in all of the inserted items. - ASSERT(info.used + N <= info.capacity); - CopyIntoEnd(batch.data(), N, info); + + RingBufferTransferCache(Manager *owner, int cl) + : RingBufferTransferCache(owner, cl, CapacityNeeded(cl)) {} + + RingBufferTransferCache( + Manager *owner, int cl, + typename TransferCache<CentralFreeList, TransferCacheManager>::Capacity + capacity) + : lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY), + slot_info_(RingBufferSizeInfo({0, 0, capacity.capacity})), + max_capacity_(capacity.max_capacity), + freelist_do_not_access_directly_(), + owner_(owner) { + freelist().Init(cl); + if (max_capacity_ == 0) { + // We don't allocate a buffer. Set slots_bitmask_ to 0 to prevent UB. 
+ slots_bitmask_ = 0; + } else { + const size_t slots_size = absl::bit_ceil<size_t>(max_capacity_); + ASSERT(slots_size >= max_capacity_); + ASSERT(slots_size < max_capacity_ * 2); + slots_ = + reinterpret_cast<void **>(owner_->Alloc(slots_size * sizeof(void *))); + slots_bitmask_ = slots_size - 1; + } + } + + RingBufferTransferCache(const RingBufferTransferCache &) = delete; + RingBufferTransferCache &operator=(const RingBufferTransferCache &) = delete; + + // This transfercache implementation handles non-batch sized + // inserts and removes efficiently. + static constexpr bool IsFlexible() { return true; } + + // These methods all do internal locking. + + void AcquireInternalLocks() + { + freelist().AcquireInternalLocks(); + lock_.Lock(); + } + + void ReleaseInternalLocks() + { + lock_.Unlock(); + freelist().ReleaseInternalLocks(); + } + + // Insert the specified batch into the transfer cache. N is the number of + // elements in the range. RemoveRange() is the opposite operation. + void InsertRange(int size_class, absl::Span<void *> batch) + ABSL_LOCKS_EXCLUDED(lock_) { + const int N = batch.size(); + const int B = Manager::num_objects_to_move(size_class); + ASSERT(0 < N && N <= B); + void *to_free_buf[kMaxObjectsToMove]; + int to_free_num = 0; + + { + absl::base_internal::SpinLockHolder h(&lock_); + RingBufferSizeInfo info = GetSlotInfo(); + if (info.used + N <= max_capacity_) { + const bool cache_grown = MakeCacheSpace(size_class, N); + // MakeCacheSpace can drop the lock, so refetch + info = GetSlotInfo(); + if (cache_grown) { + CopyIntoEnd(batch.data(), N, info); + SetSlotInfo(info); + tracking::Report(kTCInsertHit, size_class, 1); + insert_hits_.LossyAdd(1); + return; + } + } + + // If we arrive here, this means that there is not enough capacity in the + // current cache to include the new items, and we cannot grow it. + + // We want to return up to `B` items from the transfer cache and currently + // inserted items. 
+ const int returned_from_cache = std::min<int>(B, info.used); + if (returned_from_cache > 0) { + CopyOutOfStart(to_free_buf, returned_from_cache, info); + } + to_free_num = returned_from_cache; + if (info.used > 0) { + // We didn't have to return the whole cache. This means we can copy + // in all of the inserted items. + ASSERT(info.used + N <= info.capacity); + CopyIntoEnd(batch.data(), N, info); } else { - // The transfercache is empty. We might still not have enough capacity - // to store all of the inserted items though. - const int to_insert_start = std::max(0, N - info.capacity); - ASSERT(returned_from_cache + to_insert_start <= B); - if (to_insert_start > 0) { - // We also want to return some of the inserted items in this case. - memcpy(to_free_buf + to_free_num, batch.data(), - to_insert_start * sizeof(void *)); - to_free_num += to_insert_start; - } - // This is only false if info.capacity is 0. - if (ABSL_PREDICT_TRUE(N > to_insert_start)) { - CopyIntoEnd(batch.data() + to_insert_start, N - to_insert_start, - info); - } + // The transfercache is empty. We might still not have enough capacity + // to store all of the inserted items though. + const int to_insert_start = std::max(0, N - info.capacity); + ASSERT(returned_from_cache + to_insert_start <= B); + if (to_insert_start > 0) { + // We also want to return some of the inserted items in this case. + memcpy(to_free_buf + to_free_num, batch.data(), + to_insert_start * sizeof(void *)); + to_free_num += to_insert_start; + } + // This is only false if info.capacity is 0. + if (ABSL_PREDICT_TRUE(N > to_insert_start)) { + CopyIntoEnd(batch.data() + to_insert_start, N - to_insert_start, + info); + } } - SetSlotInfo(info); - } - // It can work out that we manage to insert all items into the cache after - // all. 
- if (to_free_num > 0) { - ASSERT(to_free_num <= kMaxObjectsToMove); - ASSERT(to_free_num <= B); - insert_misses_.Add(1); - tracking::Report(kTCInsertMiss, size_class, 1); - freelist().InsertRange(absl::Span<void *>(to_free_buf, to_free_num)); + SetSlotInfo(info); } - } - - // Returns the actual number of fetched elements and stores elements in the - // batch. This might return less than N if the transfercache is non-empty but - // contains fewer elements than N. It is guaranteed to return at least 1 as - // long as either the transfercache or the free list are not empty. - ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int N) - ABSL_LOCKS_EXCLUDED(lock_) { - ASSERT(N > 0); - - { - absl::base_internal::SpinLockHolder h(&lock_); - RingBufferSizeInfo info = GetSlotInfo(); - if (info.used > 0) { - // Return up to however much we have in our local cache. - const int copied = std::min<int>(N, info.used); - CopyOutOfEnd(batch, copied, info); - SetSlotInfo(info); - tracking::Report(kTCRemoveHit, size_class, 1); - remove_hits_.LossyAdd(1); - low_water_mark_ = std::min(low_water_mark_, info.used); - return copied; - } - low_water_mark_ = 0; + // It can work out that we manage to insert all items into the cache after + // all. + if (to_free_num > 0) { + ASSERT(to_free_num <= kMaxObjectsToMove); + ASSERT(to_free_num <= B); + insert_misses_.Add(1); + tracking::Report(kTCInsertMiss, size_class, 1); + freelist().InsertRange(absl::Span<void *>(to_free_buf, to_free_num)); + } + } + + // Returns the actual number of fetched elements and stores elements in the + // batch. This might return less than N if the transfercache is non-empty but + // contains fewer elements than N. It is guaranteed to return at least 1 as + // long as either the transfercache or the free list are not empty. 
+ ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int N) + ABSL_LOCKS_EXCLUDED(lock_) { + ASSERT(N > 0); + + { + absl::base_internal::SpinLockHolder h(&lock_); + RingBufferSizeInfo info = GetSlotInfo(); + if (info.used > 0) { + // Return up to however much we have in our local cache. + const int copied = std::min<int>(N, info.used); + CopyOutOfEnd(batch, copied, info); + SetSlotInfo(info); + tracking::Report(kTCRemoveHit, size_class, 1); + remove_hits_.LossyAdd(1); + low_water_mark_ = std::min(low_water_mark_, info.used); + return copied; + } + low_water_mark_ = 0; } - remove_misses_.Add(1); - tracking::Report(kTCRemoveMiss, size_class, 1); - return freelist().RemoveRange(batch, N); - } - - // Return all objects not touched since last call to this function. - void TryPlunder(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { - if (max_capacity_ == 0) return; - // If the lock is being held, someone is modifying the cache. - if (!lock_.TryLock()) return; - int low_water_mark = low_water_mark_; - low_water_mark_ = std::numeric_limits<int>::max(); - const int B = Manager::num_objects_to_move(size_class); - while (slot_info_.used > 0 && low_water_mark >= B && - (low_water_mark_ == std::numeric_limits<int>::max())) { - const size_t num_to_move(std::min(B, slot_info_.used)); - void *buf[kMaxObjectsToMove]; - CopyOutOfEnd(buf, num_to_move, slot_info_); - low_water_mark -= num_to_move; - lock_.Unlock(); - freelist().InsertRange({buf, num_to_move}); - tracking::Report(kTCElementsPlunder, size_class, num_to_move); - // If someone is starting to use the cache, stop doing this. - if (!lock_.TryLock()) { - return; - } + remove_misses_.Add(1); + tracking::Report(kTCRemoveMiss, size_class, 1); + return freelist().RemoveRange(batch, N); + } + + // Return all objects not touched since last call to this function. + void TryPlunder(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + if (max_capacity_ == 0) return; + // If the lock is being held, someone is modifying the cache. 
+ if (!lock_.TryLock()) return; + int low_water_mark = low_water_mark_; + low_water_mark_ = std::numeric_limits<int>::max(); + const int B = Manager::num_objects_to_move(size_class); + while (slot_info_.used > 0 && low_water_mark >= B && + (low_water_mark_ == std::numeric_limits<int>::max())) { + const size_t num_to_move(std::min(B, slot_info_.used)); + void *buf[kMaxObjectsToMove]; + CopyOutOfEnd(buf, num_to_move, slot_info_); + low_water_mark -= num_to_move; + lock_.Unlock(); + freelist().InsertRange({buf, num_to_move}); + tracking::Report(kTCElementsPlunder, size_class, num_to_move); + // If someone is starting to use the cache, stop doing this. + if (!lock_.TryLock()) { + return; + } } - lock_.Unlock(); + lock_.Unlock(); } // Returns the number of free objects in the transfer cache. - size_t tc_length() ABSL_LOCKS_EXCLUDED(lock_) { - absl::base_internal::SpinLockHolder h(&lock_); - return static_cast<size_t>(GetSlotInfo().used); + size_t tc_length() ABSL_LOCKS_EXCLUDED(lock_) { + absl::base_internal::SpinLockHolder h(&lock_); + return static_cast<size_t>(GetSlotInfo().used); } // Returns the number of transfer cache insert/remove hits/misses. 
- TransferCacheStats GetHitRateStats() const ABSL_LOCKS_EXCLUDED(lock_) { + TransferCacheStats GetHitRateStats() const ABSL_LOCKS_EXCLUDED(lock_) { TransferCacheStats stats; - - stats.insert_hits = insert_hits_.value(); - stats.remove_hits = remove_hits_.value(); - stats.insert_misses = insert_misses_.value(); - stats.insert_non_batch_misses = 0; - stats.remove_misses = remove_misses_.value(); - stats.remove_non_batch_misses = 0; - + + stats.insert_hits = insert_hits_.value(); + stats.remove_hits = remove_hits_.value(); + stats.insert_misses = insert_misses_.value(); + stats.insert_non_batch_misses = 0; + stats.remove_misses = remove_misses_.value(); + stats.remove_non_batch_misses = 0; + return stats; } - RingBufferSizeInfo GetSlotInfo() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { - return slot_info_; - } - - // REQUIRES: lock is held. - // Tries to make room for N elements. If the cache is full it will try to - // expand it at the cost of some other cache size. Return false if there is - // no space. - bool MakeCacheSpace(int size_class, int N) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { - // Increase capacity in number of batches, as we do when reducing capacity. - const int B = Manager::num_objects_to_move(size_class); - ASSERT(B >= N); - - auto info = GetSlotInfo(); - // Is there room in the cache? - if (info.used + N <= info.capacity) return true; - // Check if we can expand this cache? - if (info.capacity + B > max_capacity_) return false; - - // Release the held lock before the other instance tries to grab its lock. - lock_.Unlock(); - int to_evict = owner_->DetermineSizeClassToEvict(); - if (to_evict == size_class) { - lock_.Lock(); - return false; + RingBufferSizeInfo GetSlotInfo() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + return slot_info_; + } + + // REQUIRES: lock is held. + // Tries to make room for N elements. If the cache is full it will try to + // expand it at the cost of some other cache size. Return false if there is + // no space. 
+ bool MakeCacheSpace(int size_class, int N) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + // Increase capacity in number of batches, as we do when reducing capacity. + const int B = Manager::num_objects_to_move(size_class); + ASSERT(B >= N); + + auto info = GetSlotInfo(); + // Is there room in the cache? + if (info.used + N <= info.capacity) return true; + // Check if we can expand this cache? + if (info.capacity + B > max_capacity_) return false; + + // Release the held lock before the other instance tries to grab its lock. + lock_.Unlock(); + int to_evict = owner_->DetermineSizeClassToEvict(); + if (to_evict == size_class) { + lock_.Lock(); + return false; } - bool made_space = owner_->ShrinkCache(to_evict); - lock_.Lock(); - - if (!made_space) return false; - - // Succeeded in evicting, we're going to make our cache larger. However, we - // have dropped and re-acquired the lock, so slot_info_ may have - // changed. Therefore, check and verify that it is still OK to increase the - // cache size. - info = GetSlotInfo(); - if (info.capacity + B > max_capacity_) return false; - info.capacity += B; - SetSlotInfo(info); + bool made_space = owner_->ShrinkCache(to_evict); + lock_.Lock(); + + if (!made_space) return false; + + // Succeeded in evicting, we're going to make our cache larger. However, we + // have dropped and re-acquired the lock, so slot_info_ may have + // changed. Therefore, check and verify that it is still OK to increase the + // cache size. + info = GetSlotInfo(); + if (info.capacity + B > max_capacity_) return false; + info.capacity += B; + SetSlotInfo(info); return true; } - bool HasSpareCapacity(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { - const int n = Manager::num_objects_to_move(size_class); - absl::base_internal::SpinLockHolder h(&lock_); - const auto info = GetSlotInfo(); - return info.capacity - info.used >= n; - } - - // Takes lock_ and invokes MakeCacheSpace() on this cache. 
Returns true if it - // succeeded at growing the cache by a batch size. - bool GrowCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { - absl::base_internal::SpinLockHolder h(&lock_); - return MakeCacheSpace(size_class, Manager::num_objects_to_move(size_class)); - } - - // REQUIRES: lock_ is *not* held. - // Tries to shrink the Cache. Return false if it failed to shrink the cache. - // Decreases cache_slots_ on success. - bool ShrinkCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { - const int N = Manager::num_objects_to_move(size_class); - - void *to_free[kMaxObjectsToMove]; - int num_to_free; - { - absl::base_internal::SpinLockHolder h(&lock_); - auto info = GetSlotInfo(); - if (info.capacity == 0) return false; - if (info.capacity < N) return false; - - const int unused = info.capacity - info.used; - if (N <= unused) { - info.capacity -= N; - SetSlotInfo(info); - return true; - } - - num_to_free = N - unused; - - // Remove from the beginning of the buffer which holds the oldest entries. - // Our internal slot array may get overwritten as soon as we drop the - // lock, so copy the items to free to an on stack buffer. - CopyOutOfStart(to_free, num_to_free, info); - low_water_mark_ = info.used; - info.capacity -= N; - SetSlotInfo(info); + bool HasSpareCapacity(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + const int n = Manager::num_objects_to_move(size_class); + absl::base_internal::SpinLockHolder h(&lock_); + const auto info = GetSlotInfo(); + return info.capacity - info.used >= n; + } + + // Takes lock_ and invokes MakeCacheSpace() on this cache. Returns true if it + // succeeded at growing the cache by a batch size. + bool GrowCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + absl::base_internal::SpinLockHolder h(&lock_); + return MakeCacheSpace(size_class, Manager::num_objects_to_move(size_class)); + } + + // REQUIRES: lock_ is *not* held. + // Tries to shrink the Cache. Return false if it failed to shrink the cache. + // Decreases cache_slots_ on success. 
+ bool ShrinkCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + const int N = Manager::num_objects_to_move(size_class); + + void *to_free[kMaxObjectsToMove]; + int num_to_free; + { + absl::base_internal::SpinLockHolder h(&lock_); + auto info = GetSlotInfo(); + if (info.capacity == 0) return false; + if (info.capacity < N) return false; + + const int unused = info.capacity - info.used; + if (N <= unused) { + info.capacity -= N; + SetSlotInfo(info); + return true; + } + + num_to_free = N - unused; + + // Remove from the beginning of the buffer which holds the oldest entries. + // Our internal slot array may get overwritten as soon as we drop the + // lock, so copy the items to free to an on stack buffer. + CopyOutOfStart(to_free, num_to_free, info); + low_water_mark_ = info.used; + info.capacity -= N; + SetSlotInfo(info); } - // Access the freelist without holding the lock. - freelist().InsertRange({to_free, static_cast<uint64_t>(num_to_free)}); - return true; - } - - // This is a thin wrapper for the CentralFreeList. It is intended to ensure - // that we are not holding lock_ when we access it. - ABSL_ATTRIBUTE_ALWAYS_INLINE FreeList &freelist() ABSL_LOCKS_EXCLUDED(lock_) { - return freelist_do_not_access_directly_; - } - - // The const version of the wrapper, needed to call stats on - ABSL_ATTRIBUTE_ALWAYS_INLINE const FreeList &freelist() const - ABSL_LOCKS_EXCLUDED(lock_) { - return freelist_do_not_access_directly_; - } - - private: - // Due to decreased downward pressure, the ring buffer based transfer cache - // contains on average more bytes than the legacy implementation. - // To counteract this, decrease the capacity (but not max capacity). 
- // TODO(b/161927252): Revisit TransferCache rebalancing strategy - static typename TransferCache<CentralFreeList, TransferCacheManager>::Capacity - CapacityNeeded(int cl) { - auto capacity = - TransferCache<CentralFreeList, TransferCacheManager>::CapacityNeeded( - cl); - const int N = Manager::num_objects_to_move(cl); - if (N == 0) return {0, 0}; - ASSERT(capacity.capacity % N == 0); - // We still want capacity to be in multiples of batches. - const int capacity_in_batches = capacity.capacity / N; - // This factor was found by trial and error. - const int new_batches = - static_cast<int>(std::ceil(capacity_in_batches / 1.5)); - capacity.capacity = new_batches * N; - return capacity; - } - - // Converts a logical index (i.e. i-th element stored in the ring buffer) into - // a physical index into slots_. - size_t GetSlotIndex(size_t start, size_t i) const { - return (start + i) & slots_bitmask_; - } - - // Copies N elements from source to the end of the ring buffer. It updates - // `info`, be sure to call SetSlotInfo() to save the modifications. - // N has to be > 0. - void CopyIntoEnd(void *const *source, size_t N, RingBufferSizeInfo &info) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { - ASSERT(N > 0); - ASSERT(info.used + N <= info.capacity); - const size_t begin = GetSlotIndex(info.start, info.used); - const size_t end = GetSlotIndex(info.start, info.used + N); - if (ABSL_PREDICT_FALSE(end < begin && end != 0)) { - // We wrap around the buffer. - memcpy(slots_ + begin, source, sizeof(void *) * (N - end)); - memcpy(slots_, source + (N - end), sizeof(void *) * end); + // Access the freelist without holding the lock. + freelist().InsertRange({to_free, static_cast<uint64_t>(num_to_free)}); + return true; + } + + // This is a thin wrapper for the CentralFreeList. It is intended to ensure + // that we are not holding lock_ when we access it. 
+ ABSL_ATTRIBUTE_ALWAYS_INLINE FreeList &freelist() ABSL_LOCKS_EXCLUDED(lock_) { + return freelist_do_not_access_directly_; + } + + // The const version of the wrapper, needed to call stats on + ABSL_ATTRIBUTE_ALWAYS_INLINE const FreeList &freelist() const + ABSL_LOCKS_EXCLUDED(lock_) { + return freelist_do_not_access_directly_; + } + + private: + // Due to decreased downward pressure, the ring buffer based transfer cache + // contains on average more bytes than the legacy implementation. + // To counteract this, decrease the capacity (but not max capacity). + // TODO(b/161927252): Revisit TransferCache rebalancing strategy + static typename TransferCache<CentralFreeList, TransferCacheManager>::Capacity + CapacityNeeded(int cl) { + auto capacity = + TransferCache<CentralFreeList, TransferCacheManager>::CapacityNeeded( + cl); + const int N = Manager::num_objects_to_move(cl); + if (N == 0) return {0, 0}; + ASSERT(capacity.capacity % N == 0); + // We still want capacity to be in multiples of batches. + const int capacity_in_batches = capacity.capacity / N; + // This factor was found by trial and error. + const int new_batches = + static_cast<int>(std::ceil(capacity_in_batches / 1.5)); + capacity.capacity = new_batches * N; + return capacity; + } + + // Converts a logical index (i.e. i-th element stored in the ring buffer) into + // a physical index into slots_. + size_t GetSlotIndex(size_t start, size_t i) const { + return (start + i) & slots_bitmask_; + } + + // Copies N elements from source to the end of the ring buffer. It updates + // `info`, be sure to call SetSlotInfo() to save the modifications. + // N has to be > 0. 
+ void CopyIntoEnd(void *const *source, size_t N, RingBufferSizeInfo &info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(N > 0); + ASSERT(info.used + N <= info.capacity); + const size_t begin = GetSlotIndex(info.start, info.used); + const size_t end = GetSlotIndex(info.start, info.used + N); + if (ABSL_PREDICT_FALSE(end < begin && end != 0)) { + // We wrap around the buffer. + memcpy(slots_ + begin, source, sizeof(void *) * (N - end)); + memcpy(slots_, source + (N - end), sizeof(void *) * end); } else { - memcpy(slots_ + begin, source, sizeof(void *) * N); + memcpy(slots_ + begin, source, sizeof(void *) * N); } - info.used += N; - } - - // Copies N elements stored in slots_ starting at the given logic index into - // target. Does not do any updates to slot_info_. - // N has to be > 0. - // You should use CopyOutOfEnd or CopyOutOfStart instead in most cases. - void CopyOutOfSlots(void **target, size_t N, size_t start, size_t index) const - ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { - ASSERT(N > 0); - const size_t begin = GetSlotIndex(start, index); - const size_t end = GetSlotIndex(start, index + N); - if (ABSL_PREDICT_FALSE(end < begin && end != 0)) { - // We wrap around the buffer. - memcpy(target, slots_ + begin, sizeof(void *) * (N - end)); - memcpy(target + (N - end), slots_, sizeof(void *) * end); + info.used += N; + } + + // Copies N elements stored in slots_ starting at the given logic index into + // target. Does not do any updates to slot_info_. + // N has to be > 0. + // You should use CopyOutOfEnd or CopyOutOfStart instead in most cases. + void CopyOutOfSlots(void **target, size_t N, size_t start, size_t index) const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(N > 0); + const size_t begin = GetSlotIndex(start, index); + const size_t end = GetSlotIndex(start, index + N); + if (ABSL_PREDICT_FALSE(end < begin && end != 0)) { + // We wrap around the buffer. 
+ memcpy(target, slots_ + begin, sizeof(void *) * (N - end)); + memcpy(target + (N - end), slots_, sizeof(void *) * end); } else { - memcpy(target, slots_ + begin, sizeof(void *) * N); + memcpy(target, slots_ + begin, sizeof(void *) * N); } } - // Copies N elements from the start of the ring buffer into target. Updates - // `info`, be sure to call SetSlotInfo() to save the modifications. - // N has to be > 0. - void CopyOutOfStart(void **target, size_t N, RingBufferSizeInfo &info) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { - ASSERT(N > 0); - ASSERT(N <= info.used); - CopyOutOfSlots(target, N, info.start, 0); - info.used -= N; - if (info.used == 0) { - // This makes it less likely that we will have to do copies that wrap - // around in the immediate future. - info.start = 0; - } else { - info.start = (info.start + N) & slots_bitmask_; + // Copies N elements from the start of the ring buffer into target. Updates + // `info`, be sure to call SetSlotInfo() to save the modifications. + // N has to be > 0. + void CopyOutOfStart(void **target, size_t N, RingBufferSizeInfo &info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(N > 0); + ASSERT(N <= info.used); + CopyOutOfSlots(target, N, info.start, 0); + info.used -= N; + if (info.used == 0) { + // This makes it less likely that we will have to do copies that wrap + // around in the immediate future. + info.start = 0; + } else { + info.start = (info.start + N) & slots_bitmask_; } } - // Copies N elements from the end of the ring buffer into target. Updates - // `info`, be sure to call SetSlotInfo() to save the modifications. - // N has to be > 0. - void CopyOutOfEnd(void **target, size_t N, RingBufferSizeInfo &info) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { - ASSERT(N > 0); - ASSERT(N <= info.used); - info.used -= N; - CopyOutOfSlots(target, N, info.start, info.used); - if (info.used == 0) { - // This makes it less likely that we will have to do copies that wrap - // around in the immediate future. 
- info.start = 0; + // Copies N elements from the end of the ring buffer into target. Updates + // `info`, be sure to call SetSlotInfo() to save the modifications. + // N has to be > 0. + void CopyOutOfEnd(void **target, size_t N, RingBufferSizeInfo &info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(N > 0); + ASSERT(N <= info.used); + info.used -= N; + CopyOutOfSlots(target, N, info.start, info.used); + if (info.used == 0) { + // This makes it less likely that we will have to do copies that wrap + // around in the immediate future. + info.start = 0; } } - void SetSlotInfo(RingBufferSizeInfo info) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { - ASSERT(0 <= info.start); - ASSERT((info.start & slots_bitmask_) == info.start); - ASSERT(0 <= info.used); - ASSERT(info.used <= info.capacity); - ASSERT(info.capacity <= max_capacity_); - slot_info_ = info; - } - - // Pointer to array of free objects. - void **slots_ ABSL_GUARDED_BY(lock_); - - // This lock protects all the data members. used_slots_ and cache_slots_ - // may be looked at without holding the lock. - absl::base_internal::SpinLock lock_; - - // Number of currently used and available cached entries in slots_. Use - // GetSlotInfo() to read this. - // INVARIANT: [0 <= slot_info_.used <= slot_info.capacity <= max_cache_slots_] - RingBufferSizeInfo slot_info_ ABSL_GUARDED_BY(lock_); - - // Lowest value of "slot_info_.used" since last call to TryPlunder. All - // elements not used for a full cycle (2 seconds) are unlikely to get used - // again. - int low_water_mark_ ABSL_GUARDED_BY(lock_) = std::numeric_limits<int>::max(); - - // Maximum size of the cache. - const int32_t max_capacity_; - // This is a bitmask used instead of a modulus in the ringbuffer index - // calculations. This is 1 smaller than the size of slots_ which itself has - // the size of `absl::bit_ceil(max_capacity_)`, i.e. the smallest power of two - // >= max_capacity_. 
- size_t slots_bitmask_; - - // insert_hits_ and remove_hits_ are logically guarded by lock_ for mutations - // and use LossyAdd, but the thread annotations cannot indicate that we do not - // need a lock for reads. - StatsCounter insert_hits_; - StatsCounter remove_hits_; - // Miss counters do not hold lock_, so they use Add. - StatsCounter insert_misses_; - StatsCounter remove_misses_; - - FreeList freelist_do_not_access_directly_; - Manager *const owner_; + void SetSlotInfo(RingBufferSizeInfo info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(0 <= info.start); + ASSERT((info.start & slots_bitmask_) == info.start); + ASSERT(0 <= info.used); + ASSERT(info.used <= info.capacity); + ASSERT(info.capacity <= max_capacity_); + slot_info_ = info; + } + + // Pointer to array of free objects. + void **slots_ ABSL_GUARDED_BY(lock_); + + // This lock protects all the data members. used_slots_ and cache_slots_ + // may be looked at without holding the lock. + absl::base_internal::SpinLock lock_; + + // Number of currently used and available cached entries in slots_. Use + // GetSlotInfo() to read this. + // INVARIANT: [0 <= slot_info_.used <= slot_info.capacity <= max_cache_slots_] + RingBufferSizeInfo slot_info_ ABSL_GUARDED_BY(lock_); + + // Lowest value of "slot_info_.used" since last call to TryPlunder. All + // elements not used for a full cycle (2 seconds) are unlikely to get used + // again. + int low_water_mark_ ABSL_GUARDED_BY(lock_) = std::numeric_limits<int>::max(); + + // Maximum size of the cache. + const int32_t max_capacity_; + // This is a bitmask used instead of a modulus in the ringbuffer index + // calculations. This is 1 smaller than the size of slots_ which itself has + // the size of `absl::bit_ceil(max_capacity_)`, i.e. the smallest power of two + // >= max_capacity_. 
+ size_t slots_bitmask_; + + // insert_hits_ and remove_hits_ are logically guarded by lock_ for mutations + // and use LossyAdd, but the thread annotations cannot indicate that we do not + // need a lock for reads. + StatsCounter insert_hits_; + StatsCounter remove_hits_; + // Miss counters do not hold lock_, so they use Add. + StatsCounter insert_misses_; + StatsCounter remove_misses_; + + FreeList freelist_do_not_access_directly_; + Manager *const owner_; } ABSL_CACHELINE_ALIGNED; -} // namespace tcmalloc::tcmalloc_internal::internal_transfer_cache -GOOGLE_MALLOC_SECTION_END +} // namespace tcmalloc::tcmalloc_internal::internal_transfer_cache +GOOGLE_MALLOC_SECTION_END #endif // TCMALLOC_TRANSFER_CACHE_INTERNAL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h index fdc8fba53c..ffa551c26b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h @@ -18,18 +18,18 @@ #include <stddef.h> namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { struct TransferCacheStats { size_t insert_hits; size_t insert_misses; - size_t insert_non_batch_misses; + size_t insert_non_batch_misses; size_t remove_hits; size_t remove_misses; - size_t remove_non_batch_misses; + size_t remove_non_batch_misses; }; -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc #endif // TCMALLOC_TRANSFER_CACHE_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc index 4531f7a921..9a9ed5bed5 100644 --- a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc @@ -14,7 +14,7 @@ #include "tcmalloc/transfer_cache.h" -#include <algorithm> +#include <algorithm> #include <atomic> #include <cmath> #include <cstring> @@ -38,11 +38,11 @@ #include 
"tcmalloc/transfer_cache_internals.h" namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { namespace { -static constexpr int kSizeClass = 0; - +static constexpr int kSizeClass = 0; + template <typename Env> using TransferCacheTest = ::testing::Test; TYPED_TEST_SUITE_P(TransferCacheTest); @@ -50,112 +50,112 @@ TYPED_TEST_SUITE_P(TransferCacheTest); TYPED_TEST_P(TransferCacheTest, IsolatedSmoke) { const int batch_size = TypeParam::kBatchSize; TypeParam e; - EXPECT_CALL(e.central_freelist(), InsertRange) - .Times(e.transfer_cache().IsFlexible() ? 0 : 1); - EXPECT_CALL(e.central_freelist(), RemoveRange) - .Times(e.transfer_cache().IsFlexible() ? 0 : 1); + EXPECT_CALL(e.central_freelist(), InsertRange) + .Times(e.transfer_cache().IsFlexible() ? 0 : 1); + EXPECT_CALL(e.central_freelist(), RemoveRange) + .Times(e.transfer_cache().IsFlexible() ? 0 : 1); EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 0); EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 0); EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); e.Insert(batch_size); EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1); e.Insert(batch_size); EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 2); - e.Insert(batch_size - 1); - if (e.transfer_cache().IsFlexible()) { - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 3); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); - } else { - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 2); - 
EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 1); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 1); - } + e.Insert(batch_size - 1); + if (e.transfer_cache().IsFlexible()) { + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 3); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); + } else { + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 2); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 1); + } e.Remove(batch_size); EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 1); e.Remove(batch_size); EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 2); - e.Remove(batch_size - 1); - if (e.transfer_cache().IsFlexible()) { - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 3); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); - } else { - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 2); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 1); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 1); - } -} - -TYPED_TEST_P(TransferCacheTest, ReadStats) { - const int batch_size = TypeParam::kBatchSize; - TypeParam e; - EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); - EXPECT_CALL(e.central_freelist(), RemoveRange).Times(0); - - // Ensure there is at least one insert hit/remove hit, so we can assert a - // non-tautology in t2. 
- e.Insert(batch_size); - e.Remove(batch_size); - - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 1); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); - - std::atomic<bool> stop{false}; - - std::thread t1([&]() { - while (!stop.load(std::memory_order_acquire)) { - e.Insert(batch_size); - e.Remove(batch_size); - } - }); - - std::thread t2([&]() { - while (!stop.load(std::memory_order_acquire)) { - auto stats = e.transfer_cache().GetHitRateStats(); - CHECK_CONDITION(stats.insert_hits >= 1); - CHECK_CONDITION(stats.insert_misses == 0); - CHECK_CONDITION(stats.insert_non_batch_misses == 0); - CHECK_CONDITION(stats.remove_hits >= 1); - CHECK_CONDITION(stats.remove_misses == 0); - CHECK_CONDITION(stats.remove_non_batch_misses == 0); - } - }); - - absl::SleepFor(absl::Seconds(1)); - stop.store(true, std::memory_order_release); - - t1.join(); - t2.join(); -} - -TYPED_TEST_P(TransferCacheTest, SingleItemSmoke) { - const int batch_size = TypeParam::kBatchSize; - if (batch_size == 1) { - GTEST_SKIP() << "skipping trivial batch size"; - } - TypeParam e; - const int actions = e.transfer_cache().IsFlexible() ? 
2 : 0; - EXPECT_CALL(e.central_freelist(), InsertRange).Times(2 - actions); - EXPECT_CALL(e.central_freelist(), RemoveRange).Times(2 - actions); - - e.Insert(1); - e.Insert(1); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, actions); - e.Remove(1); - e.Remove(1); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, actions); + e.Remove(batch_size - 1); + if (e.transfer_cache().IsFlexible()) { + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 3); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); + } else { + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 2); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 1); + } } +TYPED_TEST_P(TransferCacheTest, ReadStats) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); + EXPECT_CALL(e.central_freelist(), RemoveRange).Times(0); + + // Ensure there is at least one insert hit/remove hit, so we can assert a + // non-tautology in t2. 
+ e.Insert(batch_size); + e.Remove(batch_size); + + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); + + std::atomic<bool> stop{false}; + + std::thread t1([&]() { + while (!stop.load(std::memory_order_acquire)) { + e.Insert(batch_size); + e.Remove(batch_size); + } + }); + + std::thread t2([&]() { + while (!stop.load(std::memory_order_acquire)) { + auto stats = e.transfer_cache().GetHitRateStats(); + CHECK_CONDITION(stats.insert_hits >= 1); + CHECK_CONDITION(stats.insert_misses == 0); + CHECK_CONDITION(stats.insert_non_batch_misses == 0); + CHECK_CONDITION(stats.remove_hits >= 1); + CHECK_CONDITION(stats.remove_misses == 0); + CHECK_CONDITION(stats.remove_non_batch_misses == 0); + } + }); + + absl::SleepFor(absl::Seconds(1)); + stop.store(true, std::memory_order_release); + + t1.join(); + t2.join(); +} + +TYPED_TEST_P(TransferCacheTest, SingleItemSmoke) { + const int batch_size = TypeParam::kBatchSize; + if (batch_size == 1) { + GTEST_SKIP() << "skipping trivial batch size"; + } + TypeParam e; + const int actions = e.transfer_cache().IsFlexible() ? 
2 : 0; + EXPECT_CALL(e.central_freelist(), InsertRange).Times(2 - actions); + EXPECT_CALL(e.central_freelist(), RemoveRange).Times(2 - actions); + + e.Insert(1); + e.Insert(1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, actions); + e.Remove(1); + e.Remove(1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, actions); +} + TYPED_TEST_P(TransferCacheTest, FetchesFromFreelist) { const int batch_size = TypeParam::kBatchSize; TypeParam e; @@ -192,7 +192,7 @@ TYPED_TEST_P(TransferCacheTest, EvictsOtherCaches) { }); EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); - while (e.transfer_cache().HasSpareCapacity(kSizeClass)) { + while (e.transfer_cache().HasSpareCapacity(kSizeClass)) { e.Insert(batch_size); } size_t old_hits = e.transfer_cache().GetHitRateStats().insert_hits; @@ -201,62 +201,62 @@ TYPED_TEST_P(TransferCacheTest, EvictsOtherCaches) { EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); } -TYPED_TEST_P(TransferCacheTest, EvictsOtherCachesFlex) { - const int batch_size = TypeParam::kBatchSize; - TypeParam e; - - EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillRepeatedly([]() { - return true; - }); - if (e.transfer_cache().IsFlexible()) { - EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); - } else { - EXPECT_CALL(e.central_freelist(), InsertRange).Times(batch_size - 1); - } - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 0); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); - - int total = 0; - for (int i = 1; i <= batch_size; i++) { - e.Insert(i); - total += i; - } - - if (e.transfer_cache().IsFlexible()) { - EXPECT_EQ(e.transfer_cache().tc_length(), total); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, batch_size); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); - } else { - EXPECT_EQ(e.transfer_cache().tc_length(), 1 * batch_size); - EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1); - 
EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, - batch_size - 1); - } -} - -// Similar to EvictsOtherCachesFlex, but with full cache. -TYPED_TEST_P(TransferCacheTest, FullCacheFlex) { - const int batch_size = TypeParam::kBatchSize; - TypeParam e; - - EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillRepeatedly([]() { - return true; - }); - if (e.transfer_cache().IsFlexible()) { - EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); - } else { - EXPECT_CALL(e.central_freelist(), InsertRange) - .Times(testing::AtLeast(batch_size)); - } - - while (e.transfer_cache().HasSpareCapacity(kSizeClass)) { - e.Insert(batch_size); - } - for (int i = 1; i < batch_size + 2; i++) { - e.Insert(i); - } -} - +TYPED_TEST_P(TransferCacheTest, EvictsOtherCachesFlex) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + + EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillRepeatedly([]() { + return true; + }); + if (e.transfer_cache().IsFlexible()) { + EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); + } else { + EXPECT_CALL(e.central_freelist(), InsertRange).Times(batch_size - 1); + } + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + + int total = 0; + for (int i = 1; i <= batch_size; i++) { + e.Insert(i); + total += i; + } + + if (e.transfer_cache().IsFlexible()) { + EXPECT_EQ(e.transfer_cache().tc_length(), total); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + } else { + EXPECT_EQ(e.transfer_cache().tc_length(), 1 * batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, + batch_size - 1); + } +} + +// Similar to EvictsOtherCachesFlex, but with full cache. 
+TYPED_TEST_P(TransferCacheTest, FullCacheFlex) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + + EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillRepeatedly([]() { + return true; + }); + if (e.transfer_cache().IsFlexible()) { + EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); + } else { + EXPECT_CALL(e.central_freelist(), InsertRange) + .Times(testing::AtLeast(batch_size)); + } + + while (e.transfer_cache().HasSpareCapacity(kSizeClass)) { + e.Insert(batch_size); + } + for (int i = 1; i < batch_size + 2; i++) { + e.Insert(i); + } +} + TYPED_TEST_P(TransferCacheTest, PushesToFreelist) { const int batch_size = TypeParam::kBatchSize; TypeParam e; @@ -266,7 +266,7 @@ TYPED_TEST_P(TransferCacheTest, PushesToFreelist) { }); EXPECT_CALL(e.central_freelist(), InsertRange).Times(1); - while (e.transfer_cache().HasSpareCapacity(kSizeClass)) { + while (e.transfer_cache().HasSpareCapacity(kSizeClass)) { e.Insert(batch_size); } size_t old_hits = e.transfer_cache().GetHitRateStats().insert_hits; @@ -281,7 +281,7 @@ TYPED_TEST_P(TransferCacheTest, WrappingWorks) { TypeParam env; EXPECT_CALL(env.transfer_cache_manager(), ShrinkCache).Times(0); - while (env.transfer_cache().HasSpareCapacity(kSizeClass)) { + while (env.transfer_cache().HasSpareCapacity(kSizeClass)) { env.Insert(batch_size); } for (int i = 0; i < 100; ++i) { @@ -290,60 +290,60 @@ TYPED_TEST_P(TransferCacheTest, WrappingWorks) { } } -TYPED_TEST_P(TransferCacheTest, WrappingFlex) { - const int batch_size = TypeParam::kBatchSize; - - TypeParam env; - EXPECT_CALL(env.transfer_cache_manager(), ShrinkCache).Times(0); - if (env.transfer_cache().IsFlexible()) { - EXPECT_CALL(env.central_freelist(), InsertRange).Times(0); - EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0); - } - - while (env.transfer_cache().HasSpareCapacity(kSizeClass)) { - env.Insert(batch_size); - } - for (int i = 0; i < 100; ++i) { - for (size_t size = 1; size < batch_size + 2; size++) { - env.Remove(size); - 
env.Insert(size); - } - } -} - -TYPED_TEST_P(TransferCacheTest, Plunder) { - TypeParam env; - // EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0); - // EXPECT_CALL(env.central_freelist(), InsertRange).Times(1); - // Fill in some elements. - env.Insert(TypeParam::kBatchSize); - env.Insert(TypeParam::kBatchSize); - ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize); - // All these elements will be plundered. - env.transfer_cache().TryPlunder(kSizeClass); - ASSERT_EQ(env.transfer_cache().tc_length(), 0); - - env.Insert(TypeParam::kBatchSize); - env.Insert(TypeParam::kBatchSize); - ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize); - - void* buf[TypeParam::kBatchSize]; - // -1 +1, this sets the low_water_mark (the lowest end-state after a - // call to RemoveRange to 1 batch. - (void)env.transfer_cache().RemoveRange(kSizeClass, buf, - TypeParam::kBatchSize); - env.transfer_cache().InsertRange(kSizeClass, {buf, TypeParam::kBatchSize}); - ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize); - // We have one batch, and this is the same as the low water mark, so nothing - // gets plundered. - env.transfer_cache().TryPlunder(kSizeClass); - ASSERT_EQ(env.transfer_cache().tc_length(), TypeParam::kBatchSize); - // If we plunder immediately the low_water_mark is at maxint, and eveything - // gets plundered. 
- env.transfer_cache().TryPlunder(kSizeClass); - ASSERT_EQ(env.transfer_cache().tc_length(), 0); -} - +TYPED_TEST_P(TransferCacheTest, WrappingFlex) { + const int batch_size = TypeParam::kBatchSize; + + TypeParam env; + EXPECT_CALL(env.transfer_cache_manager(), ShrinkCache).Times(0); + if (env.transfer_cache().IsFlexible()) { + EXPECT_CALL(env.central_freelist(), InsertRange).Times(0); + EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0); + } + + while (env.transfer_cache().HasSpareCapacity(kSizeClass)) { + env.Insert(batch_size); + } + for (int i = 0; i < 100; ++i) { + for (size_t size = 1; size < batch_size + 2; size++) { + env.Remove(size); + env.Insert(size); + } + } +} + +TYPED_TEST_P(TransferCacheTest, Plunder) { + TypeParam env; + // EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0); + // EXPECT_CALL(env.central_freelist(), InsertRange).Times(1); + // Fill in some elements. + env.Insert(TypeParam::kBatchSize); + env.Insert(TypeParam::kBatchSize); + ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize); + // All these elements will be plundered. + env.transfer_cache().TryPlunder(kSizeClass); + ASSERT_EQ(env.transfer_cache().tc_length(), 0); + + env.Insert(TypeParam::kBatchSize); + env.Insert(TypeParam::kBatchSize); + ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize); + + void* buf[TypeParam::kBatchSize]; + // -1 +1, this sets the low_water_mark (the lowest end-state after a + // call to RemoveRange to 1 batch. + (void)env.transfer_cache().RemoveRange(kSizeClass, buf, + TypeParam::kBatchSize); + env.transfer_cache().InsertRange(kSizeClass, {buf, TypeParam::kBatchSize}); + ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize); + // We have one batch, and this is the same as the low water mark, so nothing + // gets plundered. 
+ env.transfer_cache().TryPlunder(kSizeClass); + ASSERT_EQ(env.transfer_cache().tc_length(), TypeParam::kBatchSize); + // If we plunder immediately the low_water_mark is at maxint, and eveything + // gets plundered. + env.transfer_cache().TryPlunder(kSizeClass); + ASSERT_EQ(env.transfer_cache().tc_length(), 0); +} + // PickCoprimeBatchSize picks a batch size in [2, max_batch_size) that is // coprime with 2^32. We choose the largest possible batch size within that // constraint to minimize the number of iterations of insert/remove required. @@ -358,12 +358,12 @@ static size_t PickCoprimeBatchSize(size_t max_batch_size) { return max_batch_size; } -TEST(RingBufferTest, b172283201) { +TEST(RingBufferTest, b172283201) { // This test is designed to exercise the wraparound behavior for the - // RingBufferTransferCache, which manages its indices in uint32_t's. Because - // it uses a non-standard batch size (kBatchSize) as part of - // PickCoprimeBatchSize, it triggers a TransferCache miss to the - // CentralFreeList, which is uninteresting for exercising b/172283201. + // RingBufferTransferCache, which manages its indices in uint32_t's. Because + // it uses a non-standard batch size (kBatchSize) as part of + // PickCoprimeBatchSize, it triggers a TransferCache miss to the + // CentralFreeList, which is uninteresting for exercising b/172283201. // For performance reasons, limit to optimized builds. 
#if !defined(NDEBUG) @@ -374,8 +374,8 @@ TEST(RingBufferTest, b172283201) { #endif using EnvType = FakeTransferCacheEnvironment< - internal_transfer_cache::RingBufferTransferCache< - MockCentralFreeList, MockTransferCacheManager>>; + internal_transfer_cache::RingBufferTransferCache< + MockCentralFreeList, MockTransferCacheManager>>; EnvType env; // We pick the largest value <= EnvType::kBatchSize to use as a batch size, @@ -396,63 +396,63 @@ TEST(RingBufferTest, b172283201) { pointers.push_back(&buffer[i]); } - // To produce wraparound in the RingBufferTransferCache, we fill up the cache - // completely and then keep inserting new elements. This makes the cache - // return old elements to the freelist and eventually wrap around. + // To produce wraparound in the RingBufferTransferCache, we fill up the cache + // completely and then keep inserting new elements. This makes the cache + // return old elements to the freelist and eventually wrap around. EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0); - // We do return items to the freelist, don't try to actually free them. - ON_CALL(env.central_freelist(), InsertRange).WillByDefault(testing::Return()); - ON_CALL(env.transfer_cache_manager(), DetermineSizeClassToEvict) - .WillByDefault(testing::Return(kSizeClass)); - - // First fill up the cache to its capacity. - - while (env.transfer_cache().HasSpareCapacity(kSizeClass) || - env.transfer_cache().GrowCache(kSizeClass)) { - env.transfer_cache().InsertRange(kSizeClass, absl::MakeSpan(pointers)); + // We do return items to the freelist, don't try to actually free them. + ON_CALL(env.central_freelist(), InsertRange).WillByDefault(testing::Return()); + ON_CALL(env.transfer_cache_manager(), DetermineSizeClassToEvict) + .WillByDefault(testing::Return(kSizeClass)); + + // First fill up the cache to its capacity. 
+ + while (env.transfer_cache().HasSpareCapacity(kSizeClass) || + env.transfer_cache().GrowCache(kSizeClass)) { + env.transfer_cache().InsertRange(kSizeClass, absl::MakeSpan(pointers)); + } + + // The current size of the transfer cache is close to its capacity. Insert + // enough batches to make sure we wrap around twice (1 batch size should wrap + // around as we are full currently, then insert the same amount of items + // again, then one more wrap around). + const size_t kObjects = env.transfer_cache().tc_length() + 2 * batch_size; + + // From now on, calls to InsertRange() should result in a corresponding call + // to the freelist whenever the cache is full. This doesn't happen on every + // call, as we return up to num_to_move (i.e. kBatchSize) items to the free + // list in one batch. + EXPECT_CALL(env.central_freelist(), + InsertRange(testing::SizeIs(EnvType::kBatchSize))) + .Times(testing::AnyNumber()); + for (size_t i = 0; i < kObjects; i += batch_size) { + env.transfer_cache().InsertRange(kSizeClass, absl::MakeSpan(pointers)); } - - // The current size of the transfer cache is close to its capacity. Insert - // enough batches to make sure we wrap around twice (1 batch size should wrap - // around as we are full currently, then insert the same amount of items - // again, then one more wrap around). - const size_t kObjects = env.transfer_cache().tc_length() + 2 * batch_size; - - // From now on, calls to InsertRange() should result in a corresponding call - // to the freelist whenever the cache is full. This doesn't happen on every - // call, as we return up to num_to_move (i.e. kBatchSize) items to the free - // list in one batch. 
- EXPECT_CALL(env.central_freelist(), - InsertRange(testing::SizeIs(EnvType::kBatchSize))) - .Times(testing::AnyNumber()); - for (size_t i = 0; i < kObjects; i += batch_size) { - env.transfer_cache().InsertRange(kSizeClass, absl::MakeSpan(pointers)); - } - // Manually drain the items in the transfercache, otherwise the destructor - // will try to free them. - std::vector<void*> to_free(batch_size); - size_t N = env.transfer_cache().tc_length(); - while (N > 0) { - const size_t to_remove = std::min(N, batch_size); - const size_t removed = - env.transfer_cache().RemoveRange(kSizeClass, to_free.data(), to_remove); - ASSERT_THAT(removed, testing::Le(to_remove)); - ASSERT_THAT(removed, testing::Gt(0)); - N -= removed; - } - ASSERT_EQ(env.transfer_cache().tc_length(), 0); + // Manually drain the items in the transfercache, otherwise the destructor + // will try to free them. + std::vector<void*> to_free(batch_size); + size_t N = env.transfer_cache().tc_length(); + while (N > 0) { + const size_t to_remove = std::min(N, batch_size); + const size_t removed = + env.transfer_cache().RemoveRange(kSizeClass, to_free.data(), to_remove); + ASSERT_THAT(removed, testing::Le(to_remove)); + ASSERT_THAT(removed, testing::Gt(0)); + N -= removed; + } + ASSERT_EQ(env.transfer_cache().tc_length(), 0); } -REGISTER_TYPED_TEST_SUITE_P(TransferCacheTest, IsolatedSmoke, ReadStats, +REGISTER_TYPED_TEST_SUITE_P(TransferCacheTest, IsolatedSmoke, ReadStats, FetchesFromFreelist, PartialFetchFromFreelist, - EvictsOtherCaches, PushesToFreelist, WrappingWorks, - SingleItemSmoke, EvictsOtherCachesFlex, - FullCacheFlex, WrappingFlex, Plunder); + EvictsOtherCaches, PushesToFreelist, WrappingWorks, + SingleItemSmoke, EvictsOtherCachesFlex, + FullCacheFlex, WrappingFlex, Plunder); template <typename Env> -using FuzzTest = ::testing::Test; -TYPED_TEST_SUITE_P(FuzzTest); +using FuzzTest = ::testing::Test; +TYPED_TEST_SUITE_P(FuzzTest); -TYPED_TEST_P(FuzzTest, MultiThreadedUnbiased) { +TYPED_TEST_P(FuzzTest, 
MultiThreadedUnbiased) { TypeParam env; ThreadManager threads; threads.Start(10, [&](int) { env.RandomlyPoke(); }); @@ -462,7 +462,7 @@ TYPED_TEST_P(FuzzTest, MultiThreadedUnbiased) { threads.Stop(); } -TYPED_TEST_P(FuzzTest, MultiThreadedBiasedInsert) { +TYPED_TEST_P(FuzzTest, MultiThreadedBiasedInsert) { const int batch_size = TypeParam::kBatchSize; TypeParam env; @@ -474,7 +474,7 @@ TYPED_TEST_P(FuzzTest, MultiThreadedBiasedInsert) { threads.Stop(); } -TYPED_TEST_P(FuzzTest, MultiThreadedBiasedRemove) { +TYPED_TEST_P(FuzzTest, MultiThreadedBiasedRemove) { const int batch_size = TypeParam::kBatchSize; TypeParam env; @@ -486,7 +486,7 @@ TYPED_TEST_P(FuzzTest, MultiThreadedBiasedRemove) { threads.Stop(); } -TYPED_TEST_P(FuzzTest, MultiThreadedBiasedShrink) { +TYPED_TEST_P(FuzzTest, MultiThreadedBiasedShrink) { TypeParam env; ThreadManager threads; threads.Start(10, [&](int) { env.RandomlyPoke(); }); @@ -496,7 +496,7 @@ TYPED_TEST_P(FuzzTest, MultiThreadedBiasedShrink) { threads.Stop(); } -TYPED_TEST_P(FuzzTest, MultiThreadedBiasedGrow) { +TYPED_TEST_P(FuzzTest, MultiThreadedBiasedGrow) { TypeParam env; ThreadManager threads; threads.Start(10, [&](int) { env.RandomlyPoke(); }); @@ -506,120 +506,120 @@ TYPED_TEST_P(FuzzTest, MultiThreadedBiasedGrow) { threads.Stop(); } -REGISTER_TYPED_TEST_SUITE_P(FuzzTest, MultiThreadedUnbiased, +REGISTER_TYPED_TEST_SUITE_P(FuzzTest, MultiThreadedUnbiased, MultiThreadedBiasedInsert, MultiThreadedBiasedRemove, MultiThreadedBiasedGrow, MultiThreadedBiasedShrink); namespace unit_tests { -using Env = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache< - MockCentralFreeList, MockTransferCacheManager>>; -INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, TransferCacheTest, - ::testing::Types<Env>); - -using RingBufferEnv = FakeTransferCacheEnvironment< - internal_transfer_cache::RingBufferTransferCache<MockCentralFreeList, - MockTransferCacheManager>>; -INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, TransferCacheTest, - 
::testing::Types<RingBufferEnv>); +using Env = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache< + MockCentralFreeList, MockTransferCacheManager>>; +INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, TransferCacheTest, + ::testing::Types<Env>); + +using RingBufferEnv = FakeTransferCacheEnvironment< + internal_transfer_cache::RingBufferTransferCache<MockCentralFreeList, + MockTransferCacheManager>>; +INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, TransferCacheTest, + ::testing::Types<RingBufferEnv>); } // namespace unit_tests namespace fuzz_tests { // Use the FakeCentralFreeList instead of the MockCentralFreeList for fuzz tests // as it avoids the overheads of mocks and allows more iterations of the fuzzing // itself. -using Env = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache< - MockCentralFreeList, MockTransferCacheManager>>; -INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, FuzzTest, ::testing::Types<Env>); - -using RingBufferEnv = FakeTransferCacheEnvironment< - internal_transfer_cache::RingBufferTransferCache<MockCentralFreeList, - MockTransferCacheManager>>; -INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, FuzzTest, - ::testing::Types<RingBufferEnv>); +using Env = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache< + MockCentralFreeList, MockTransferCacheManager>>; +INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, FuzzTest, ::testing::Types<Env>); + +using RingBufferEnv = FakeTransferCacheEnvironment< + internal_transfer_cache::RingBufferTransferCache<MockCentralFreeList, + MockTransferCacheManager>>; +INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, FuzzTest, + ::testing::Types<RingBufferEnv>); } // namespace fuzz_tests -namespace leak_tests { - -template <typename Env> -using TwoSizeClassTest = ::testing::Test; -TYPED_TEST_SUITE_P(TwoSizeClassTest); - -TYPED_TEST_P(TwoSizeClassTest, NoLeaks) { - TypeParam env; - - // The point of this test is to see that adding "random" amounts of - // allocations to the transfer caches behaves 
correctly, even in the case that - // there are multiple size classes interacting by stealing from each other. - - // Fill all caches to their maximum without starting to steal from each other. - for (int cl = 1; cl < TypeParam::Manager::kSizeClasses; ++cl) { - const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); - while (env.transfer_cache_manager().HasSpareCapacity(cl)) { - env.Insert(cl, batch_size); - } - } - - // Count the number of batches currently in the cache. - auto count_batches = [&env]() { - int batch_count = 0; - for (int cl = 1; cl < TypeParam::Manager::kSizeClasses; ++cl) { - const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); - batch_count += env.transfer_cache_manager().tc_length(cl) / batch_size; - } - return batch_count; - }; - - absl::BitGen bitgen; - const int max_batches = count_batches(); - int expected_batches = max_batches; - for (int i = 0; i < 100; ++i) { - { - // First remove. - const int cl = - absl::Uniform<int>(bitgen, 1, TypeParam::Manager::kSizeClasses); - const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); - if (env.transfer_cache_manager().tc_length(cl) >= batch_size) { - env.Remove(cl, batch_size); - --expected_batches; - } - const int current_batches = count_batches(); - EXPECT_EQ(current_batches, expected_batches) << "iteration " << i; - } - { - // Then add in another size class. - const int cl = - absl::Uniform<int>(bitgen, 1, TypeParam::Manager::kSizeClasses); - // Evict from the "next" size class, skipping 0. - // This makes sure we are always evicting from somewhere if at all - // possible. 
- env.transfer_cache_manager().evicting_from_ = - 1 + cl % (TypeParam::Manager::kSizeClasses - 1); - if (expected_batches < max_batches) { - const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); - env.Insert(cl, batch_size); - ++expected_batches; - } - const int current_batches = count_batches(); - EXPECT_EQ(current_batches, expected_batches) << "iteration " << i; - } - } -} - -REGISTER_TYPED_TEST_SUITE_P(TwoSizeClassTest, NoLeaks); - -using TwoTransferCacheEnv = - TwoSizeClassEnv<internal_transfer_cache::TransferCache>; -INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, TwoSizeClassTest, - ::testing::Types<TwoTransferCacheEnv>); - -using TwoRingBufferEnv = - TwoSizeClassEnv<internal_transfer_cache::RingBufferTransferCache>; -INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, TwoSizeClassTest, - ::testing::Types<TwoRingBufferEnv>); - -} // namespace leak_tests - +namespace leak_tests { + +template <typename Env> +using TwoSizeClassTest = ::testing::Test; +TYPED_TEST_SUITE_P(TwoSizeClassTest); + +TYPED_TEST_P(TwoSizeClassTest, NoLeaks) { + TypeParam env; + + // The point of this test is to see that adding "random" amounts of + // allocations to the transfer caches behaves correctly, even in the case that + // there are multiple size classes interacting by stealing from each other. + + // Fill all caches to their maximum without starting to steal from each other. + for (int cl = 1; cl < TypeParam::Manager::kSizeClasses; ++cl) { + const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); + while (env.transfer_cache_manager().HasSpareCapacity(cl)) { + env.Insert(cl, batch_size); + } + } + + // Count the number of batches currently in the cache. 
+ auto count_batches = [&env]() { + int batch_count = 0; + for (int cl = 1; cl < TypeParam::Manager::kSizeClasses; ++cl) { + const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); + batch_count += env.transfer_cache_manager().tc_length(cl) / batch_size; + } + return batch_count; + }; + + absl::BitGen bitgen; + const int max_batches = count_batches(); + int expected_batches = max_batches; + for (int i = 0; i < 100; ++i) { + { + // First remove. + const int cl = + absl::Uniform<int>(bitgen, 1, TypeParam::Manager::kSizeClasses); + const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); + if (env.transfer_cache_manager().tc_length(cl) >= batch_size) { + env.Remove(cl, batch_size); + --expected_batches; + } + const int current_batches = count_batches(); + EXPECT_EQ(current_batches, expected_batches) << "iteration " << i; + } + { + // Then add in another size class. + const int cl = + absl::Uniform<int>(bitgen, 1, TypeParam::Manager::kSizeClasses); + // Evict from the "next" size class, skipping 0. + // This makes sure we are always evicting from somewhere if at all + // possible. 
+ env.transfer_cache_manager().evicting_from_ = + 1 + cl % (TypeParam::Manager::kSizeClasses - 1); + if (expected_batches < max_batches) { + const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); + env.Insert(cl, batch_size); + ++expected_batches; + } + const int current_batches = count_batches(); + EXPECT_EQ(current_batches, expected_batches) << "iteration " << i; + } + } +} + +REGISTER_TYPED_TEST_SUITE_P(TwoSizeClassTest, NoLeaks); + +using TwoTransferCacheEnv = + TwoSizeClassEnv<internal_transfer_cache::TransferCache>; +INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, TwoSizeClassTest, + ::testing::Types<TwoTransferCacheEnv>); + +using TwoRingBufferEnv = + TwoSizeClassEnv<internal_transfer_cache::RingBufferTransferCache>; +INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, TwoSizeClassTest, + ::testing::Types<TwoRingBufferEnv>); + +} // namespace leak_tests + } // namespace -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc b/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc index b488ceb54f..6047944bea 100644 --- a/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc +++ b/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc @@ -13,11 +13,11 @@ // limitations under the License. #include "absl/base/attributes.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // This -if linked into a binary - overrides page_allocator.cc and forces HPAA // on/subrelease off. 
@@ -25,6 +25,6 @@ ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; } ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return -1; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc b/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc index 323cce40ed..acad45c51b 100644 --- a/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc +++ b/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc @@ -13,11 +13,11 @@ // limitations under the License. #include "absl/base/attributes.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // This -if linked into a binary - overrides page_allocator.cc and forces HPAA // on/subrelease on. @@ -25,6 +25,6 @@ ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; } ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return 1; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc b/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc index 28580e13ed..5a46481c05 100644 --- a/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc +++ b/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc @@ -13,16 +13,16 @@ // limitations under the License. #include "absl/base/attributes.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // This -if linked into a binary - overrides common.cc and // forces old span sizes. 
ABSL_ATTRIBUTE_UNUSED int default_want_legacy_spans() { return 1; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc b/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc index e23d93d9ce..700efa2064 100644 --- a/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc +++ b/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc @@ -13,11 +13,11 @@ // limitations under the License. #include "absl/base/attributes.h" -#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/config.h" -GOOGLE_MALLOC_SECTION_BEGIN +GOOGLE_MALLOC_SECTION_BEGIN namespace tcmalloc { -namespace tcmalloc_internal { +namespace tcmalloc_internal { // This -if linked into a binary - overrides page_allocator.cc and // forces HPAA off/subrelease off. @@ -25,6 +25,6 @@ ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return -1; } ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return -1; } -} // namespace tcmalloc_internal +} // namespace tcmalloc_internal } // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc b/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc index 3f0519dd50..1fc1cf0a17 100644 --- a/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc +++ b/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc @@ -1,28 +1,28 @@ -// Copyright 2021 The TCMalloc Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/base/attributes.h" -#include "tcmalloc/internal/config.h" - -GOOGLE_MALLOC_SECTION_BEGIN -namespace tcmalloc { -namespace tcmalloc_internal { - -// When linked into a binary this overrides the weak implementation in numa.cc -// and causes TCMalloc to enable NUMA awareness by default. -ABSL_ATTRIBUTE_UNUSED bool default_want_numa_aware() { return true; } - -} // namespace tcmalloc_internal -} // namespace tcmalloc -GOOGLE_MALLOC_SECTION_END +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// When linked into a binary this overrides the weak implementation in numa.cc +// and causes TCMalloc to enable NUMA awareness by default. 
+ABSL_ATTRIBUTE_UNUSED bool default_want_numa_aware() { return true; } + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/ya.make b/contrib/libs/tcmalloc/ya.make index 54701b1b77..dfd1706aad 100644 --- a/contrib/libs/tcmalloc/ya.make +++ b/contrib/libs/tcmalloc/ya.make @@ -6,33 +6,33 @@ LICENSE_TEXTS(.yandex_meta/licenses.list.txt) OWNER( ayles - prime + prime g:cpp-contrib ) # https://github.com/google/tcmalloc -VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376) +VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376) SRCS( - # Options - tcmalloc/want_hpaa.cc + # Options + tcmalloc/want_hpaa.cc ) -INCLUDE(common.inc) +INCLUDE(common.inc) CFLAGS( -DTCMALLOC_256K_PAGES ) END() - + IF (NOT DLL_FOR) - RECURSE( - default - dynamic - malloc_extension - numa_256k - numa_large_pages - slow_but_small - ) -ENDIF() + RECURSE( + default + dynamic + malloc_extension + numa_256k + numa_large_pages + slow_but_small + ) +ENDIF() |